#!/cygdrive/c/python34/python
# Program that makes sure that the bird name mentioned in the text/Description .txt
# file matches the filename and is in the bird_class database.  This is important
# because if I am entering a lot of new birds and adding text on them, I can forget to
# change the line that mentions the bird.  This is not foolproof but probably catches simple
# errors.
# This program only needs to be run when I add a bunch of new text or description files.

import pdb
import subprocess

def astrip(line):
    """Return *line* with all leading and trailing whitespace removed.

    Trailing newlines count as whitespace, so they are removed as well.
    """
    return line.strip()

# Store the information to check to make sure that the
# text/Description file is for the right bird, the filename (birdname)
# should be found in the first few lines of the file.
def check_bird(patterns=('../text/*.txt', '../Descriptions/*.txt')):
    """Map each text/Description file name to the line that should name the bird.

    For every file matching one of *patterns*, the first two lines are read
    (the same window the former ``head -n 2`` call produced).  Empty lines
    and short ``<p>`` marker lines are ignored; the last remaining line is
    stored under the file's base name with its extension removed.

    The files are read directly instead of shelling out to ``head``:
    ``subprocess`` does not expand ``*`` when given an argument list, so the
    old call only worked where the child process did its own globbing, and
    any file text starting with ``=`` was mistaken for a ``head`` header.

    Args:
        patterns: glob patterns to scan, in order.  A name produced by a
            later pattern overwrites the same name from an earlier one.

    Returns:
        dict mapping file name (without extension) to the text line that is
        expected to mention the bird.  Files whose first two lines are all
        empty or ``<p>`` markers get no entry.

    Raises:
        OSError: a matched file cannot be opened or read.
        UnicodeDecodeError: one of the first two lines is not valid UTF-8.
    """
    # Imported here so the function does not rely on the file-level imports.
    import glob
    import os

    print ('--Checking that filename matches birdname in file')
    first_lines = {}
    for pattern in patterns:
        paths = sorted(glob.glob(pattern))
        if not paths:
            # The old 'head' call failed loudly here; at least say so.
            print ('  no files match', pattern)
        for path in paths:
            name = os.path.splitext(os.path.basename(path))[0]
            # Read bytes and decode only the two lines of interest, so bad
            # bytes further down the file cannot abort the check.
            with open(path, 'rb') as handle:
                raw_lines = [handle.readline(), handle.readline()]
            for raw in raw_lines:
                # Drop the line ending (LF or CRLF) before the length tests.
                line = raw.decode('utf8').rstrip('\r\n')
                if len(line) < 1:
                    continue
                if ('<p>' in line) and (len(line) < 6):
                    continue
                # A later usable line replaces an earlier one.
                first_lines[name] = line
    return first_lines

# main routine

# Get a combined mapping for the files in ../text and ../Descriptions:
# file name (without .txt) -> the leading text line that should mention the
# bird.  That is normally the second line; the first line is usually <p>.
bird_text = check_bird()

# Read the bird_class database and check every 'bird:' entry against the
# text found for the file of the same name.  The 'with' block closes the
# database even if a line cannot be processed.
with open('bird_class') as bclass:
    for line in bclass:
        line = astrip(line)
        if 'bird:' not in line:
            continue
        fields = line.split(':')
        abird = astrip(fields[1])
        # File names are the bird name with the spaces removed.
        key = abird.replace(' ', '')
        if key in bird_text:
            if abird not in bird_text[key]:
                print ('File name is not found in the text file: ', line)
            # Remove checked entries so that only unmatched files remain.
            del bird_text[key]

# Now check if there is anything left in the bird_text dictionary.  If so, I
# have a dangling file.  But first, get rid of some non-bird entries that I
# know about.  That is, Favorites and Latest.
bird_text.pop('Favorites', None)
bird_text.pop('Latest', None)
if bird_text:
    keys = list(bird_text)
    print ('There are some leftover text files: ', keys)
