#!/cygdrive/c/python32/python
# Program to cross-check the bird inventory against the files that exist for
# each bird: text, descriptions, videos, feet and tongue pictures, etc.
import pdb
import re
import subprocess

def ls_proc(path, exten):
    """Count the files matching ``path + '*' + exten``, grouped by base name.

    Each matching file name is reduced to a key by dropping the directory
    part, removing every occurrence of ``exten``, and removing every
    underscore together with the single character that follows it
    (presumably a one-character sequence suffix such as ``_1`` -- confirm
    against the naming scheme in use).

    The pattern is expanded with ``glob`` inside Python.  Passing the
    wildcard to ``ls`` through ``subprocess`` without a shell only works
    where the platform expands argv wildcards on its own.

    Parameters:
        path:  directory prefix, including the trailing separator,
               e.g. ``'../text/'``.
        exten: file extension including the dot, e.g. ``'.txt'``.

    Returns:
        dict mapping each reduced name to the number of files that reduced
        to it.  Empty when nothing matches.
    """
    import glob
    import os

    out = {}
    for found in sorted(glob.glob(path + '*' + exten)):
        # basename() rather than replace(path, ''): glob may hand back a
        # different separator than the one used in `path`.
        stem = os.path.basename(found)
        stem = stem.replace(exten, '')
        stem = re.sub(r'_.', '', stem)
        out[stem] = out.get(stem, 0) + 1
    return out

# strip leading and trailing whitespace, including the trailing newline
def astrip(line):
    """Return `line` without leading or trailing whitespace.

    Trailing newlines count as whitespace, so they are removed as well.
    """
    return line.strip()

# get the latin name from the files
def get_latin(pntr):
    """Collect the latin name of every inventoried bird from ``../text/*.txt``.

    Only the first two lines of each file are examined (the same window the
    former ``head -2`` call provided).  The latin name is the text between
    the first ``<i>`` and the following ``</i>``, stripped of surrounding
    whitespace.

    Diagnostics are printed, not raised:
      * ``Name not in inventory database`` -- the file stem is not in
        `pntr` (stems containing 'Favorites' or 'Latest' are skipped
        silently);
      * ``No latin name for`` -- neither of the two lines contains ``<i>``;
      * ``Did not find latin name for`` -- ``<i>`` has no closing ``</i>``;
      * ``Duplicate entry for`` -- a second file produced the same stem.

    Parameters:
        pntr: container of known bird keys (file stems); only membership
              (``in``) is used.

    Returns:
        dict mapping file stem to latin name.
    """
    import glob
    import os

    latin = {}
    for fpath in sorted(glob.glob('../text/*.txt')):
        name = os.path.splitext(os.path.basename(fpath))[0]
        if name not in pntr:
            if 'Favorites' not in name and 'Latest' not in name:
                print('Name not in inventory database: ' + name)
            continue

        with open(fpath, encoding='utf8') as fh:
            first_two = [fh.readline(), fh.readline()]

        for line in first_two:
            start = line.find('<i>')
            if start < 0:
                continue
            # Look for the closing tag only after the opening one.
            end = line.find('</i>', start)
            if end < 0:
                print('Did not find latin name for: ' + name)
            elif name in latin:
                print('Duplicate entry for: ' + name)
            else:
                latin[name] = line[start+3:end].strip()
            break                       # only the first <i> line counts
        else:
            # No line carried an <i> tag.  Checked per file, so the last
            # file is reported too.
            print('No latin name for: ' + name)
    return latin

# Tables filled from the inventory database.
birds, pntr = {}, {}    # birds: full name -> pic count; pntr: name without spaces -> full name

# Per-directory tables; start out empty.
text, desc = {}, {}
video, feet, tongue = {}, {}, {}

latin = {}
nofull = []

# Scan each directory of interest; every table maps a reduced file name to
# the number of files found for it.
text = ls_proc(path='../text/', exten='.txt')
desc = ls_proc(path='../Descriptions/', exten='.txt')

video = ls_proc(path='../videos/', exten='.mp4')

feet = ls_proc(path='../feet/', exten='.jpg')
tongue = ls_proc(path='../Tongue/', exten='.jpg')

ls_place = ls_proc(path='../text_place/', exten='.txt')
ls_class = ls_proc(path='../text_class/', exten='.txt')
ls_title = ls_proc(path='../text_title/', exten='.txt')


# Read the inventory database: count the pics per bird in `birds` and record
# the spaceless-name -> full-name mapping in `pntr`.  Lines are tab-separated
# and the bird name is the fifth field; lines starting with '*' are skipped.
with open('inventory.txt') as inv:
    for key in inv:
        key = astrip(key)
        # Test for emptiness first: indexing key[0] on a blank line raises
        # IndexError.
        if not key or key[0] == '*': continue
        fields = key.split('\t')
        bname = fields[4].strip()
        birds[bname] = birds.get(bname, 0) + 1
        tmp = bname.replace(' ','')
        # Keep the first full name seen for a given spaceless key.
        pntr.setdefault(tmp, bname)

latin = get_latin(pntr)

print('Checking short and full Descriptions')
print('Birds that have no descriptions')

# Walk the inventory keys alphabetically.  A bird with neither a text file
# nor a description file is printed; every bird without a text file is also
# remembered in `nofull`.
keys = sorted(pntr)
for key in keys:
    if key not in text and key not in desc:
        print(pntr[key])
    if key not in text:
        nofull.append(key)

