#!/cygdrive/c/python32/python
# Program to check out the text for descriptions, tongue, videos, feet,
# etc.

import glob
import os
import pdb
import re
import subprocess


def ls_proc(path, exten):
    """Count the files in a directory, grouped by their base name.

    Every file matching ``path + '*' + exten`` is reduced to a key by
    dropping the directory part, removing ``exten`` and removing each
    underscore together with the single character that follows it (so
    ``Robin_2.jpg`` is counted under ``Robin``).

    The wildcard is expanded with ``glob`` instead of being handed to an
    external ``ls``: a list-style subprocess call does not go through a
    shell, so the pattern was only expanded when the launched program
    happened to glob its own arguments.

    Args:
        path: Directory prefix, including the trailing separator.
        exten: File extension to match, including the leading dot.

    Returns:
        A dict mapping each reduced name to the number of matching files.
        The dict is empty when nothing matches.
    """
    out = {}
    for match in sorted(glob.glob(path + '*' + exten)):
        stem = os.path.basename(match).replace(exten, '')
        # NOTE(review): '_.' strips exactly one character after each
        # underscore, so 'Robin_10' becomes 'Robin0' - confirm this is
        # the intended numbering scheme.
        stem = re.sub('_.', '', stem)
        out[stem] = out.get(stem, 0) + 1
    return out


# delete beginning and ending spaces and carriage return
def astrip(line):
    """Return ``line`` without leading or trailing whitespace or newlines."""
    return line.strip()


def _find_latin(name, lines, open_tag, close_tag):
    """Return the text between the tags on the first tagged line, or None.

    Prints a diagnostic for ``name`` when no line carries ``open_tag`` or
    when the tagged line has no ``close_tag`` after it.
    """
    for line in lines:
        start = line.find(open_tag)
        if start < 0:
            continue
        start += len(open_tag)
        end = line.find(close_tag, start)
        if end < 0:
            print('Did not find latin name for: ' + name)
            return None
        return line[start:end].strip()
    print('No latin name for: ' + name)
    return None


# get the latin name from the files
def get_latin(pntr, pattern='../text/*.txt', open_tag='<i>', close_tag='</i>'):
    """Collect the latin name recorded at the top of each text file.

    Only the first two lines of every file matching ``pattern`` are
    examined (the original ran ``head -2`` over the same files).  Files
    whose base name is not a key of ``pntr`` are skipped, with a message
    unless the name contains 'Favorites' or 'Latest'.

    NOTE(review): in the original source both tag literals were empty
    strings, which made every line match and every extracted name empty.
    '<i>' and '</i>' are inferred from the original 3-character offset
    past the start marker - confirm against the real text files.

    Args:
        pntr: Container of bird names without spaces; only membership is
            tested.
        pattern: Glob pattern selecting the text files to scan.
        open_tag: Marker that precedes the latin name.
        close_tag: Marker that follows the latin name.

    Returns:
        A dict mapping file base name to the latin name found in it.
    """
    latin = {}
    for path in sorted(glob.glob(pattern)):
        name = os.path.splitext(os.path.basename(path))[0]
        if name not in pntr:
            if 'Favorites' not in name and 'Latest' not in name:
                print('Name not in inventory database: ' + name)
            continue
        with open(path, encoding='utf8') as handle:
            head = [handle.readline(), handle.readline()]
        aname = _find_latin(name, head, open_tag, close_tag)
        if aname is None:
            continue
        if name in latin:
            print('Duplicate entry for: ' + name)
        else:
            latin[name] = aname
    return latin


birds = {}   # contains the full name and count of pics
pntr = {}    # relates birdname without spaces to with spaces
feet = {}
tongue = {}
video = {}
text = {}
desc = {}
nofull = []
latin = {}

# First get a list of the files in the various directories being examined.
# Count the files found in each directory being examined; every result
# maps a reduced file name to the number of files sharing that name.
text = ls_proc('../text/', '.txt')
desc = ls_proc('../Descriptions/', '.txt')
video = ls_proc('../videos/', '.mp4')
feet = ls_proc('../feet/', '.jpg')
tongue = ls_proc('../Tongue/', '.jpg')
ls_place = ls_proc('../text_place/', '.txt')
ls_class = ls_proc('../text_class/', '.txt')
ls_title = ls_proc('../text_title/', '.txt')

# read in the inventory database and count the number of pics
# The with-block closes the file even when a malformed line raises.
with open('inventory.txt') as inv:
    for key in inv:
        key = astrip(key)
        # Test for an empty line first: indexing key[0] on a blank line
        # would raise IndexError.  Lines starting with '*' are skipped
        # (presumably comments - confirm against the inventory format).
        if not key or key[0] == '*':
            continue
        fields = key.split('\t')
        # The fifth tab-separated field is used as the bird name.
        bname = fields[4].strip()
        birds[bname] = birds.get(bname, 0) + 1
        # Index the spaced name by its space-free form; first seen wins.
        tmp = bname.replace(' ', '')
        if tmp not in pntr:
            pntr[tmp] = bname

latin = get_latin(pntr)

print('Checking short and full Descriptions')
print('Birds that have no descriptions')
keys = sorted(pntr)
for key in keys:
    # No entry in either the text or the Descriptions directory.
    if key not in text and key not in desc:
        print(pntr[key])
    # Remember every bird lacking an entry in the text directory.
    if key not in text:
        nofull.append(key)