[Tutor] Review and criticism of python project
GTXY20
gtxy20 at gmail.com
Fri Jan 4 23:25:07 CET 2008
Hi there.
This section of the project takes a comma-separated data file and imports
it - there is a unique ID in the first field and a code in the second
field that corresponds to a certain section of information. What I need from
this is for the process to roll up all section holdings against each unique
ID without duplicates, report on section combinations, and report overall
section counts. In addition I need the ability to assign a page-count value
to these sections and the ability to upload a translation file, in case
a section is identified by multiple values that need to be normalized to a
single unique value.
Sorry for the lengthy code response - all comments are appreciated - as
mentioned I am quite new with Python - it is doing what I need it to do but
I think that it is a mess and needs to be cleaned up a little.
Thanks for any comments.
GTXY20
import sys
import os
class __analysis:
    """Interactive report generator for unit-ID / section holdings.

    Instantiating the class starts the whole interactive run: it prompts for
    a data file, optionally a translation file and a page-count file,
    optionally a section-distribution cut-off, writes three tab-separated
    reports to the current directory (``Sectiondist.txt``, ``Sectionqty.txt``
    and ``SectionCombqty.txt``) and finally terminates the interpreter.

    Attributes set during the run:
        datafile      -- name of the data file that was read
        uhdata        -- dict: unit ID -> sorted list of unique section codes
        pgcounts      -- dict: section code -> page count (int)
        fdistmax      -- int cut-off, or ``NO_CUTOFF`` when none was requested
        transfile     -- name of the translation file (only if requested)
        pagecountfile -- name of the page-count file (only if requested)
    """

    # Value stored in ``fdistmax`` when the user declines a cut-off.
    NO_CUTOFF = 1000000000

    def __init__(self):
        """Print the banner, ask for the data file and run the analysis."""
        print('***Analysis Tool***')
        self.datafile = self._ask('data file name:')
        self.parsefile()

    def _ask(self, prompt):
        """Show *prompt* on the console and return the typed reply."""
        try:
            reader = raw_input  # Python 2 spelling
        except NameError:
            reader = input  # Python 3 renamed raw_input() to input()
        return reader(prompt)

    def _read_pairs(self, path):
        """Return the records of *path* as a list of comma-split lists.

        The file content is split on any whitespace and each resulting
        record on ','.  Raises IOError when the file cannot be read.
        """
        handle = open(path, 'r')
        try:
            records = handle.read().split()
        finally:
            # Release the handle even when read() fails.
            handle.close()
        return [record.split(',') for record in records]

    def _prompt_for_pairs(self, path, prompt):
        """Read records from *path*, asking again until a file can be read.

        Returns ``(path, pairs)`` where *path* is the name that finally
        worked and *pairs* is the result of ``_read_pairs``.
        """
        while True:
            try:
                return path, self._read_pairs(path)
            except IOError:
                print('file not found check file name')
                path = self._ask(prompt)

    def parsefile(self):
        """Load the data file, gather the user's options and write reports.

        Builds ``uhdata`` (unit ID -> sorted unique section codes) from the
        ``unitID,section`` records, optionally applies the translation file
        and the page-count file, stores the cut-off in ``fdistmax``, runs the
        report chain and then exits the interpreter.

        Raises ValueError when a record does not hold exactly two
        comma-separated fields or the cut-off reply is not an integer.
        """
        self.datafile, pairs = self._prompt_for_pairs(
            self.datafile, 'data file name:')

        holdings = {}
        for unit_id, section in pairs:
            holdings.setdefault(unit_id, set()).add(section)
        self.uhdata = {}
        for unit_id, sections in holdings.items():
            self.uhdata[unit_id] = sorted(sections)

        reply = self._ask('would you like to translate section codes? (y/n):')
        if reply == 'y':
            self.transFn()

        reply = self._ask(
            'would you like to assign section page counts? (y/n):')
        if reply == 'y':
            self.setPageCounts()
        else:
            # No page-count file: every section that occurs counts 0 pages.
            self.pgcounts = {}
            for sections in self.uhdata.values():
                for section in sections:
                    self.pgcounts[section] = 0

        reply = self._ask('would you like to define max section '
                          'distribution cut off? (y/n):')
        if reply == 'y':
            self.fdistmax = int(self._ask(
                'what is the max distributions before a full book?:'))
        else:
            self.fdistmax = self.NO_CUTOFF

        # Sectiondistmax() chains into Sectionqty() and SectionCombqty().
        self.Sectiondistmax()
        # NOTE(review): status 1 is kept from the original although the run
        # succeeded; confirm nothing relies on it before changing it to 0.
        sys.exit(1)

    def Sectionqty(self):
        """Write ``Sectionqty.txt``: number of unit IDs holding each section.

        When a cut-off is active, only units holding at most ``fdistmax``
        sections are counted.  Calls ``SectionCombqty`` afterwards.
        """
        from collections import defaultdict

        capped = self.fdistmax != self.NO_CUTOFF
        tally = defaultdict(int)
        for sections in self.uhdata.values():
            if not capped or len(sections) <= self.fdistmax:
                for section in sections:
                    tally[section] += 1

        out = open('Sectionqty.txt', 'w')
        try:
            out.write('Section\tQTY\n')
            for section, qty in sorted(tally.items()):
                out.write('%s\t%s\n' % (section, qty))
        finally:
            out.close()
        self.SectionCombqty()

    def SectionCombqty(self):
        """Write ``SectionCombqty.txt``: section combinations and page totals.

        One row per distinct combination of sections, most frequent first,
        with its unit count, size, page count and total pages (count times
        page count).  When a cut-off is active, combinations larger than
        ``fdistmax`` are summed into one trailing 'Full Book' row.
        """
        from collections import defaultdict

        combos = defaultdict(int)
        for sections in self.uhdata.values():
            combos[tuple(sections)] += 1
        ranked = sorted(((qty, combo) for combo, qty in combos.items()),
                        reverse=True)

        capped = self.fdistmax != self.NO_CUTOFF
        overflow = 0
        out = open('SectionCombqty.txt', 'w')
        try:
            out.write('Combination Qty\tNumber of Sections\tCombination'
                      '\tCombinationPageCount\tTotalPages\n')
            for qty, combo in ranked:
                if capped and len(combo) > self.fdistmax:
                    overflow += qty
                    continue
                # A section without a page count contributes 0 pages; the
                # previous default was the section code itself, which made
                # sum() fail with TypeError.
                pages = sum(self.pgcounts.get(section, 0)
                            for section in combo)
                out.write('%s\t%s\t%s\t%s\t%s\n' % (
                    qty, len(combo), ','.join(combo), pages, qty * pages))
            if capped:
                out.write('%s\t>%s\t%s\t%s\t%s\n' % (
                    overflow, self.fdistmax,
                    'Full Book', 'Full Book', 'Full Book'))
        finally:
            out.close()

    def setPageCounts(self):
        """Ask for the page-count file and load it into ``pgcounts``.

        Each record is ``section,pagecount``.  Raises ValueError when a
        record is malformed or the count is not an integer.
        """
        self.pagecountfile = self._ask('page count file name:')
        self.pagecountfile, pairs = self._prompt_for_pairs(
            self.pagecountfile, 'page count file name:')
        self.pgcounts = {}
        for section, count in pairs:
            self.pgcounts[section] = int(count)

    def Sectiondistmax(self):
        """Write ``Sectiondist.txt``: how many unit IDs hold N sections.

        When a cut-off is active, units holding more than ``fdistmax``
        sections are summed into one trailing 'Full Book' row.  Calls
        ``Sectionqty`` afterwards.
        """
        from collections import defaultdict

        by_size = defaultdict(int)
        for sections in self.uhdata.values():
            by_size[len(sections)] += 1

        capped = self.fdistmax != self.NO_CUTOFF
        overflow = 0
        out = open('Sectiondist.txt', 'w')
        try:
            out.write('SectionDistributions\tQTY\n')
            for size, qty in sorted(by_size.items()):
                if capped and size > self.fdistmax:
                    overflow += qty
                else:
                    out.write('%s\t%s\n' % (size, qty))
            if capped:
                out.write('%s\t%s\n' % ('Full Book', overflow))
        finally:
            out.close()
        self.Sectionqty()

    def transFn(self):
        """Ask for the translation file and normalise the section codes.

        Each record is ``code,normalized_code``.  Every code in ``uhdata``
        that has a translation is replaced; codes without one are kept.
        The per-unit lists are then de-duplicated and re-sorted, because
        several codes may map onto the same normalized code.
        """
        self.transfile = self._ask('Section translate file name:')
        self.transfile, pairs = self._prompt_for_pairs(
            self.transfile, 'Section translate file name:')
        mapping = {}
        for code, normalized in pairs:
            mapping[code] = normalized
        for unit_id in list(self.uhdata):
            translated = set(mapping.get(code, code)
                             for code in self.uhdata[unit_id])
            self.uhdata[unit_id] = sorted(translated)
On Jan 4, 2008 2:23 PM, bob gailer <bgailer at alum.rpi.edu> wrote:
> GTXY20 wrote:
> >
> > There are no errors per se - the script is doing what it needs to I
> > guess I just want to check it for compliance - for some reason I think
> > itis a mess and should be much cleaner.
> >
> > I am only concerned with one particular area of the complete project -
> > it is 229 lines in total - would this be too much to post?
>
> Did you get my invitation to post programs in that size range? Please
> just do it. The suspense is killing me!
> > I do not have a website to post code to - just don't want to post too
> > much for the group and annoy anyone.
> Dialogging about it is more painful than just reading the code.
> >
> > Thanks for your comments and let me know.
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.python.org/pipermail/tutor/attachments/20080104/f14c4c90/attachment-0001.htm
More information about the Tutor
mailing list