[Tutor] Review and criticism of python project

GTXY20 gtxy20 at gmail.com
Fri Jan 4 23:25:07 CET 2008


Hi there.

What this section of the program does is take a data file that is comma separated and
import it - there is a unique ID in the first field and a code in the second
field that corresponds to a certain section of information. What I need from
this is for the process to roll up, against each unique ID, all section holdings
without duplicates, report on section combinations, and overall section
counts. In addition I need the ability to assign a value for page count to
these sections and the ability to upload a translation file, just in case
a section is identified by multiple values that need to be normalized to a
single unique value.

Sorry for the lengthy code listing - all comments are appreciated - as
mentioned I am quite new to Python - it is doing what I need it to do but
I think that it is a mess and needs to be cleaned up a little.

Thanks for any comments.

GTXY20

import os
import sys
from collections import defaultdict
class __analysis:
    def __init__(self):
        print '***Analysis Tool***'
        datafile=raw_input('data file name:')
        self.datafile=datafile
        self.parsefile()

# script to import unitID section data and section page count reference and
create a sorted dictionary
# where in uhdata{} key=unitID and value=unitID section holdings
# where in pgcnt{} key=Section and value=page count

    def parsefile(self):
        try:
            uhdatafile = open(self.datafile, 'r')
            records = uhdatafile.read()
            uhdatafile.close()
            lines = records.split()
            self.uhdata={}
            for line in lines:
                uh, tf = line.split(',')
                if uh in self.uhdata:
                    f=self.uhdata[uh]
                    if tf not in f:
                        f.append(tf)
                else:
                    self.uhdata[uh]=[tf]

            for uh, Sections in self.uhdata.items():
                Sections.sort()
        except IOError:
            print 'file not found check file name'
            analysis()

        ftranslateok=raw_input('would you like to translate section codes?
(y/n):')
        if ftranslateok == 'y':
            self.transFn()
        else:
            pass
        pgcountok=raw_input('would you like to assign section page counts?
(y/n):')
        if pgcountok == 'y':
            self.setPageCounts()
        else:
            missingpgcounts={}
            fmissingpgcounts=[]
            for x in self.uhdata:
                for f in self.uhdata[x]:
                    if f not in fmissingpgcounts:
                        fmissingpgcounts.append(f)
            for x in fmissingpgcounts:
                missingpgcounts[x]=0
            self.pgcounts = missingpgcounts
        fdistmodel=raw_input('would you like to define max section
distribution cut off? (y/n):')
        if fdistmodel == 'y':
            self.fdistmax=raw_input('what is the max distributions before a
full book?:')
            self.fdistmax=int(self.fdistmax)
            self.Sectiondistmax()
        else:
            self.fdistmax=1000000000
            self.Sectiondistmax()
        sys.exit(1)

# function to determine number of uniqueID for each section
    def Sectionqty(self):
        Sectionqtyoutfile = open('Sectionqty.txt', 'w+')
        Sectionqtyoutfile.write ('Section\tQTY\n')
        from collections import defaultdict
        fcounts=defaultdict(int)
        flst=[]
        flst2=[]
        if self.fdistmax == 1000000000:
            for v in self.uhdata.values():
                for item in v:
                    fcounts[item]+=1

            for k,v in sorted(fcounts.items()):
                Section=k
                fqty=v
                Sectionqtyoutfile.write ('%s\t%s\n' % (Section, fqty))

        else:
            for k,v in self.uhdata.items():
                if len(v)<=self.fdistmax:
                    flst.append(self.uhdata[k])
            for i in flst:
                for x in i:
                    flst2.append(x)
            for Sections in flst2:
                    fcounts[Sections]+=1
            for k,v in sorted(fcounts.items()):
                Section= k
                fqty= v
                Sectionqtyoutfile.write ('%s\t%s\n' % (Section, fqty))

        Sectionqtyoutfile.close()
        self.SectionCombqty()

# function to determine number of uniqueID section combinations and
associated section page counts
    def SectionCombqty(self):
        SectionCombqtyoutfile = open('SectionCombqty.txt', 'w+')
        SectionCombqtyoutfile.write('Combination Qty\tNumber of
Sections\tCombination\tCombinationPageCount\tTotalPages\n')
        fullbook = 'Full Book'
        fgreater=[]
        fcheck=0
        from collections import defaultdict
        fcomb=defaultdict(int)
        for uh in self.uhdata.keys():
            fcomblst=self.uhdata[uh]
            fcomb[tuple(fcomblst)]+=1
        if self.fdistmax == 1000000000:
            for count, items in sorted( ((v,k) for k,v in fcomb.items
()),reverse=True):
                fpgcounts = sum([self.pgcounts.get(i,i) for i in
list(items)])
                Sectioncomb = ','.join(items)
                holdings = len(items)
                totpgcounts = count*fpgcounts
                SectionCombqtyoutfile.write ('%s\t%s\t%s\t%s\t%s\n' %
(count,holdings,Sectioncomb,fpgcounts,totpgcounts))

        else:
            for count, items in sorted( ((v,k) for k,v in fcomb.items
()),reverse=True):
                if len(items) <= self.fdistmax:
                    fpgcounts = sum([self.pgcounts.get(i,i) for i in
list(items)])
                    Sectioncomb = ','.join(items)
                    holdings = len(items)
                    totpgcounts = count*fpgcounts
                    SectionCombqtyoutfile.write ('%s\t%s\t%s\t%s\t%s\n' %
(count,holdings,Sectioncomb,fpgcounts,totpgcounts))
            for count, items in sorted( ((v,k) for k,v in fcomb.items
()),reverse=True):
                if len(items)>self.fdistmax:
                    fgreater.append(count)

            fcheck=sum(fgreater)
            SectionCombqtyoutfile.write ('%s\t''>''%s\t%s\t%s\t%s\n' %
(fcheck,self.fdistmax,fullbook,fullbook,fullbook))


        SectionCombqtyoutfile.close()

# where in pgcnt{} key=Section and value=page count
    def setPageCounts(self):
        pagecountfile=raw_input('page count file name:')
        self.pagecountfile=pagecountfile
        try:
            pagecountinfile = open(self.pagecountfile, 'r')
            records = pagecountinfile.read()
            pagecountinfile.close()
            self.pgcounts={}
            lines = records.split()
            for line in lines:
                fpg, cnt = line.split(',')
                self.pgcounts[fpg]=int(cnt)
        except IOError:
            print 'file not found check file name'
            analysis()

# function to determine number of uniqueID distributions and associated
Sections held
    def Sectiondistmax(self):
        from collections import defaultdict
        Sectiondistoutfile = open('Sectiondist.txt', 'w+')
        Sectiondistoutfile.write ('SectionDistributions\tQTY\n')
        fgreater=[]
        fullbook = "Full Book"
        fcheck=0
        fcount=defaultdict(int)
        for uh in self.uhdata.keys():
            f=self.uhdata[uh]
            fcount[len(f)]+=1
        if self.fdistmax == 1000000000:
            for k,v in sorted(fcount.items()):
                fdist=k
                fqty=v
                Sectiondistoutfile.write ('%s\t%s\n' % (fdist,fqty))

        else:
            for k,v in sorted(fcount.items()):
                if k <= self.fdistmax:
                    fdist=k
                    fqty=v
                    Sectiondistoutfile.write ('%s\t%s\n' % (fdist,fqty))

            for k,v in sorted(fcount.items()):
                if k > self.fdistmax:
                    fgreater.append(fcount[k])
            fcheck=sum(fgreater)
            Sectiondistoutfile.write ('%s\t%s\n' % (fullbook,fcheck))
        Sectiondistoutfile.close()
        self.Sectionqty()

#function to translate UnitID Sectioncodes to normalized assigned Section
code (e.g. parent and mulitple child section codes)
    def transFn(self):
        transfile=raw_input('Section translate file name:')
        self.transfile=transfile
        try:
            transfilein=open(self.transfile, 'r')
            records = transfilein.read()
            transfilein.close()
            lines = records.split()
            transDict = {}
            for line in lines:
                key, value = line.split(',')
                transDict[key] = value

            for key, value in self.uhdata.items():
                self.uhdata[key] = [ transDict.get(i, i) for i in value ]
            for k in self.uhdata:
                self.uhdata[k]=sorted(set(self.uhdata[k]))

        except IOError:
            print 'file not found check file name'
            analysis()




On Jan 4, 2008 2:23 PM, bob gailer <bgailer at alum.rpi.edu> wrote:

> GTXY20 wrote:
> >
> > There are no errors per se - the script is doing what it needs to I
> > guess I just want to check it for compliance - for some reason I think
> > it is a mess and should be much cleaner.
> >
> > I am only concerned with one particular area of the complete project -
> > it is 229 lines in total - would this be too much to post?
>
> Did you get my invitation to post programs in that size range? Please
> just do it. The suspense is killing me!
> > I do not have a website to post code to - just don't want to post too
> > much for the group and annoy anyone.
> Dialogging about it is more painful than just reading the code.
>  >
> > Thanks for your comments and let me know.
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.python.org/pipermail/tutor/attachments/20080104/f14c4c90/attachment-0001.htm 


More information about the Tutor mailing list