sorting items in a table problematic because of scientific notation

Davis, Amelie Y aydavis at purdue.edu
Tue Apr 28 19:30:14 EDT 2009


Hi All,

I have a dbf table outputted by another program that I cannot (I'm pretty sure) change the format of.

I use a dbf reader code found online (http://code.activestate.com/recipes/362715/ ) to read the table in and I need to sort it on a particular field but this field has scientific notation in it and when I use the following command, it seems to ignore the scientific notation which is very problematic outlist = sorted(records, key=itemgetter(2)) .  

I read through the help and it says to use '%f' % to change the format but I'm not sure where to put that in the code (pasted below) as I think I'm not fully grasping the table structure...

Any help is appreciated,

 

Amélie

 PS

Here's the data 

>>> db

[['IN_FID', 'NEAR_FID', 'NEAR_DIST'], [('N', 9, 0), ('N', 9, 0), ('F', 19, 11)], [53, 55, ' 1.05646365517e+005'], [53, 6, ' 9.32599134016e+004'], [53, 0, ' 8.97477154418e+004'], [53, 2, ' 8.96449127749e+004'], [53, 1, ' 7.88170078501e+004'], [53, 5, ' 8.29281503631e+004'], [53, 4, ' 8.28902704259e+004'], [53, 3, ' 1.01749422736e+005'], [53, 48, ' 1.02994021758e+005'], [53, 66, ' 7.72929835812e+004'], [53, 50, ' 7.01857314921e+004'], [53, 61, ' 5.27651569092e+004'], [53, 41, ' 5.69077867066e+004'], [53, 29, ' 5.07042022102e+004'], [53, 34, ' 8.09902696654e+004'], [53, 30, ' 7.02916975852e+004'], [53, 70, ' 1.00708126410e+005'], [53, 69, ' 7.44442906028e+004'], [53, 39, ' 5.27278486633e+004'], [53, 63, ' 2.89006720236e+004'], [53, 56, ' 2.51636756075e+004'], [53, 54, ' 2.25242296790e+004'], [53, 57, ' 1.81502789641e+004'], [53, 31, ' 4.14289454344e+004'], [53, 36, ' 6.21677492051e+004'], [53, 33, ' 9.87217951789e+004'], [53, 37, ' 1.26735404517e+005'], [53, 71, ' 7.99338959757e+004'], [53, 45, ' 4.72855618396e+004'], [53, 64, ' 1.16520400070e+004'], [53, 53, ' 0.00000000000e+000'], [53, 40, ' 2.11999118691e+004'], [53, 67, ' 5.39410679925e+004'], [53, 32, ' 1.01794899895e+005'], [53, 28, ' 1.41175706465e+005'], [53, 62, ' 9.14863530014e+004'], [53, 59, ' 9.43920363771e+004'], [53, 58, ' 3.94201603186e+004'], [53, 38, ' 2.42089314736e+004'], [53, 52, ' 2.68895347974e+004'], [53, 51, ' 6.70164077249e+004'], [53, 44, ' 8.95725173668e+004'], [53, 35, ' 1.24638006267e+005'], [53, 60, ' 9.82404538984e+004'], [53, 46, ' 7.51687033246e+004'], [53, 65, ' 5.95516889323e+004'], [53, 42, ' 5.48131163225e+004'], [53, 68, ' 5.40273411761e+004'], [53, 43, ' 7.89218619063e+004'], [53, 49, ' 8.64347257786e+004'], [53, 47, ' 7.37831867582e+004'], [53, 22, ' 1.13733267552e+005'], [53, 26, ' 1.05825818366e+005'], [53, 24, ' 8.98116825097e+004'], [53, 27, ' 9.05995622518e+004'], [53, 21, ' 1.02348160303e+005'], [53, 25, ' 1.27829365440e+005'], [53, 23, ' 1.09929013193e+005'], [53, 7, ' 7.36058798557e+005'], [53, 9, ' 7.10791798735e+005'], [53, 19, ' 8.07431148157e+005'], [53, 16, ' 7.71262921144e+005'], [53, 15, ' 8.25966177283e+005'], [53, 14, ' 7.99479404965e+005'], [53, 13, ' 7.73891783607e+005'], [53, 8, ' 7.51032150353e+005'], [53, 12, ' 7.34343709714e+005'], [53, 20, ' 8.32804863363e+005'], [53, 17, ' 7.98545402595e+005'], [53, 10, ' 7.66197484817e+005'], [53, 18, ' 8.07855510966e+005'], [53, 11, ' 7.74341557743e+005']]

>>> outlist = sorted(records, key=itemgetter(2))

>>> outlist

[[53, 53, ' 0.00000000000e+000'], [53, 70, ' 1.00708126410e+005'], [53, 3, ' 1.01749422736e+005'], [53, 32, ' 1.01794899895e+005'], [53, 21, ' 1.02348160303e+005'], [53, 48, ' 1.02994021758e+005'], [53, 55, ' 1.05646365517e+005'], [53, 26, ' 1.05825818366e+005'], [53, 23, ' 1.09929013193e+005'], [53, 22, ' 1.13733267552e+005'], [53, 64, ' 1.16520400070e+004'], [53, 35, ' 1.24638006267e+005'], [53, 37, ' 1.26735404517e+005'], [53, 25, ' 1.27829365440e+005'], [53, 28, ' 1.41175706465e+005'], [53, 57, ' 1.81502789641e+004'], [53, 40, ' 2.11999118691e+004'], [53, 54, ' 2.25242296790e+004'], [53, 38, ' 2.42089314736e+004'], [53, 56, ' 2.51636756075e+004'], [53, 52, ' 2.68895347974e+004'], [53, 63, ' 2.89006720236e+004'], [53, 58, ' 3.94201603186e+004'], [53, 31, ' 4.14289454344e+004'], [53, 45, ' 4.72855618396e+004'], [53, 29, ' 5.07042022102e+004'], [53, 39, ' 5.27278486633e+004'], [53, 61, ' 5.27651569092e+004'], [53, 67, ' 5.39410679925e+004'], [53, 68, ' 5.40273411761e+004'], [53, 42, ' 5.48131163225e+004'], [53, 41, ' 5.69077867066e+004'], [53, 65, ' 5.95516889323e+004'], [53, 36, ' 6.21677492051e+004'], [53, 51, ' 6.70164077249e+004'], [53, 50, ' 7.01857314921e+004'], [53, 30, ' 7.02916975852e+004'], [53, 9, ' 7.10791798735e+005'], [53, 12, ' 7.34343709714e+005'], [53, 7, ' 7.36058798557e+005'], [53, 47, ' 7.37831867582e+004'], [53, 69, ' 7.44442906028e+004'], [53, 8, ' 7.51032150353e+005'], [53, 46, ' 7.51687033246e+004'], [53, 10, ' 7.66197484817e+005'], [53, 16, ' 7.71262921144e+005'], [53, 66, ' 7.72929835812e+004'], [53, 13, ' 7.73891783607e+005'], [53, 11, ' 7.74341557743e+005'], [53, 1, ' 7.88170078501e+004'], [53, 43, ' 7.89218619063e+004'], [53, 17, ' 7.98545402595e+005'], [53, 71, ' 7.99338959757e+004'], [53, 14, ' 7.99479404965e+005'], [53, 19, ' 8.07431148157e+005'], [53, 18, ' 8.07855510966e+005'], [53, 34, ' 8.09902696654e+004'], [53, 15, ' 8.25966177283e+005'], [53, 4, ' 8.28902704259e+004'], [53, 5, ' 8.29281503631e+004'], [53, 20, ' 8.32804863363e+005'], [53, 49, ' 8.64347257786e+004'], [53, 44, ' 8.95725173668e+004'], [53, 2, ' 8.96449127749e+004'], [53, 0, ' 8.97477154418e+004'], [53, 24, ' 8.98116825097e+004'], [53, 27, ' 9.05995622518e+004'], [53, 62, ' 9.14863530014e+004'], [53, 6, ' 9.32599134016e+004'], [53, 59, ' 9.43920363771e+004'], [53, 60, ' 9.82404538984e+004'], [53, 33, ' 9.87217951789e+004']]

>>> 

è The 'second item' should be NEAR_ID = 64 but because of the scientific notation it returns NEAR_ID = 70.  

 

AND THE CODE

####################################

# code from http://code.activestate.com/recipes/362715/ 

def dbfreader(f):

    """Returns an iterator over records in a Xbase DBF file.

 

    The first row returned contains the field names.

    The second row contains field specs: (type, size, decimal places).

    Subsequent rows contain the data records.

    If a record is marked as deleted, it is skipped.

 

    File should be opened for binary reads.

 

    """

    # See DBF format spec at:

    # http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT

 

    numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32)) 

    numfields = (lenheader - 33) // 32

 

    fields = []

    for fieldno in xrange(numfields):

        name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))

        name = name.replace('\0', '') # eliminate NULs from string 

        fields.append((name, typ, size, deci))

    yield [field[0] for field in fields]

    yield [tuple(field[1:]) for field in fields]

 

    terminator = f.read(1)

    assert terminator == '\r'

 

    fields.insert(0, ('DeletionFlag', 'C', 1, 0))

    fmt = ''.join(['%ds' % fieldinfo[2] for fieldinfo in fields])

    fmtsiz = struct.calcsize(fmt)

    for i in xrange(numrec):

        record = struct.unpack(fmt, f.read(fmtsiz))

        if record[0] != ' ':

            continue # deleted record

        result = []

        for (name, typ, size, deci), value in itertools.izip(fields, record):

            if name == 'DeletionFlag':

                continue

            if typ == "N":

                value = value.replace('\0', '').lstrip()

                if value == '':

                    value = 0

                elif deci:

                    value = decimal.Decimal(value)

                else:

                    value = int(value)

            elif typ == 'D':

                y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])

                value = datetime.date(y, m, d)

            elif typ == 'L':

                value = (value in 'YyTt' and 'T') or (value in 'NnFf' and 'F') or '?'

            result.append(value)

        yield result

 

#######################################

...

 

            f = open(newname3, 'rb')

 

            db = list(dbfreader(f))

            f.close()

            #for record in db:

            # print record

            fieldnames, fieldspec, records = db[0], db[1], db[2:]

            #records.sort(key=itemgetter(2))

            outlist = sorted(records, key=itemgetter(2))

            #print records

            # starts at zero so record 1 is really row 2

            aVal3 = outlist[Cntr3]

            print aVal3

            #print type(aVal3)

            aFid = aVal3[1]

 

 

  Please consider the environment before printing this e-mail or any of its attachments (if applicable)

 

 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20090428/6558efc8/attachment.html>


More information about the Python-list mailing list