sorting items in a table problematic because of scientific notation
Davis, Amelie Y
aydavis at purdue.edu
Tue Apr 28 19:30:14 EDT 2009
Hi All,
I have a dbf table outputted by another program that I cannot (I'm pretty sure) change the format of.
I use a dbf reader code found online (http://code.activestate.com/recipes/362715/ ) to read the table in and I need to sort it on a particular field but this field has scientific notation in it and when I use the following command, it seems to ignore the scientific notation which is very problematic outlist = sorted(records, key=itemgetter(2)) .
I read through the help and it says to use '%f' % to change the format but I'm not sure where to put that in the code (pasted below) as I think I'm not fully grasping the table structure...
Any help is appreciated,
Amélie
PS
Here's the data
>>> db
[['IN_FID', 'NEAR_FID', 'NEAR_DIST'], [('N', 9, 0), ('N', 9, 0), ('F', 19, 11)], [53, 55, ' 1.05646365517e+005'], [53, 6, ' 9.32599134016e+004'], [53, 0, ' 8.97477154418e+004'], [53, 2, ' 8.96449127749e+004'], [53, 1, ' 7.88170078501e+004'], [53, 5, ' 8.29281503631e+004'], [53, 4, ' 8.28902704259e+004'], [53, 3, ' 1.01749422736e+005'], [53, 48, ' 1.02994021758e+005'], [53, 66, ' 7.72929835812e+004'], [53, 50, ' 7.01857314921e+004'], [53, 61, ' 5.27651569092e+004'], [53, 41, ' 5.69077867066e+004'], [53, 29, ' 5.07042022102e+004'], [53, 34, ' 8.09902696654e+004'], [53, 30, ' 7.02916975852e+004'], [53, 70, ' 1.00708126410e+005'], [53, 69, ' 7.44442906028e+004'], [53, 39, ' 5.27278486633e+004'], [53, 63, ' 2.89006720236e+004'], [53, 56, ' 2.51636756075e+004'], [53, 54, ' 2.25242296790e+004'], [53, 57, ' 1.81502789641e+004'], [53, 31, ' 4.14289454344e+004'], [53, 36, ' 6.21677492051e+004'], [53, 33, ' 9.87217951789e+004'], [53, 37, ' 1.26735404517e+005'], [53, 71, ' 7.99338959757e+004'], [53, 45, ' 4.72855618396e+004'], [53, 64, ' 1.16520400070e+004'], [53, 53, ' 0.00000000000e+000'], [53, 40, ' 2.11999118691e+004'], [53, 67, ' 5.39410679925e+004'], [53, 32, ' 1.01794899895e+005'], [53, 28, ' 1.41175706465e+005'], [53, 62, ' 9.14863530014e+004'], [53, 59, ' 9.43920363771e+004'], [53, 58, ' 3.94201603186e+004'], [53, 38, ' 2.42089314736e+004'], [53, 52, ' 2.68895347974e+004'], [53, 51, ' 6.70164077249e+004'], [53, 44, ' 8.95725173668e+004'], [53, 35, ' 1.24638006267e+005'], [53, 60, ' 9.82404538984e+004'], [53, 46, ' 7.51687033246e+004'], [53, 65, ' 5.95516889323e+004'], [53, 42, ' 5.48131163225e+004'], [53, 68, ' 5.40273411761e+004'], [53, 43, ' 7.89218619063e+004'], [53, 49, ' 8.64347257786e+004'], [53, 47, ' 7.37831867582e+004'], [53, 22, ' 1.13733267552e+005'], [53, 26, ' 1.05825818366e+005'], [53, 24, ' 8.98116825097e+004'], [53, 27, ' 9.05995622518e+004'], [53, 21, ' 1.02348160303e+005'], [53, 25, ' 1.27829365440e+005'], [53, 23, ' 1.09929013193e+005'], [53, 7, ' 7.36058798557e+005'], [53, 9, ' 7.10791798735e+005'], [53, 19, ' 8.07431148157e+005'], [53, 16, ' 7.71262921144e+005'], [53, 15, ' 8.25966177283e+005'], [53, 14, ' 7.99479404965e+005'], [53, 13, ' 7.73891783607e+005'], [53, 8, ' 7.51032150353e+005'], [53, 12, ' 7.34343709714e+005'], [53, 20, ' 8.32804863363e+005'], [53, 17, ' 7.98545402595e+005'], [53, 10, ' 7.66197484817e+005'], [53, 18, ' 8.07855510966e+005'], [53, 11, ' 7.74341557743e+005']]
>>> outlist = sorted(records, key=itemgetter(2))
>>> outlist
[[53, 53, ' 0.00000000000e+000'], [53, 70, ' 1.00708126410e+005'], [53, 3, ' 1.01749422736e+005'], [53, 32, ' 1.01794899895e+005'], [53, 21, ' 1.02348160303e+005'], [53, 48, ' 1.02994021758e+005'], [53, 55, ' 1.05646365517e+005'], [53, 26, ' 1.05825818366e+005'], [53, 23, ' 1.09929013193e+005'], [53, 22, ' 1.13733267552e+005'], [53, 64, ' 1.16520400070e+004'], [53, 35, ' 1.24638006267e+005'], [53, 37, ' 1.26735404517e+005'], [53, 25, ' 1.27829365440e+005'], [53, 28, ' 1.41175706465e+005'], [53, 57, ' 1.81502789641e+004'], [53, 40, ' 2.11999118691e+004'], [53, 54, ' 2.25242296790e+004'], [53, 38, ' 2.42089314736e+004'], [53, 56, ' 2.51636756075e+004'], [53, 52, ' 2.68895347974e+004'], [53, 63, ' 2.89006720236e+004'], [53, 58, ' 3.94201603186e+004'], [53, 31, ' 4.14289454344e+004'], [53, 45, ' 4.72855618396e+004'], [53, 29, ' 5.07042022102e+004'], [53, 39, ' 5.27278486633e+004'], [53, 61, ' 5.27651569092e+004'], [53, 67, ' 5.39410679925e+004'], [53, 68, ' 5.40273411761e+004'], [53, 42, ' 5.48131163225e+004'], [53, 41, ' 5.69077867066e+004'], [53, 65, ' 5.95516889323e+004'], [53, 36, ' 6.21677492051e+004'], [53, 51, ' 6.70164077249e+004'], [53, 50, ' 7.01857314921e+004'], [53, 30, ' 7.02916975852e+004'], [53, 9, ' 7.10791798735e+005'], [53, 12, ' 7.34343709714e+005'], [53, 7, ' 7.36058798557e+005'], [53, 47, ' 7.37831867582e+004'], [53, 69, ' 7.44442906028e+004'], [53, 8, ' 7.51032150353e+005'], [53, 46, ' 7.51687033246e+004'], [53, 10, ' 7.66197484817e+005'], [53, 16, ' 7.71262921144e+005'], [53, 66, ' 7.72929835812e+004'], [53, 13, ' 7.73891783607e+005'], [53, 11, ' 7.74341557743e+005'], [53, 1, ' 7.88170078501e+004'], [53, 43, ' 7.89218619063e+004'], [53, 17, ' 7.98545402595e+005'], [53, 71, ' 7.99338959757e+004'], [53, 14, ' 7.99479404965e+005'], [53, 19, ' 8.07431148157e+005'], [53, 18, ' 8.07855510966e+005'], [53, 34, ' 8.09902696654e+004'], [53, 15, ' 8.25966177283e+005'], [53, 4, ' 8.28902704259e+004'], [53, 5, ' 8.29281503631e+004'], [53, 20, ' 8.32804863363e+005'], [53, 49, ' 8.64347257786e+004'], [53, 44, ' 8.95725173668e+004'], [53, 2, ' 8.96449127749e+004'], [53, 0, ' 8.97477154418e+004'], [53, 24, ' 8.98116825097e+004'], [53, 27, ' 9.05995622518e+004'], [53, 62, ' 9.14863530014e+004'], [53, 6, ' 9.32599134016e+004'], [53, 59, ' 9.43920363771e+004'], [53, 60, ' 9.82404538984e+004'], [53, 33, ' 9.87217951789e+004']]
>>>
è The 'second item' should be NEAR_ID = 64 but because of the scientific notation it returns NEAR_ID = 70.
AND THE CODE
####################################
# code from http://code.activestate.com/recipes/362715/
def dbfreader(f):
"""Returns an iterator over records in a Xbase DBF file.
The first row returned contains the field names.
The second row contains field specs: (type, size, decimal places).
Subsequent rows contain the data records.
If a record is marked as deleted, it is skipped.
File should be opened for binary reads.
"""
# See DBF format spec at:
# http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT
numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))
numfields = (lenheader - 33) // 32
fields = []
for fieldno in xrange(numfields):
name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
name = name.replace('\0', '') # eliminate NULs from string
fields.append((name, typ, size, deci))
yield [field[0] for field in fields]
yield [tuple(field[1:]) for field in fields]
terminator = f.read(1)
assert terminator == '\r'
fields.insert(0, ('DeletionFlag', 'C', 1, 0))
fmt = ''.join(['%ds' % fieldinfo[2] for fieldinfo in fields])
fmtsiz = struct.calcsize(fmt)
for i in xrange(numrec):
record = struct.unpack(fmt, f.read(fmtsiz))
if record[0] != ' ':
continue # deleted record
result = []
for (name, typ, size, deci), value in itertools.izip(fields, record):
if name == 'DeletionFlag':
continue
if typ == "N":
value = value.replace('\0', '').lstrip()
if value == '':
value = 0
elif deci:
value = decimal.Decimal(value)
else:
value = int(value)
elif typ == 'D':
y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])
value = datetime.date(y, m, d)
elif typ == 'L':
value = (value in 'YyTt' and 'T') or (value in 'NnFf' and 'F') or '?'
result.append(value)
yield result
#######################################
...
f = open(newname3, 'rb')
db = list(dbfreader(f))
f.close()
#for record in db:
# print record
fieldnames, fieldspec, records = db[0], db[1], db[2:]
#records.sort(key=itemgetter(2))
outlist = sorted(records, key=itemgetter(2))
#print records
# starts at zero so record 1 is really row 2
aVal3 = outlist[Cntr3]
print aVal3
#print type(aVal3)
aFid = aVal3[1]
Please consider the environment before printing this e-mail or any of its attachments (if applicable)
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20090428/6558efc8/attachment.html>
More information about the Python-list
mailing list