newb comment request

Alexandre ac007 at bluewin.ch
Wed Nov 26 21:21:54 EST 2003


Hi,

I'm a newbie to development and Python...  my first self-assigned mission was
to read a pickled file containing a list with DB-like data and convert this to
MySQL... So I wrote my first module, which reads this pickled file and writes
an XML file with the list of tables and fields (... the next step will be the
module that creates the tables according to the details found in the XML file).

If anyone has a few minutes to spare, suggestions and comments would be very
much appreciated to help me make my next modules better... and avoid starting
with bad habits :)

Alexandre

######################## <EIPyFormatToXML>
######################################
#Unpickles a file containing database-like data in a Python list ->
#outputs an XML file with "tables" details which will be used in a future
#module to build MySQL tables. See the comment blocks at the end of the module
#for more details.

import sys
import pickle
import types

# Load the pickled table dump once, at import time; the result is kept in the
# module-level '_data' used by the functions below.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# only run this against a dump file that comes from a trusted source.
infile = open('cached-objects-Python-pickled-sample', 'rb')
try:
    _data = pickle.load(infile)
finally:
    # Close the handle even when unpickling fails, so it is never leaked.
    infile.close()

def ExtractTablesFromData(data):
    """Return one single-item list per table, holding that table's name.

    Every entry of 'data' must carry the table name at index 0. Each name is
    wrapped in its own list, so the result looks like [[name_1], [name_n]].
    """
    return [[entry[0]] for entry in data]

def ExtractFieldNamesFromData(data):
    """Wrap every item of 'data' in its own single-item list.

    The caller decides which table is inspected by passing that table's
    sequence of field names as 'data'; the result looks like
    [[field_1], [field_n]].
    """
    return [[name] for name in data]

def _CountDecimals(number):
    """Return how many digits follow the decimal point of 'number'.

    The absolute value is rendered with 12 significant digits so that neither
    the sign nor binary rounding noise (0.10000000000000009) inflates the
    count. Exponent notation such as '1.5e-05' is expanded. The result is
    never below 1, so a float always keeps at least one decimal.
    """
    text = '%.12g' % abs(number)
    exponent = 0
    if 'e' in text:
        text, exponentText = text.split('e')
        exponent = int(exponentText)
    if '.' in text:
        decimals = len(text.split('.')[1])
    else:
        decimals = 0
    return max(decimals - exponent, 1)


def ExtractFieldValuesFromData(data, indexField):
    """Summarise the values stored in one field across all records.

    data       -- sequence of records, each record being indexable by position
    indexField -- position of the inspected field inside every record

    Returns the list [valueType, maxLength, maxValue, minValue,
    floatPrecision, NoneValues, sampleValue]:

    valueType      -- type of the last value that was not None; forced to
                      float as soon as any float was seen; None when the
                      field holds no value at all
    maxLength      -- length of the longest str value, None without str values
    maxValue       -- largest value that is neither str nor None, else None
    minValue       -- smallest value that is neither str nor None (zero
                      included), else None
    floatPrecision -- largest number of decimals among the float values,
                      None without float values
    NoneValues     -- True when a None value was found, or when the field
                      holds nothing but empty strings
    sampleValue    -- the first of the longest str values; a fixed hint text
                      when the field has no str value
    """
    valueType = None
    maxLength = None
    maxValue = None
    minValue = None
    floatPrecision = None
    NoneValues = False
    sampleValue = 'numeric value, check min and max values as sample'

    for record in data:
        value = record[indexField]
        if value is None:
            # Nothing to measure: just remember the field is nullable.
            # Skipping here also keeps None away from len() and comparisons.
            NoneValues = True
            continue

        valueType = type(value)
        if valueType is str:
            # Strings only contribute a length and a sample, never min/max.
            if maxLength is None or len(value) > maxLength:
                maxLength = len(value)
                sampleValue = value
            continue

        # 'is None' checks replace the former magic sentinel, so zero, negative
        # and very large values are all handled correctly.
        if maxValue is None or value > maxValue:
            maxValue = value
        if minValue is None or value < minValue:
            minValue = value
        if valueType is float:
            precision = _CountDecimals(value)
            if floatPrecision is None or precision > floatPrecision:
                floatPrecision = precision

    if valueType is not float and floatPrecision is not None:
        # The last value was not a float but earlier ones were: the field as
        # a whole has to be treated as float.
        valueType = float
    if valueType is str and maxLength == 0:
        # Only empty strings were found: allow null values.
        NoneValues = True
    return [valueType, maxLength, maxValue, minValue, floatPrecision,
            NoneValues, sampleValue]

def AddFieldsPerTable():
    """Return the table list with each table's field list appended.

    Reads the module-level '_data'. Every returned entry has the form
    [tableName, [[fieldName_1], [fieldName_n]]].
    """
    tables = ExtractTablesFromData(_data)  # one [tableName] entry per table
    for table, source in zip(tables, _data):
        # source[1][0] is the list of field names of the matching table.
        table.append(ExtractFieldNamesFromData(source[1][0]))
    return tables

def AddFieldsDetailsPerField():
    """Return the table list with every field extended by its value details.

    Reads the module-level '_data'. Each field entry [fieldName] produced by
    AddFieldsPerTable() is extended in place with the list returned by
    ExtractFieldValuesFromData() for that field.
    """
    tables = AddFieldsPerTable()
    for iTable, table in enumerate(tables):
        # _data[iTable][1][1] is the record list of the current table; it is
        # the same for every field, so look it up once per table.
        records = _data[iTable][1][1]
        for iField, field in enumerate(table[1]):
            # iField focuses the analysis on the current field's column.
            field.extend(ExtractFieldValuesFromData(records, iField))
    return tables

def AddNbOfRecordsPerTable():
    """Return the detailed table list with the record count of each table.

    Reads the module-level '_data'. The count is inserted at index 1, so every
    returned entry has the form [tableName, nbOfRecords, fieldList].
    """
    tables = AddFieldsDetailsPerField()
    for i, table in enumerate(tables):
        # _data[i][1][1] is the record list of the i-th table.
        table.insert(1, len(_data[i][1][1]))
    return tables

def WriteFileTableFormat(fileName):
    """Write the detailed 'tables' list to 'fileName' as an XML document.

    fileName -- path of the XML file to create (an existing file is
                overwritten)

    The table list comes from AddNbOfRecordsPerTable(). Each table becomes a
    <table> element holding its name, its number of records and one <field>
    element per field. <mysqlFieldType> is written empty.
    """
    # Local import keeps this function self-contained.
    from xml.sax.saxutils import escape

    # Tags for field[2] .. field[7], in the order the details were stored.
    detailTags = ('maxLength', 'maxValue', 'minValue', 'floatPrecision',
                  'NoneValues', 'sampleValue')

    tables = AddNbOfRecordsPerTable()
    # NOTE(review): the XML header declares ISO-8859-1 but the file is opened
    # with the platform default encoding -- confirm they match for this data.
    f = open(fileName, 'w')
    try:
        f.write("""<?xml version="1.0" encoding="ISO-8859-1"?>\n""")
        f.write("<Root>\n")

        for table in tables:
            f.write("\t<table>\n")
            f.write("\t\t<name>%s</name>\n" % escape(str(table[0])))
            f.write("\t\t<nbOfRecords>%s</nbOfRecords>\n" % table[1])
            for field in table[2]:
                f.write("\t\t<field>\n")
                f.write("\t\t\t<name>%s</name>\n" % escape(str(field[0])))

                # Reduce "<type 'str'>" (or "<class 'str'>" on newer Pythons)
                # to the bare type name.
                typeText = str(field[1])
                for prefix in ("<type '", "<class '"):
                    if typeText.startswith(prefix):
                        typeText = typeText[len(prefix):-2]
                        break
                f.write("\t\t\t<pythonType>%s</pythonType>\n"
                        % escape(typeText))

                for tag, detail in zip(detailTags, field[2:8]):
                    # Escape &, < and > so data values cannot break the XML.
                    f.write("\t\t\t<%s>%s</%s>\n"
                            % (tag, escape(str(detail)), tag))
                f.write("\t\t\t<mysqlFieldType></mysqlFieldType>\n")
                f.write("\t\t</field>\n")
            f.write("\t</table>\n")

        f.write("</Root>")
    finally:
        # The original 'f.close' lacked parentheses and never closed the file.
        f.close()

# Script entry point: analyse the loaded data and write the table description
# to 'EITablesFormat.xml'. This is a top-level statement, so it runs whenever
# the module is executed or imported.
WriteFileTableFormat('EITablesFormat.xml')

############ <Help to understand '_data' structure>
#
#   [['FirstTableName', (['FirstFieldName', 'nFieldName'],
#                        [['FirstFieldFirstValue', 'nFieldFirstValue'],
#                         ['FirstFieldnValue', 'nFieldnValue']])],
#    ['nTableName', (['etc..
#   print _data[0][0] #[0]=FirstTable, [0]=TableName
#                     # -> output : 'FirstTableName'
#   print len(_data) #number of tables in 'data'
#   print _data[0][1] #[0]=FirstTable, [1]=FieldList And Records
#   print _data[0][1][0] #[0]=FirstTable, [1]=FieldList, [0]=FieldNames
#                        # -> output : ['FirstFieldName', 'nFieldName']
#   print len(_data[0][1][0]) #number of fields in first table
#   print _data[0][1][1] #[0]=FirstTable, [1]=FieldList, [1]=RecordList
#   print len(_data[0][1][1]) #number of records in first table
#   print _data[0][1][1][0][2] #[0]=FirstTable, [1]=FieldList,
#                              #[1]=RecordList, [0]=First Record,
#                              #[2]=Third Field Value
#
######################## </Help to understand '_data' structure>


############ <Final 'tables' variable format>
#
# The final 'tables' format used to build the XML should look like :
#    [[tablename_1, nbOfRecords,
#        [
#            [fieldname_1, pythonType, maxLength, maxValue, minValue,
#             floatPrecision, NoneValues, sampleValue],
#            [fieldname_n, pythonType, maxLength, maxValue, minValue,
#             floatPrecision, NoneValues, sampleValue]
#        ]
#     ],
#     [tablename_n, nbOfRecords,
#        [
#            [fieldname_1, ...]
#        ]
#     ]]
#
######################## </Final 'tables' variable format>
#################################### </EIPyFormatToXML>






More information about the Python-list mailing list