Memory problems (garbage collection)
Carbon Man
darwin at nowhere.com
Thu Apr 23 08:09:46 EDT 2009
Thanks for the help.
I converted everything to use StringIO(). Memory is still getting
chewed up. I will look at ElementTree later, but for now I believe the speed
issue must be related to the amount of memory that is being used: it is
causing all of Windows to slow to a crawl. gc.collect() still reports the
same number as before.
I don't know what to try next. The updated program is below:
from xml.dom import minidom
import os
from cStringIO import StringIO
class xmlProcessing:
    """General class for XML processing.

    Walks a ``xml.dom.minidom`` tree and dispatches on two levels:

    * ``parse_<NodeClassName>(node)`` is looked up for every child node
      (``parse_Element``, ``parse_Text``, ...).
    * ``parse_Element`` in turn looks up ``do_<tagName>(node)``.

    ``process`` calls ``self.parse(rootElement)``; this class does not define
    ``parse`` itself, so a subclass (or the instance) must provide it.
    """

    def process(self, filename="", xmlString=""):
        """Parse an XML document and hand its root element to ``self.parse``.

        ``xmlString`` takes precedence over ``filename``. When both are
        empty nothing happens.
        """
        if xmlString:
            xmldoc = minidom.parseString(xmlString)
        elif filename:
            xmldoc = minidom.parse(filename)
        else:
            return
        try:
            self.parse(xmldoc.documentElement)
        finally:
            # minidom nodes reference each other (parent <-> child <->
            # sibling). unlink() breaks those cycles so the whole tree can
            # be reclaimed as soon as processing finishes instead of
            # lingering until (or beyond) a cyclic GC pass.
            xmldoc.unlink()

    def parseBranch(self, parentNode):
        """Process an XML branch.

        For each child of ``parentNode`` the matching ``parse_<Class>``
        method is called. Children without such a method are skipped. When
        the method returns a true value the child is considered fully
        handled; otherwise its own children are processed recursively.
        """
        for node in parentNode.childNodes:
            parseMethod = getattr(
                self, "parse_%s" % node.__class__.__name__, None)
            if parseMethod is None:
                continue
            if parseMethod(node):
                continue
            self.parseBranch(node)

    def parse_Document(self, node):
        """Document nodes need no handling of their own."""
        pass

    def parse_Text(self, node):
        """Text nodes are ignored at this level."""
        pass

    def parse_Comment(self, node):
        """Comments are ignored."""
        pass

    def parse_Element(self, node):
        """Dispatch an element to ``do_<tagName>`` if such a method exists.

        Returns True when a handler was found and called (the caller then
        does not descend into the element), False otherwise.
        """
        handlerMethod = getattr(self, "do_%s" % node.tagName, None)
        if handlerMethod is None:
            return False
        handlerMethod(node)
        return True
class reptorParsing(xmlProcessing):
    """Generate a SQLAlchemy program that creates tables and populates them.

    Processing a document writes, into the current working directory:

    * ``schema.py``          - declarative classes built from ``TABLES``
    * ``<table>_update.py``  - one data-loading script per table in ``DATA``
    * ``update.py``          - imports ``schema`` and every update script

    The text buffers are StringIO objects; their contents must always be
    fetched with ``getvalue()`` (a buffer object is always truthy, cannot be
    concatenated with ``str``, and ``read()`` after ``write()`` returns an
    empty string because the position is at the end).
    """

    def __init__(self):
        self.schemaPreface = StringIO()
        self.schemaPreface.write("""from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
engine = create_engine('sqlite:///tutorial.db', echo=False)
metadata = MetaData()
Base = declarative_base()""")
        self.schemaTables = StringIO()       # all generated class definitions
        self.schemaFields = StringIO()       # Column lines of current table
        self.schemaInitPreface = StringIO()  # "def __init__(self, ..." header
        self.schemaInitBody = StringIO()     # "self.x = x" lines
        self.dataUpdate = StringIO()         # replaced by a file in do_DATA
        self.tableDict = {}                  # "table.field" -> type code
        self.tableName = ""                  # name of the table in progress
        self.keyValue = ""
        self.keyField = ""
        self.tables = StringIO()             # import lines for update.py

    @staticmethod
    def _elementChildren(node):
        """Return only the element children of ``node``.

        Whitespace between tags shows up as Text nodes, which have no
        ``tagName``/``attributes`` and must not be treated as tables/fields.
        """
        return [child for child in node.childNodes
                if child.nodeType == child.ELEMENT_NODE]

    def parse(self, parentNode):
        """Main entry point to begin processing a XML document.

        Walks the tree (which fills ``schemaTables`` and ``tables`` through
        the ``do_*`` handlers below) and then writes ``update.py`` and, when
        any table schema was generated, ``schema.py``.
        """
        self.parseBranch(parentNode)
        schemaSource = self.schemaTables.getvalue()
        tableImports = self.tables.getvalue()
        with open(os.path.join(os.getcwd(), "update.py"), 'w') as fupdate:
            if schemaSource:
                fupdate.write("import schema\n")
                schemaPath = os.path.join(os.getcwd(), "schema.py")
                with open(schemaPath, 'w') as f:
                    f.write(self.schemaPreface.getvalue() + "\n" +
                            schemaSource + '\n' +
                            "metadata.create_all(engine)\n" +
                            "print 'hello 2'")
            if tableImports:
                fupdate.write(tableImports)

    def do_TABLES(self, tableNode):
        """Process schema for tables.

        Each element child is one table; its name becomes both the class
        name and ``__tablename__``. The fields are collected by ``do_FIELDS``
        while the table's branch is parsed.
        """
        for node in self._elementChildren(tableNode):
            self.tableName = node.tagName
            # Define a declarative mapping class
            self.schemaTables.write("""\nclass %s(Base):
    __tablename__ = '%s'
""" % (self.tableName, self.tableName))
            self.schemaFields = StringIO()
            # allow for userA = users("Billy","Bob") via a __init__()
            self.schemaInitPreface = StringIO()
            self.schemaInitPreface.write("    def __init__(self")
            self.schemaInitBody = StringIO()
            self.parseBranch(node)
            self.schemaInitPreface.write("):\n")
            # A table without fields would otherwise yield an empty (and
            # therefore syntactically invalid) __init__ body.
            initBody = self.schemaInitBody.getvalue() or "        pass\n"
            self.schemaTables.write(self.schemaFields.getvalue() + "\n" +
                                    self.schemaInitPreface.getvalue() +
                                    initBody + "\n")

    def do_FIELDS(self, fieldsNode):
        """Process schema for fields within tables.

        Every element child is one field. Its ``type`` attribute (plus
        ``len``/``dec`` where relevant) selects the SQLAlchemy column type;
        a ``primary`` attribute marks the primary key. The type code is
        remembered in ``tableDict`` for use by ``do_TUPLE``.
        """
        for node in self._elementChildren(fieldsNode):
            # Separate consecutive Column lines; tell() is non-zero once
            # something has been written to the buffer.
            if self.schemaFields.tell():
                self.schemaFields.write("\n")
            cType = ""
            # The attribute type holds the type of field
            crType = node.attributes["type"].value
            if crType == "C":
                cType = "String(length=%s)" % node.attributes["len"].value
            elif crType == "N" and node.attributes["dec"].value == '0':
                cType = "Integer"
            elif crType == "N":
                cType = "Numeric(precision=%s, scale=%s)" % (
                    node.attributes["len"].value,
                    node.attributes["dec"].value)
            elif crType == "L":
                cType = "Boolean"
            elif crType == "T":
                cType = "DateTime"
            elif crType == "D":
                cType = "Date"
            elif crType == "M" or crType == "G":
                cType = "Text"
            # Join the arguments so an unrecognised type code cannot produce
            # "Column(, primary_key=True)".
            columnArgs = []
            if cType:
                columnArgs.append(cType)
            if node.hasAttribute("primary"):
                columnArgs.append("primary_key=True")
            self.schemaFields.write("    %s = Column(%s)" % (
                node.tagName, ", ".join(columnArgs)))
            self.schemaInitPreface.write(", \\\n %s" % (node.tagName))
            self.schemaInitBody.write("        self.%s = %s\n" % (
                node.tagName, node.tagName))
            self.tableDict[self.tableName + "." + node.tagName] = crType

    def do_DATA(self, dataNode):
        """This is for processing actual data to be pushed into the tables.

        Layout is DATA -> TABLE_NAME key='primary_field' -> TUPLE ->
        FIELD_NAME -> VALUE. One ``<table>_update.py`` file is written per
        table element and an import line for it is queued for ``update.py``.
        """
        for node in self._elementChildren(dataNode):
            self.tableName = node.tagName
            self.keyValue = ""
            self.keyField = node.attributes["key"].value
            self.dataUpdate = open(
                os.path.join(os.getcwd(), self.tableName + "_update.py"), 'w')
            try:
                # NOTE(review): the generated script instantiates the table
                # class but does not import it from schema - confirm whether
                # a "from schema import *" line is needed here.
                self.dataUpdate.write("""
import time
from datetime import *
from sqlalchemy import *
from sqlalchemy.orm import *
engine = create_engine('sqlite:///tutorial.db', echo=False)
Session = sessionmaker()
Session.configure(bind=engine)
session = Session()
""")
                self.parseBranch(node)
            finally:
                # Close the file even when a handler raises.
                self.dataUpdate.close()
            # A module is imported by name, without the ".py" suffix.
            self.tables.write("\nimport %s_update" % (self.tableName))

    def do_TUPLE(self, tupleNode):
        """A TUPLE is what the XML file refers to a table row.

        Sits below a DATA child. Writes code that creates one entry of the
        current table, assigns every field value and commits the session.
        """
        self.dataUpdate.write("""
entry = %s()
session.add(entry)
""" % (self.tableName))
        for node in self._elementChildren(tupleNode):
            for dataNode in node.childNodes:
                # Only text content carries a value; skip comments etc.
                if dataNode.nodeType not in (dataNode.TEXT_NODE,
                                             dataNode.CDATA_SECTION_NODE):
                    continue
                crType = self.tableDict[self.tableName + "." + node.tagName]
                # NOTE(review): values are pasted into generated source
                # without escaping; data containing quotes or backslashes
                # will break (or alter) the generated script.
                if crType == "C" or crType == "M":
                    cValue = '"""%s"""' % dataNode.data
                elif crType == "T":
                    cValue = ('datetime.strptime("' + dataNode.data +
                              '", "%Y-%m-%d %H:%M")')
                elif crType == "D":
                    cValue = ('datetime.strptime("' + dataNode.data +
                              '", "%Y-%m-%d")')
                else:
                    cValue = dataNode.data
                self.dataUpdate.write(
                    "\nentry." + node.tagName + " = " + cValue)
        self.dataUpdate.write("\nsession.commit()")
if __name__ == '__main__':
    # Read request.xml from the current working directory and generate the
    # schema/update scripts from it.
    requestPath = os.path.join(os.getcwd(), "request.xml")
    replicate = reptorParsing()
    replicate.process(filename=requestPath)
    # Importing the freshly generated module runs it.
    # NOTE(review): assumes the working directory is on sys.path - confirm.
    import update
More information about the Python-list
mailing list