xml SAX Parsing in python
Abubakar Roko
abroko at yahoo.com
Wed Dec 17 01:30:14 EST 2014
Good day,
Please I am new in using python to write program. I am trying to parse an XML document using sax parse and store the parsed result in a tree like definedbelow. XNode class define an xml element which has an ID , a tag, a text value, children element and a parent element
class XNode(object):
def __init__(self, ID ="", elmName="", elmValue="", parent=None):
self.ID = ID self.elmName=elmName self.elmValue=elmValue self.childs=[] self.parent=parent
def getPath(self): if self.parent is None: return self.elmName else: return self.parent.getPath()+"/"+ self.elmName
I wrote a program that parse an XML document , convert the document into the tree like structure defined above and then return the parsed result tothe program that call it. The program shown below.
import xml.saximport XMLnode as n
class XML_Handler ( xml.sax.ContentHandler):
def __init__(self, root): self.root = root self.tmp = n.XNode()
def startElement(self, tag, attributes): #if self.root != None:
if self.root is not None:
if len(self.tmp.childs) < 10: ID = self.tmp.ID +"." + "0" + str( len(self.tmp.childs)) else: ID = self.tmp.ID +"." + str( len(self.tmp.childs)) self.tmp.childs.append( n.XNode(ID,tag,"",self.tmp))
self.tmp= self.tmp.childs[len(self.tmp.childs)-1] else: print "0", tag, self.tmp.getPath() self.root= n.XNode("0", tag,"",None) self.tmp=self.root
def characters(self, content): self.tmp.elmValue += content.strip()
def endElement(self, tag): self.tmp= self.tmp.parent
def parse(self, f): xml.sax.parse(self,f) return self.root
if ( __name__ == "__main__"):
parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_namespaces, 0) root = None Handler = XML_Handler(root) parser.setContentHandler( Handler ) treRoot= parser.parse("Movies.xml") print treRoot
Can somebody help me answer the following questionMy Question is how do I return the parsed result through the root instance variable of of XML_Handler classI try to do it but i always get None as answerI am using Window 7 professional and python 2.7
Thank you
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20141217/9f036e7e/attachment.html>
More information about the Python-list
mailing list