[XML-SIG] Error using xml.sax.{xmlreader,expatreader}
Mike Orr
iron@mso.oz.net
Sun, 26 Nov 2000 17:25:34 -0800
--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
I'm getting an error trying to do a simple XML parse into a dictionary, using
Python 2.0 and PyXML 0.6.2. xml.make_parser() is choosing expatreader, and
expatreader says:
File "/opt/python-2.0/lib/python2.0/site-packages/_xmlplus/sax/expatreader.py", line 81, in feed
self._parser.Parse(data, isFinal)
TypeError: not enough arguments; expected 4, got 2
Attached is a small program which demonstrates this. Am I doing something
wrong or is there a bug in the library?
When I run the program, it says:
$ python -i esquimel_bug_test.py
Python version is:
2.0 (#10, Nov 11 2000, 20:39:18)
[GCC 2.95.2 20000220 (Debian GNU/Linux)]
XML version is 0.6.2
*** BEGIN XML DATA ***
<esquimel_data>
<!-- *** My comment *** -->
<field1>Mary</field1>
<field2>
had a little lamb</field2>
</esquimel_data>
*** END XML DATA ***
Traceback (most recent call last):
File "esquimel_bug_test.py", line 116, in ?
if __name__ == "__main__": main()
File "esquimel_bug_test.py", line 109, in main
result = fromxml(f)
File "esquimel_bug_test.py", line 44, in fromxml
parser.parse(fp)
File "/opt/python-2.0/lib/python2.0/site-packages/_xmlplus/sax/expatreader.py", line 42, in parse
xmlreader.IncrementalParser.parse(self, source)
File "/opt/python-2.0/lib/python2.0/site-packages/_xmlplus/sax/xmlreader.py", line 120, in parse
self.feed(buffer)
File "/opt/python-2.0/lib/python2.0/site-packages/_xmlplus/sax/expatreader.py", line 81, in feed
self._parser.Parse(data, isFinal)
TypeError: not enough arguments; expected 4, got 2
>>> import pdb
>>> pdb.pm()
> /opt/python-2.0/lib/python2.0/site-packages/_xmlplus/sax/expatreader.py(82)feed()
-> except expat.error:
(Pdb) where
... lines deleted ...
> /opt/python-2.0/lib/python2.0/site-packages/_xmlplus/sax/expatreader.py(82)feed()
-> except expat.error:
(Pdb) list
... lines deleted ...
81 self._parser.Parse(data, isFinal)
82 -> except expat.error:
... lines deleted ...
(Pdb) self._parser
<xmlparser object at 0x8167228> (Pdb) self._parser.Parse
<built-in method Parse of xmlparser object at 0x8167228> (Pdb) p data
'<esquimel_data>\012 <!-- *** My comment *** -->\012 <field1>Mary</field1>\012 <field2>\012had a little lamb</field2>\012</esquimel_data>\012' (Pdb) p isFinal
0
(Pdb) p self._parser.Parse.__doc__
"Parse(data[, isfinal])\012Parse XML data. `isfinal' should be true at end of input."
--
-Mike (Iron) Orr, iron@mso.oz.net (if mail problems: mso@jimpick.com)
http://mso.oz.net/ English * Esperanto * Russkiy * Deutsch * Espan~ol
--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="esquimel_bug_test.py"
#!/usr/bin/python
"""esquimel_bug_test.py -- Why is this giving an error?
"""
import os, pprint, string, sys, tempfile
import xml
import xml.sax
import xml.sax.saxutils
xmlfile = "/tmp/xmlfile.xml"
XML_ROOT_NAME = "esquimel_data"
True = (1==1); False = (1==0)
XML_DATA = """\
<esquimel_data>
<!-- *** My comment *** -->
<field1>Mary</field1>
<field2>
had a little lamb</field2>
</esquimel_data>
"""
################ EXCEPTIONS ##############
class DataError(Exception):
def __init__(self, *args):
if len(args) == 3:
what, expected, found = args
s = "expected %s '%s', found '%s'" % (what, expected, found)
args = (s,)
Exception.__init__(self, *args)
class NotFoundError(Exception): pass
################ XML FUNCTIONS #########
def fromxml(fp):
"""Read xml from the file object, return the tags inside the root tag as a
dictionary, tag name -> characters. Attributes are ignored.
"""
parser = xml.sax.make_parser()
content_handler = MyContentHandler()
error_handler = xml.sax.saxutils.ErrorRaiser()
parser.setContentHandler(content_handler)
parser.setErrorHandler(error_handler)
parser.parse(fp)
return content_handler.dic
class MyContentHandler(xml.sax.saxlib.ContentHandler):
"""Sets 'self.dic' to a dictionary containing the tags inside the root tag,
tag name -> characters. Attributes are ignored. Subtags (3rd-level tags) raise
DataError.
"""
def __init__(self):
self.dic = {}
self.in_root = False # Are we inside the root tag?
self.in_second = False # Are we inside a second-level tag?
def startElement(self, name, attrs):
"""attrs are ignored.
"""
if self.in_second: # Shouldn't happen.
raise DataError(
"a field should never have subtags (this tag = '%s')." % name)
elif self.in_root: # Begin a field.
self.field = name # The current field name.
self.chars = [] # Characters we've read for this field.
self.in_field = True
else: # Begin a record.
if name != XML_ROOT_NAME:
raise DataError("XML root tag", XML_ROOT_NAME, name)
self.in_table = True
def endElement(self, name):
if self.in_field: # Gather the field data.
chars = map(string.strip, self.chars)
chars = " ".join(chars)
chars = chars.strip()
self.fields[self.field] = chars
self.field = self.chars = None # Will raise TypeError if
# these are erroneously used before being reassigned.
self.in_field = False
elif self.in_root: # Leaving the root tag.
self.in_root = False
def characters(self, ch, start, length):
end = start + length
chars = ch[start:end]
if not self.in_field:
raise DataError("characters outside a field: %s" % `chars`)
self.chars.append(chars)
def main():
print "Python version is:\n", sys.version
print "XML version is ", xml.__version__
file = tempfile.mktemp()
f = open(file, 'w+')
f.write(XML_DATA)
f.seek(0)
print
print "*** BEGIN XML DATA ***"
print f.read()
print "*** END XML DATA ***"
f.seek(0)
try:
result = fromxml(f)
print "The result is:"
pprint.pprint(result)
finally:
f.close()
os.remove(file)
if __name__ == "__main__": main()
--SUOF0GtieIMvvwua--