[XML-SIG] Error using xml.sax.{xmlreader,expatreader}

Mike Orr iron@mso.oz.net
Sun, 26 Nov 2000 17:25:34 -0800


--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

I'm getting an error trying to do a simple XML parse into a dictionary, using
Python 2.0 and PyXML 0.6.2.  xml.make_parser() is choosing expatreader, and
expatreader says:

  File "/opt/python-2.0/lib/python2.0/site-packages/_xmlplus/sax/expatreader.py", line 81, in feed
    self._parser.Parse(data, isFinal)
TypeError: not enough arguments; expected 4, got 2

Attached is a small program which demonstrates this.  Am I doing something
wrong or is there a bug in the library?

When I run the program, it says:

$ python -i esquimel_bug_test.py
Python version is:
2.0 (#10, Nov 11 2000, 20:39:18)
[GCC 2.95.2 20000220 (Debian GNU/Linux)]
XML version is  0.6.2
 
*** BEGIN XML DATA ***
<esquimel_data>
  <!-- *** My comment *** -->
  <field1>Mary</field1>
  <field2>
had a little lamb</field2>
</esquimel_data>
 
*** END XML DATA ***
Traceback (most recent call last):
  File "esquimel_bug_test.py", line 116, in ?
    if __name__ == "__main__":  main()
  File "esquimel_bug_test.py", line 109, in main
    result = fromxml(f)
  File "esquimel_bug_test.py", line 44, in fromxml
    parser.parse(fp)
  File "/opt/python-2.0/lib/python2.0/site-packages/_xmlplus/sax/expatreader.py", line 42, in parse
    xmlreader.IncrementalParser.parse(self, source)
  File "/opt/python-2.0/lib/python2.0/site-packages/_xmlplus/sax/xmlreader.py", line 120, in parse
    self.feed(buffer)
  File "/opt/python-2.0/lib/python2.0/site-packages/_xmlplus/sax/expatreader.py", line 81, in feed
    self._parser.Parse(data, isFinal)
TypeError: not enough arguments; expected 4, got 2
>>> import pdb
>>> pdb.pm()
> /opt/python-2.0/lib/python2.0/site-packages/_xmlplus/sax/expatreader.py(82)feed()
-> except expat.error:
(Pdb)  where
... lines deleted ...
> /opt/python-2.0/lib/python2.0/site-packages/_xmlplus/sax/expatreader.py(82)feed()
-> except expat.error:                                                                  
(Pdb)  list
... lines deleted ...
 81                 self._parser.Parse(data, isFinal)
 82  ->         except expat.error:                                                                                                                                        
... lines deleted ...
(Pdb) self._parser
<xmlparser object at 0x8167228>                                                         (Pdb) self._parser.Parse
<built-in method Parse of xmlparser object at 0x8167228>                                (Pdb) p data
'<esquimel_data>\012  <!-- *** My comment *** -->\012  <field1>Mary</field1>\012  <field2>\012had a little lamb</field2>\012</esquimel_data>\012'                               (Pdb) p isFinal
0
(Pdb) p self._parser.Parse.__doc__                                                      
"Parse(data[, isfinal])\012Parse XML data.  `isfinal' should be true at end of input."

-- 
-Mike (Iron) Orr, iron@mso.oz.net  (if mail problems: mso@jimpick.com)
   http://mso.oz.net/     English * Esperanto * Russkiy * Deutsch * Espan~ol

--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="esquimel_bug_test.py"

#!/usr/bin/python
"""esquimel_bug_test.py -- Why is this giving an error?

"""
import os, pprint, string, sys, tempfile
import xml
import xml.sax
import xml.sax.saxutils

xmlfile = "/tmp/xmlfile.xml"
XML_ROOT_NAME = "esquimel_data"
True = (1==1);  False = (1==0)

XML_DATA = """\
<esquimel_data>
  <!-- *** My comment *** -->
  <field1>Mary</field1>
  <field2>
had a little lamb</field2>
</esquimel_data>
"""

################ EXCEPTIONS ##############
class DataError(Exception):
	def __init__(self, *args):
		if len(args) == 3:
			what, expected, found = args
			s = "expected %s '%s', found '%s'" % (what, expected, found)
			args = (s,)
		Exception.__init__(self, *args)
class NotFoundError(Exception):  pass


################ XML FUNCTIONS #########
def fromxml(fp):
	"""Read xml from the file object, return the tags inside the root tag as a
	dictionary, tag name -> characters.  Attributes are ignored.
	"""
	parser = xml.sax.make_parser()
	content_handler = MyContentHandler()
	error_handler = xml.sax.saxutils.ErrorRaiser()
	parser.setContentHandler(content_handler)
	parser.setErrorHandler(error_handler)
	parser.parse(fp)
	return content_handler.dic


class MyContentHandler(xml.sax.saxlib.ContentHandler):
	"""Sets 'self.dic' to a dictionary containing the tags inside the root tag,
	tag name -> characters.  Attributes are ignored.  Subtags (3rd-level tags) raise
	DataError.
	"""
	def __init__(self):
		self.dic = {}
		self.in_root = False # Are we inside the root tag?
		self.in_second = False # Are we inside a second-level tag?
	

	def startElement(self, name, attrs):
		"""attrs are ignored.
		"""
		if   self.in_second: # Shouldn't happen.
			raise DataError(
"a field should never have subtags (this tag = '%s')." % name)
		elif self.in_root: # Begin a field.
			self.field = name # The current field name.
			self.chars = [] # Characters we've read for this field.
			self.in_field = True
		else: # Begin a record.
			if name != XML_ROOT_NAME:
				raise DataError("XML root tag", XML_ROOT_NAME, name)
			self.in_table = True


	def endElement(self, name):
		if   self.in_field: # Gather the field data.
			chars = map(string.strip, self.chars)
			chars = " ".join(chars)
			chars = chars.strip()
			self.fields[self.field] = chars
			self.field = self.chars = None # Will raise TypeError if
				# these are erroneously used before being reassigned.
			self.in_field = False
		elif self.in_root: # Leaving the root tag.
			self.in_root = False
		
			
	def characters(self, ch, start, length):
		end = start + length
		chars = ch[start:end]
		if not self.in_field:
			raise DataError("characters outside a field: %s" % `chars`)
		self.chars.append(chars)

	
def main():
	print "Python version is:\n", sys.version
	print "XML version is ", xml.__version__
	file = tempfile.mktemp()
	f = open(file, 'w+')
	f.write(XML_DATA)
	f.seek(0)
	print
	print "*** BEGIN XML DATA ***"
	print f.read()
	print "*** END XML DATA ***"
	f.seek(0)
	try:
		result = fromxml(f)
		print "The result is:"
		pprint.pprint(result)
	finally:
		f.close()
		os.remove(file)

if __name__ == "__main__":  main()

--SUOF0GtieIMvvwua--