[XML-SIG] BUG: External entity in the internal subset

Juergen Hermann <jh@web.de>
Thu, 07 Jun 2001 17:08:53 +0200


Hi!

This week I had "fun" with external entities. ;)

Consider this output (see file listings below):

> python2.1 enttest.py
(0, 6, 5)
+++ parser is <xml.sax.drivers2.drv_xmlproc.XmlprocDriver instance at 0x81f9d1c>
(None, 'root') root []
(None, 'param') param [((None, 'name'), 'http-port'), ((None, 'value'), '80')]
(None, 'param') param [((None, 'name'), 'http-ssl'), ((None, 'value'), '443')] 

Now, if I move the "HTTP_SSL" entity so it appears _after_ the external entity,
like this:

<?xml version="1.0" encoding="ISO-8859-1"?>

<!DOCTYPE root [
    <!ELEMENT root (param*)>
    <!ELEMENT param EMPTY>
    <!ATTLIST param 
        name NMTOKEN #REQUIRED
        value CDATA #REQUIRED
    >

    <!ENTITY % env SYSTEM "enttest.ent">
    %env;

    <!ENTITY HTTP_SSL   "443">
]>

<root>
  <param name="http-port"  value="&HTTP_PORT;"/>
  <param name="http-ssl"   value="&HTTP_SSL;"/>
</root>

... the HTTP_SSL entity is no longer found (the script is unchanged), and the parser produces this traceback:

> python2.1 enttest.py

(0, 6, 5)
+++ parser is <xml.sax.drivers2.drv_xmlproc.XmlprocDriver instance at 0x81f9d1c>
(None, 'root') root []
(None, 'param') param [((None, 'name'), 'http-port'), ((None, 'value'), '80')]
Traceback (most recent call last):
  File "enttest.py", line 35, in ?
    test = TestLoader('enttest.xml')
  File "enttest.py", line 27, in __init__
    parser.parse(self.fileurl)
  File "/netsite/lib/python2.1/_xmlplus/sax/drivers2/drv_xmlproc.py", line 90, in parse
    parser.read_from(source.getByteStream(), bufsize)
  File "/netsite/lib/python2.1/_xmlplus/parsers/xmlproc/xmlval.py", line 105, in read_from
    self.parser.read_from(file,bufsize)
  File "/netsite/lib/python2.1/_xmlplus/parsers/xmlproc/xmlutils.py", line 137, in read_from
    self.feed(buf)
  File "/netsite/lib/python2.1/_xmlplus/parsers/xmlproc/xmlutils.py", line 185, in feed
    self.do_parse()
  File "/netsite/lib/python2.1/_xmlplus/parsers/xmlproc/xmlproc.py", line 96, in do_parse
    self.parse_start_tag()                        
  File "/netsite/lib/python2.1/_xmlplus/parsers/xmlproc/xmlproc.py", line 161, in parse_start_tag
    a_val=self.parse_att_val()
  File "/netsite/lib/python2.1/_xmlplus/parsers/xmlproc/xmlproc.py", line 245, in parse_att_val
    self.report_error(3021,name) ## FIXME: Check standalone dcl
  File "/netsite/lib/python2.1/_xmlplus/parsers/xmlproc/xmlproc.py", line 63, in report_error
    EntityParser.report_error(self,number,args)
  File "/netsite/lib/python2.1/_xmlplus/parsers/xmlproc/xmlutils.py", line 372, in report_error
    self.err.fatal(msg)
  File "/netsite/lib/python2.1/_xmlplus/sax/drivers2/drv_xmlproc.py", line 214, in fatal
    self._err_handler.fatalError(saxlib.SAXParseException(msg, None, self))
  File "/netsite/lib/python2.1/_xmlplus/sax/handler.py", line 38, in fatalError
    raise exception
xml.sax._exceptions.SAXParseException: file:///export/home/jhe/tmp/enttest.xml:19:44: Undeclared entity 'HTTP_SSL'


~~~ SNIP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

==> enttest.py <==
import os
import xml.sax
import xml.sax.saxutils
import xml.sax.handler
import xml.sax.sax2exts

class TestLoader(xml.sax.saxutils.DefaultHandler):  # parses a file on construction and prints every start tag

    def __init__(self, filename):  # filename: path of the XML document to parse
        self.fileurl = 'file://' + os.path.abspath(filename)  # 'file://' prefix + absolute path

        # create a parser via the validating-parser factory and print which driver was picked
        parser = xml.sax.sax2exts.XMLValParserFactory.make_parser()
        print '+++ parser is', parser
        parser.setFeature(xml.sax.handler.feature_namespaces, 1)  # namespace processing on
        parser.setFeature(xml.sax.handler.feature_validation, 1)  # validation on
        parser.setFeature(xml.sax.handler.feature_external_ges, 1)  # external general entities on
        parser.setFeature(xml.sax.handler.feature_external_pes, 1)  # external parameter entities on (cf. %env;)

        # register this one object as content handler, DTD handler, error handler and entity resolver
        parser.setContentHandler(self)
        parser.setDTDHandler(self)
        parser.setErrorHandler(self)
        parser.setEntityResolver(self)

        # parse the XML into events (this is line 27 of enttest.py, the frame cited in the traceback)
        parser.parse(self.fileurl)

    def startElementNS(self, name, qname, attrs):  # prints name, qname and the attribute items
        print name, qname, attrs.items()


if __name__ == "__main__":  # run only when executed as a script
    print xml.version_info  # printed first; appears as (0, 6, 5) in the quoted output
    test = TestLoader('enttest.xml')  # constructing the loader runs the whole parse
==> enttest.xml <==
<?xml version="1.0" encoding="ISO-8859-1"?>

<!DOCTYPE root [
    <!ELEMENT root (param*)>
    <!ELEMENT param EMPTY>
    <!ATTLIST param 
        name NMTOKEN #REQUIRED
        value CDATA #REQUIRED
    >

    <!ENTITY HTTP_SSL   "443">

    <!ENTITY % env SYSTEM "enttest.ent">
    %env;
]>

<root>
  <param name="http-port"  value="&HTTP_PORT;"/>
  <param name="http-ssl"   value="&HTTP_SSL;"/>
</root>
==> enttest.ent <==
<?xml version="1.0" encoding="ISO-8859-1"?>

<!ENTITY HTTP_PORT  "80">