xml.sax problem, help needed.

Hari Sekhon hpsekhon at googlemail.com
Tue Aug 1 18:01:12 EDT 2006


I've written an XML parser using xml.sax which works most of the time,
but it often fails with an exception when trying to read a file. The
traceback is below:

Traceback (most recent call last):
  File "/usr/lib/python2.4/site-packages/cherrypy/_cphttptools.py",
line 271, in run
    main()
  File "/usr/lib/python2.4/site-packages/cherrypy/_cphttptools.py",
line 502, in main
    body = page_handler(*args, **cherrypy.request.paramMap)
  File "netscan.py", line 160, in index
    parse()
  File "netscan.py", line 117, in parse
    parser.parse ( scan_results )
  File "/usr/lib/python2.4/xml/sax/expatreader.py", line 107, in parse
    xmlreader.IncrementalParser.parse(self, source)
  File "/usr/lib/python2.4/xml/sax/xmlreader.py", line 125, in parse
    self.close()
  File "/usr/lib/python2.4/xml/sax/expatreader.py", line 217, in close
    self.feed("", isFinal = 1)
  File "/usr/lib/python2.4/xml/sax/expatreader.py", line 211, in feed
    self._err_handler.fatalError(exc)
  File "/usr/lib/python2.4/xml/sax/handler.py", line 38, in fatalError
    raise exception
SAXParseException: /var/log/netscan/scanresults.txt:8:0: no element found


I don't understand why it's telling me that no element was found. It
looks like a problem inside xml.sax, but I'm not sure whether I've caused
it, or how. The XML file is good: it is generated by nmap, it isn't
missing any tags, and it is quite small. The script code which
produced this traceback is below:


#!/usr/bin/env python

import xml.sax
import sys
import os

# Directory holding the scan output, and the file names used inside it.
dir  = '/var/log/netscan'
scan = 'scanresults.txt'
temp = 'tempscan.txt'

# Full paths to the results file and the temporary file.
scan_results = '%s/%s' % (dir, scan)
temp_results = '%s/%s' % (dir, temp)

# Nothing can be read without the directory, so stop right away.
if not os.path.isdir(dir):
    sys.exit("%s does not exist! exiting..." % dir)

# One dict per parsed host, appended by scanparser.endElement.  Each dict has
# the string keys "status", "address" and "hostname", plus the list keys
# "port", "protocol", "service", "state", "product", "version" and
# "extrainfo".
network = []

class scanparser( xml.sax.ContentHandler ):
    """SAX content handler that gathers per-host records from scan XML.

    While a document is parsed, attribute values of the ``status``,
    ``address``, ``hostname``, ``port``, ``service`` and ``state`` elements
    are accumulated in ``self.host``.  When a ``host`` element closes, the
    record is appended to the module-level ``network`` list and a fresh
    record is started.

    The ``startstr`` attribute of ``nmaprun`` and the ``timestr`` attribute
    of ``finished`` are stored in the module-level names ``scan_start`` and
    ``scan_stop``.
    """

    # Per-port lists that are filled from child elements of <port> and must
    # stay index-aligned with self.host['port'].
    _PORT_CHILD_KEYS = ('state', 'service', 'product', 'version', 'extrainfo')

    # Optional <service> attributes; a missing one is recorded as ''.
    _OPTIONAL_SERVICE_KEYS = ('product', 'version', 'extrainfo')

    def __init__(self):
        # Let the base class set up its own state (e.g. the locator slot).
        xml.sax.ContentHandler.__init__(self)
        self.host = self._new_host()

    def _new_host(self):
        """Return an empty host record with every expected key present."""
        return {
            'status':    "",
            'address':   "",
            'hostname':  "",
            'port':      [],
            'protocol':  [],
            'service':   [],
            'state':     [],
            'product':   [],
            'version':   [],
            'extrainfo': [],
        }

    def startElement(self,name,attributes):
        """Record the attributes of interest from an opening tag.

        name       -- element name reported by the parser
        attributes -- SAX attributes object for the element

        Raises KeyError (from ``getValue``) if a required attribute is
        absent from one of the handled elements.
        """
        global scan_start,scan_stop
        host = self.host

        if name == 'nmaprun':
            scan_start = attributes.getValue('startstr')
        elif name == 'finished':
            scan_stop = attributes.getValue('timestr')
        elif name == 'status':
            host['status'] = attributes.getValue('state')

        elif name == 'address':
            # Only the IPv4 address is kept; other address types are ignored.
            if attributes.getValue('addrtype') == 'ipv4':
                host['address'] = attributes.getValue('addr')

        elif name == 'hostname':
            host['hostname'] = attributes.getValue('name')

        elif name == 'port':
            host['port'].append( attributes.getValue('portid') )
            host['protocol'].append( attributes.getValue('protocol') )

        elif name == 'service':
            host['service'].append( attributes.getValue('name') )
            # .get() is used instead of has_key(), which the attributes
            # object no longer provides on Python 3.
            for key in self._OPTIONAL_SERVICE_KEYS:
                host[key].append( attributes.get(key, '') )

        elif name == 'state':
            host['state'].append( attributes.getValue('state') )

    def endElement(self,name):
        """Finish a port or host when its closing tag is seen.

        name -- element name reported by the parser
        """
        if name == 'port':
            # A port without a <state> or <service> child would otherwise
            # leave the parallel lists shorter than the port list; pad with
            # '' so that index i refers to the same port in every list.
            expected = len(self.host['port'])
            for key in self._PORT_CHILD_KEYS:
                column = self.host[key]
                while len(column) < expected:
                    column.append('')

        elif name == 'host':
            # self.host is rebound to a new dict right after, so the stored
            # record is never modified again by this handler.
            network.append(self.host)
            self.host = self._new_host()


def parse():
    """Parse the scan results file and rebuild the global ``network`` list.

    A new SAX parser is created with a ``scanparser`` instance as its
    content handler, ``network`` is reset to an empty list, and the file
    named by ``scan_results`` is parsed.  Any exception raised by the
    parser (for example ``SAXParseException`` on malformed input) is
    propagated to the caller.
    """
    global network
    reader = xml.sax.make_parser()
    handler = scanparser()
    reader.setContentHandler(handler)
    # Discard results of any earlier run; the handler appends to this list.
    network = []
    reader.parse(scan_results)

# Run one parse when the script is loaded.
parse()

(Well, really it runs from cherrypy but for brevity I've left that out)


Any help or pointers anybody could give would be greatly appreciated...


Hari



More information about the Python-list mailing list