xml.sax problem, help needed.
Hari Sekhon
hpsekhon at googlemail.com
Tue Aug 1 18:01:12 EDT 2006
I've written an XML parser using xml.sax that works most of the time
but often fails with a traceback when trying to read a file. The
traceback is below:
Traceback (most recent call last):
File "/usr/lib/python2.4/site-packages/cherrypy/_cphttptools.py",
line 271, in run
main()
File "/usr/lib/python2.4/site-packages/cherrypy/_cphttptools.py",
line 502, in main
body = page_handler(*args, **cherrypy.request.paramMap)
File "netscan.py", line 160, in index
parse()
File "netscan.py", line 117, in parse
parser.parse ( scan_results )
File "/usr/lib/python2.4/xml/sax/expatreader.py", line 107, in parse
xmlreader.IncrementalParser.parse(self, source)
File "/usr/lib/python2.4/xml/sax/xmlreader.py", line 125, in parse
self.close()
File "/usr/lib/python2.4/xml/sax/expatreader.py", line 217, in close
self.feed("", isFinal = 1)
File "/usr/lib/python2.4/xml/sax/expatreader.py", line 211, in feed
self._err_handler.fatalError(exc)
File "/usr/lib/python2.4/xml/sax/handler.py", line 38, in fatalError
raise exception
SAXParseException: /var/log/netscan/scanresults.txt:8:0: no element found
I don't understand why it's telling me that no element is found. It
looks like a problem inside xml.sax, but I'm not sure whether I've caused
it or how. The XML file is good and is generated by nmap; it's not
missing tags or anything, and it is quite small. The script that
produced this traceback is below:
#!/usr/bin/env python
import xml.sax
import sys
import os
# Directory holding the scan output, and the file names used inside it.
dir = '/var/log/netscan'
scan = 'scanresults.txt'
temp = 'tempscan.txt'

# Full paths built from the directory and the two file names.
scan_results = '/'.join((dir, scan))
temp_results = '/'.join((dir, temp))

# Stop immediately when the results directory is missing.
if not os.path.isdir(dir):
    sys.exit("%s does not exist! exiting..." % dir)

# One dict per scanned host is appended here by the content handler.
# Shape of each record (the bracketed keys are per-port lists):
#     {
#     "status"       : "",
#     "address"      : "",
#     "hostname"     : "",
#     "port[0]"      : "",
#     "protocol[0]"  : "",
#     "service[0]"   : "",
#     "state[0]"     : "",
#     "product[0]"   : "",
#     "version[0]"   : "",
#     "extrainfo[0]" : ""
#     }
network = []
class scanparser( xml.sax.ContentHandler ):
    """SAX content handler that collects per-host data from a scan report.

    While a <host> element is being read, its details accumulate in
    ``self.host``.  When the element closes, the record is appended to the
    module-level ``network`` list and a fresh, empty record is started.

    The values of the ``startstr`` attribute of <nmaprun> and the
    ``timestr`` attribute of <finished> are stored in the module-level
    globals ``scan_start`` and ``scan_stop``.
    """

    def __init__(self):
        # Explicit base-class call (rather than super()) so the handler's
        # own initialisation runs regardless of the class style in use.
        xml.sax.ContentHandler.__init__(self)
        self.host = self._new_host()

    def _new_host(self):
        """Return an empty host record with every expected key present."""
        return {
            'status': "",
            'address': "",
            'hostname': "",
            'port': [],
            'protocol': [],
            'service': [],
            'state': [],
            'product': [],
            'version': [],
            'extrainfo': [],
        }

    def startElement(self, name, attributes):
        """Record the attributes of interest from an opening tag.

        name       -- the element name reported by the parser
        attributes -- the element's attributes (SAX Attributes interface)

        Elements other than the ones listed below are ignored.
        """
        global scan_start, scan_stop
        if name == 'nmaprun':
            scan_start = attributes.getValue('startstr')
        elif name == 'finished':
            scan_stop = attributes.getValue('timestr')
        elif name == 'status':
            self.host['status'] = attributes.getValue('state')
        elif name == 'address':
            # Only IPv4 addresses are kept; other address types are skipped.
            if attributes.getValue('addrtype') == 'ipv4':
                self.host['address'] = attributes.getValue('addr')
        elif name == 'hostname':
            self.host['hostname'] = attributes.getValue('name')
        elif name == 'port':
            self.host['port'].append(attributes.getValue('portid'))
            self.host['protocol'].append(attributes.getValue('protocol'))
        elif name == 'service':
            self.host['service'].append(attributes.getValue('name'))
            # These attributes are optional.  An empty string is appended
            # when one is absent so the lists stay the same length as
            # 'service'.  get() is used instead of has_key() because
            # has_key() is not available on Python 3.
            for optional in ('product', 'version', 'extrainfo'):
                self.host[optional].append(attributes.get(optional, ''))
        elif name == 'state':
            self.host['state'].append(attributes.getValue('state'))

    def endElement(self, name):
        """Store the finished host record when a <host> element closes."""
        if name == 'host':
            # self.host is rebound to a brand-new record right after, so
            # the stored dict is never modified again by this handler.
            network.append(self.host)
            self.host = self._new_host()
def parse():
    """Parse the scan results file and rebuild the global ``network`` list.

    A new SAX parser is created with a ``scanparser`` content handler,
    ``network`` is reset to an empty list, and the file named by
    ``scan_results`` is parsed.  Errors raised by the parser are not
    caught here.
    """
    global network
    reader = xml.sax.make_parser()
    handler = scanparser()
    reader.setContentHandler(handler)
    # Discard the records from any earlier run before parsing again.
    network = []
    reader.parse(scan_results)


parse()
(Well, really it runs from cherrypy but for brevity I've left that out)
Any help or pointers anybody could give would be greatly appreciated...
Hari
More information about the Python-list
mailing list