[XML-SIG] I am confused...
Martin v. Loewis
martin@mira.cs.tu-berlin.de
Mon, 29 Jan 2001 17:34:20 +0100
> I do not remember whether this was what I used for measuring, but
> this was another effort of mine to create query mechanisms.
> (It doesn't work anymore due to the lack of xml.dom.utils.)
Thanks. I've ported it to minidom, see the code below. Fortunately,
the DOM implementations follow the official API quite closely these
days, so it is easy to move from one implementation to another.
Using Uche's 640k document, I get the following timings:
minidom: 6.4s
4DOM: 45s
pDomlette: 8.9s
cDomlette fails since it does not support createElement (pDomlette
only has create*NS operations, so I added None as the namespace
everywhere).
Remember, this is the same machine where Uche's cDomlette/XPath query
took 0.5s. So it *does* matter how exactly you approach a certain task
(you can easily get a factor of 90 between solutions). However, if I
had had to guess in advance what the approximate outcome would be for
each of the solutions, I would have been totally wrong.
Regards,
Martin
#!/usr/local/bin/python
# Banner printed once at start-up; presumably labels this variant of the
# script in the benchmark output -- confirm.
print "1. simple"
from xml.dom import minidom
from string import split, index
def portr(node):
typ = node.nodeType
value = node.nodeValue
name = node.nodeName
atts = node.attributes
par = node.parentNode
print "t ", typ, "v ",value, "n ",name, "a ", atts, "p ", par
class strstream:
    """Minimal read-only, file-like wrapper around a string.

    It offers only ``read`` and ``readline``; in this file it is handed to
    ``shlex`` as the input stream for tokenizing a query string.
    """

    def __init__(self, str):
        # The parameter keeps its original name (although it shadows the
        # builtin) so that keyword callers continue to work.
        self.str = str

    def read(self, n):
        """Consume and return the next ``n`` characters.

        Returns a shorter string near the end of the data and '' once the
        buffer is exhausted.
        """
        head, self.str = self.str[:n], self.str[n:]
        return head

    def readline(self):
        """Consume and return the next line, including its newline if any.

        Returns '' once the buffer is exhausted. Unlike a version that hands
        back the whole buffer untouched, this advances the stream, so a
        caller that discards "the rest of the line" really skips it.
        """
        newline_at = self.str.find("\n")
        if newline_at < 0:
            # No newline left: the remainder is the last (unterminated) line.
            return self.read(len(self.str))
        return self.read(newline_at + 1)
def _normalize_tokens(tl):
""" rules:
$,word,$ --> $word$
"""
rules2 = {
("/","/") : "//",
(".","/") : "./",
("!","=") : "$ne$",
("<","=") : "$le$",
(">","=") : "$ge$",
("=","~") : "$match$",
("!","~") : "$no_match$",
(";",";") : ";",
}
rules1 = {
"=" : "$eq$",
"!" : "$lt$",
"<" : "$lt$",
">" : "$gt$",
}
ntl = []
i = 0
while i < len(tl)-1:
if rules2.has_key( tuple(tl[i:i+2]) ):
toapp = rules2[tuple(tl[i:i+2])]
i = i+2
else:
if tl[i] == "$":
if i+2 < len(tl):
toapp = tl[i] + tl[i+1] + tl[i+2]
i = i+3
else:
raise "Query error !!!" + `tl`
else:
toapp = tl[i]
i = i+1
if rules1.has_key( toapp ):
toapp = rules1[toapp]
ntl.append( toapp )
return ntl
def _parse_query(q):
    """Tokenize the query string ``q`` and return its normalized token list.

    The string is wrapped in a ``strstream``, split into tokens by ``shlex``
    and the raw tokens are then passed through ``_normalize_tokens``.
    """
    from shlex import shlex

    lexer = shlex(strstream(q))
    raw_tokens = []
    while 1:
        token = lexer.get_token()
        if not token:
            # A false token marks the end of the input.
            break
        raw_tokens.append(token)
    return _normalize_tokens(raw_tokens)
def find_all_descendants(node, cond):
    """Placeholder for a descendant search.

    Not implemented: both arguments are ignored and None is returned.
    """
    # XXX !!! stub
    return None
def find_all_children(node, cond):
    """Return the direct children of ``node`` accepted by a predicate.

    Parameters:
        node: object with a ``childNodes`` sequence.
        cond: Python source text that must define a one-argument function
            named ``condition``; a child is kept when ``condition(child)``
            is true.

    Returns:
        A list of the accepted children, in document order.

    Raises:
        NameError: ``cond`` does not define ``condition``.

    WARNING: ``cond`` is executed with ``exec``. Never pass text that comes
    from an untrusted source.
    """
    # Run the snippet in an explicit namespace and fetch the function from
    # it. Relying on exec to inject a local name only works with the
    # Python 2 exec statement and fails on Python 3.
    scope = {}
    exec(cond, globals(), scope)
    try:
        condition = scope["condition"]
    except KeyError:
        raise NameError("cond must define a function named 'condition'")
    return [child for child in node.childNodes if condition(child)]
class PYQL:
    """Tiny path-query engine over a minidom document.

    A query such as ``/article/author/name.`` is tokenized by
    ``_parse_query`` and matched step by step against element names,
    starting at the document element. Only child steps (``/name``) are
    implemented.

    NOTE: matched nodes are appended to the result tree as they are, not
    copied. With minidom, ``appendChild`` detaches a node from its previous
    parent, so running a query removes the matched nodes from the source
    document.
    """

    def __init__(self, file):
        """Parse ``file`` with ``minidom.parse`` and keep the document and its root."""
        self.dom = minidom.parse(file)
        self.docel = self.dom.documentElement

    def query(self, q):
        """Run the query string ``q`` against the document element.

        Returns a new ``xql:result`` element created from the source
        document. Its ``orig`` attribute holds the query text and its only
        child, if anything matched, is the result tree.
        """
        qr = self._query(self.docel, _parse_query(q), self.dom)
        qel = self.dom.createElement("xql:result")
        if qr is not None:
            qel.appendChild(qr)
        qel.setAttribute("orig", str(q))
        return qel

    def _query(self, node, subq, qrdoc):
        """Match the token list ``subq`` against ``node``.

        Parameters:
            node: DOM node to test.
            subq: normalized tokens, e.g. ``['/', 'article', '/', 'name']``.
            qrdoc: document used to create the elements of the result tree.

        Returns:
            ``node`` itself when it matches the last step of the path; a new
            element mirroring ``node`` (name and attributes) that holds the
            matches found among its children when more steps follow; None
            when nothing matches or ``subq`` is empty or incomplete.
        """
        if not subq:
            return None
        step = subq[0]
        if step == "//":
            # Descendant search is not implemented. The remainder is simply
            # matched against this node, and the result is now passed on
            # instead of being discarded.
            return self._query(node, subq[1:], qrdoc)
        if step != "/" or len(subq) < 2 or subq[1] != node.nodeName:
            return None
        if len(subq) == 2 or subq[2] != "/":
            # Last step of the path: the node itself is the match.
            return node

        qel = qrdoc.createElement(node.nodeName)
        for a in node.attributes.keys():
            qel.setAttribute(a, node.attributes[a].nodeValue)
        rest = subq[2:]
        # Iterate over a snapshot: appending a matched child to qel detaches
        # it from node.childNodes, and mutating the list being iterated
        # would skip the sibling that follows every match.
        for child in list(node.childNodes):
            match = self._query(child, rest, qrdoc)
            if match is not None:
                qel.appendChild(match)
        if len(qel.childNodes) == 0:
            return None
        return qel
a = PYQL('bigxml')
# a.query('$or$ != 1.23E-4 /article/text/topic$')
# print a.query('/article/text/topic.').toxml()
import time;start=time.time()
res=a.query('/article/author/name.').toxml()
print time.time()-start
print len(res)
# print a.query('//fig.').toxml()