[XML-SIG] I am confused...
Roman Suzi
rnd@onego.ru
Tue, 30 Jan 2001 11:13:57 +0300 (MSK)
On Mon, 29 Jan 2001, Martin v. Loewis wrote:
>Using Uche's 640k document, I get the following timings:
>
>minidom: 6.4s
>4DOM: 45s
>pDomlette: 8.9s
My computer has only 64M of RAM, so I was not able to measure anything
because the system just dug into swap...
(top showed 33M of memory used by Python... :-( )
>cDomlette fails since it does not support createElement (pDomlette
>only has create*NS operations, so I added None as the namespace
>everywhere).
>
>Remember, this is the same machine where Uche's cDomlette/XPath query
>took 0.5s. So it *does* matter how exactly you approach a certain task
>(you can easily get a factor of 90 between solutions). However, if I
>had had to guess in advance what the approximate outcome would be
>for each of the solutions, I would have been totally wrong.
>
>Regards,
>Martin
>
>#!/usr/local/bin/python
>
>print "1. simple"
>
>from xml.dom import minidom
>from string import split, index
>
>def portr(node):
> typ = node.nodeType
> value = node.nodeValue
> name = node.nodeName
> atts = node.attributes
> par = node.parentNode
> print "t ", typ, "v ",value, "n ",name, "a ", atts, "p ", par
>
>class strstream:
> def __init__(self, str):
> self.str = str
># print "strstream init"
>
> def read(self, n):
> tmp = self.str[:n]
> self.str = self.str[n:]
> return tmp
>
> def readline(self):
> return self.str
>
>def _normalize_tokens(tl):
> """ rules:
> $,word,$ --> $word$
> """
> rules2 = {
> ("/","/") : "//",
> (".","/") : "./",
> ("!","=") : "$ne$",
> ("<","=") : "$le$",
> (">","=") : "$ge$",
> ("=","~") : "$match$",
> ("!","~") : "$no_match$",
> (";",";") : ";",
> }
>
> rules1 = {
> "=" : "$eq$",
> "!" : "$lt$",
> "<" : "$lt$",
> ">" : "$gt$",
> }
>
> ntl = []
> i = 0
> while i < len(tl)-1:
> if rules2.has_key( tuple(tl[i:i+2]) ):
> toapp = rules2[tuple(tl[i:i+2])]
> i = i+2
> else:
> if tl[i] == "$":
> if i+2 < len(tl):
> toapp = tl[i] + tl[i+1] + tl[i+2]
> i = i+3
> else:
> raise "Query error !!!" + `tl`
> else:
> toapp = tl[i]
> i = i+1
> if rules1.has_key( toapp ):
> toapp = rules1[toapp]
> ntl.append( toapp )
> return ntl
>
>def _parse_query(q):
> from shlex import shlex
> # i1 = index(q, "/")
> lexer = shlex(strstream(q))
> tokens = []
> tt = lexer.get_token()
> while tt:
> tokens.append(tt)
> tt = lexer.get_token()
> return _normalize_tokens(tokens)
>
>def find_all_descendants(node, cond):
> return None # XXX !!! stub
>
>def find_all_children(node, cond):
> lst = []
> exec(cond) ### must define condition !!!
> for n in node.childNodes:
> if condition(n):
> lst.append(n)
> return lst
>
>class PYQL:
> def __init__(self, file):
> self.dom = minidom.parse(file)
> self.docel = self.dom.documentElement
>
> def query(self, q):
> qr = self._query(self.docel, _parse_query(q), self.dom)
> qel = self.dom.createElement("xql:result")
> if qr:
> qel.appendChild(qr)
> qel.setAttribute("orig", str(q))
> return qel
>
> def _query(self, node, subq, qrdoc):
> #print subq
> #print find_all_children(node,
> #"""def condition(n): return n.nodeName == "fig" """)
> if subq[0] == "//":
> self._query(node, subq[1:], qrdoc)
> elif subq[0] == "/":
> if subq[1] == node.nodeName:
> if len(subq) > 2:
> if subq[2] == "/":
> qel = qrdoc.createElement(node.nodeName)
> for a in node.attributes.keys():
> qel.setAttribute(a, node.attributes[a].nodeValue)
> for node1 in node.childNodes:
> q2 = self._query(node1, subq[2:], qrdoc)
># print "q2: ", q2
> if q2:
> qel.appendChild(q2)
> if len(qel.childNodes)==0:
> del qel
> return None
> else:
> return qel
> else:
> return node
> else:
> return node
> else:
> return None
>
>
>a = PYQL('bigxml')
># a.query('$or$ != 1.23E-4 /article/text/topic$')
># print a.query('/article/text/topic.').toxml()
>import time;start=time.time()
>res=a.query('/article/author/name.').toxml()
>print time.time()-start
>print len(res)
># print a.query('//fig.').toxml()
>
Sincerely yours, Roman Suzi
--
Vote for my design: http://silvermouse.onego.ru/gray.php3?id=0018
_/ Russia _/ Karelia _/ Petrozavodsk _/ rnd@onego.ru _/
_/ Tuesday, January 30, 2001 _/ Powered by Linux RedHat 6.2 _/
_/ "Give instruction to a wise man and he will be yet wiser." _/