[XML-SIG] I am confused...

Roman Suzi rnd@onego.ru
Tue, 30 Jan 2001 11:13:57 +0300 (MSK)


On Mon, 29 Jan 2001, Martin v. Loewis wrote:

>Using Uche's 640k document, I get the following timings:
>
>minidom: 6.4s
>4DOM: 45s
>pDomlette: 8.9s


My computer has only 64M of RAM, so I was not able to measure anything
because the system just dug into swap...
(top showed 33M of memory used by Python... :-( )

>cDomlette fails since it does not support createElement (pDomlette
>only has create*NS operations, so I added None as the namespace
>everywhere).
>
>Remember, this is the same machine where Uche's cDomlette/XPath query
>took 0.5s. So it *does* matter how exactly you approach a certain task
>(you can easily get a factor of 90 between solutions). However, if I
>had to guess in advance what the approximate outcome would have been
>in each of the solutions, I would have been totally wrong.
>
>Regards,
>Martin
>
>#!/usr/local/bin/python
>
>print "1. simple"
>
>from xml.dom import minidom
>from string import split, index
>
>def portr(node):
>    typ = node.nodeType
>    value = node.nodeValue
>    name = node.nodeName
>    atts = node.attributes
>    par = node.parentNode
>    print "t ",   typ, "v ",value, "n ",name, "a ", atts, "p ", par
>
>class strstream:
>  def __init__(self, str):
>     self.str = str
>#     print "strstream init"
>
>  def read(self, n):
>     tmp = self.str[:n]
>     self.str = self.str[n:]
>     return tmp
>
>  def readline(self):
>     return self.str
>
>def _normalize_tokens(tl):
>    """ rules:
>    $,word,$ --> $word$
>    """
>    rules2 = {
>    ("/","/") : "//",
>    (".","/") : "./",
>    ("!","=") : "$ne$",
>    ("<","=") : "$le$",
>    (">","=") : "$ge$",
>    ("=","~") : "$match$",
>    ("!","~") : "$no_match$",
>    (";",";") : ";",
>    }
>
>    rules1 = {
>    "=" : "$eq$",
>    "!" : "$lt$",
>    "<" : "$lt$",
>    ">" : "$gt$",
>    }
>
>    ntl = []
>    i = 0
>    while i < len(tl)-1:
>      if rules2.has_key( tuple(tl[i:i+2]) ):
>        toapp = rules2[tuple(tl[i:i+2])]
>        i = i+2
>      else:
>        if tl[i] == "$":
>          if i+2 < len(tl):
>            toapp = tl[i] + tl[i+1] + tl[i+2]
>            i = i+3
>          else:
>            raise "Query error !!!" + `tl`
>        else:
>          toapp = tl[i]
>          i = i+1
>      if rules1.has_key( toapp ):
>        toapp = rules1[toapp]
>      ntl.append( toapp )
>    return ntl
>
>def _parse_query(q):
>    from shlex import shlex
>    #  i1 = index(q, "/")
>    lexer = shlex(strstream(q))
>    tokens = []
>    tt = lexer.get_token()
>    while tt:
>      tokens.append(tt)
>      tt = lexer.get_token()
>    return _normalize_tokens(tokens)
>
>def find_all_descendants(node, cond):
>    return None     # XXX !!! stub
>
>def find_all_children(node, cond):
>    lst = []
>    exec(cond)       ### must define condition !!!
>    for n in node.childNodes:
>      if condition(n):
>        lst.append(n)
>    return lst
>
>class PYQL:
>  def __init__(self, file):
>    self.dom = minidom.parse(file)
>    self.docel = self.dom.documentElement
>
>  def query(self, q):
>    qr = self._query(self.docel, _parse_query(q), self.dom)
>    qel = self.dom.createElement("xql:result")
>    if qr:
>      qel.appendChild(qr)
>    qel.setAttribute("orig", str(q))
>    return qel
>
>  def _query(self, node, subq, qrdoc):
>    #print subq
>    #print find_all_children(node,
>    #"""def condition(n): return n.nodeName == "fig" """)
>    if subq[0] == "//":
>      self._query(node, subq[1:], qrdoc)
>    elif subq[0] == "/":
>      if subq[1] == node.nodeName:
>        if len(subq) > 2:
>          if subq[2] == "/":
>            qel = qrdoc.createElement(node.nodeName)
>            for a in node.attributes.keys():
>              qel.setAttribute(a, node.attributes[a].nodeValue)
>            for node1 in node.childNodes:
>              q2 = self._query(node1, subq[2:], qrdoc)
>#              print "q2: ", q2
>              if q2:
>                 qel.appendChild(q2)
>            if len(qel.childNodes)==0:
>              del qel
>              return None
>            else:
>              return qel
>          else:
>            return node
>        else:
>          return node
>      else:
>        return None
>
>
>a = PYQL('bigxml')
>#  a.query('$or$ != 1.23E-4          /article/text/topic$')
>#  print a.query('/article/text/topic.').toxml()
>import time;start=time.time()
>res=a.query('/article/author/name.').toxml()
>print time.time()-start
>print len(res)
>#   print a.query('//fig.').toxml()
>


Sincerely yours, Roman Suzi
-- 
Vote for my design: http://silvermouse.onego.ru/gray.php3?id=0018
_/ Russia _/ Karelia _/ Petrozavodsk _/ rnd@onego.ru _/
_/ Tuesday, January 30, 2001 _/ Powered by Linux RedHat 6.2 _/
_/ "Give instruction to a wise man and he will be yet wiser." _/