[XML-SIG] SAX prettyprinter (was:Pretty-printing DOM trees)

Christian Tismer tismer@appliedbiometrics.com
Fri, 22 Jan 1999 19:14:05 +0100


This is a multi-part message in MIME format.
--------------2FF7655D01AF62D3F1B5AD1E
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

A.M. Kuchling wrote:
> 
> The format() function below pretty-prints a DOM tree.  It strips away
> all the whitespace, and then inserts Text nodes containing white
> space, producing output like this:
> 
> <?xml version="1.0"?>
> <?IS10744:arch name="xsa"?>
> <HTML>
>     <HEAD>
>         <TITLE>xmlproc: A Python XML parser</TITLE>
>         <META xsa='last-release' VALUE='19980718'/>
>     </HEAD>
>     <BODY>
>         <H1>
>             <SPAN xsa='name'>xmlproc</SPAN>: A Python XML parser
>         </H1>
>     </BODY>
> </HTML>

I wrote something similar for the SAX interface.

indenter.py is appended.

ciao - chris

-- 
Christian Tismer             :^)   <mailto:tismer@appliedbiometrics.com>
Applied Biometrics GmbH      :     Have a break! Take a ride on Python's
Kaiserin-Augusta-Allee 101   :    *Starship* http://starship.skyport.net
10553 Berlin                 :     PGP key -> http://pgp.ai.mit.edu/
     we're tired of banana software - shipped green, ripens at home
--------------2FF7655D01AF62D3F1B5AD1E
Content-Type: text/plain; charset=us-ascii; name="indenter.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="indenter.py"


# pretty printer for SAX
# CT990122
# based upon the saxutils.Canonizer code

from xml.sax import saxexts, saxlib, saxutils


import sys

class Indenter(saxlib.HandlerBase):
    """A SAX document handler that produces indented XML output.

    Every start tag is written on a new line, indented by ``indent``
    spaces per nesting level.  An end tag is written on a line of its own
    unless its element contained no child elements, in which case it
    follows the element's content directly on the same line.
    """

    def __init__(self, writer=sys.stdout, indent=2):
        """Set up the handler.

        writer -- object with a write(string) method that receives the output
        indent -- number of spaces added per nesting level
        """
        self.elem_level = 0      # current element nesting depth
        self.writer = writer
        self.indent = indent
        # Depth of the most recently opened element, or -1 once an element
        # has been closed since; endElement uses it to detect whether the
        # element being closed had child elements.
        self.last_level = -1

    def processingInstruction(self, target, remainder):
        """Write a processing instruction on its own line.

        Instructions whose target is "xml" are dropped.
        """
        if target != "xml":
            self.writer.write("<?" + target + " " + remainder + "?>\n")

    def startElement(self, name, amap):
        """Write the start tag on a new, indented line.

        Attributes are emitted in sorted name order with escaped values.
        """
        self.writer.write("\n" + self.indent * self.elem_level * " " + "<" + name)

        # Copy into a real list first so that sorting works even when
        # keys() returns a non-list view.
        a_names = list(amap.keys())
        a_names.sort()

        for a_name in a_names:
            self.writer.write(" " + a_name + "=\"")
            self.write_data(amap[a_name])
            self.writer.write("\"")
        self.writer.write(">")
        self.last_level = self.elem_level
        self.elem_level = self.elem_level + 1

    def endElement(self, name):
        """Write the end tag, on a new line if the element had children."""
        self.elem_level = self.elem_level - 1
        if self.last_level < self.elem_level:
            # A child element was closed since this element was opened:
            # put the end tag on its own line, aligned with the start tag.
            self.writer.write("\n" + self.indent * self.elem_level * " " + "</" + name + ">")
        else:
            # No child elements: keep the end tag on the same line.
            self.writer.write("</" + name + ">")
            self.last_level = -1

    def ignorableWhitespace(self, data, start_ix, length):
        """Drop ignorable white space; nothing is written."""
        # An explicit body is required: comments alone are a syntax error.
        pass

    def characters(self, data, start_ix, length):
        """Write the escaped character data when inside an element."""
        if self.elem_level > 0:
            self.write_data(data[start_ix:start_ix + length])

    def write_data(self, data):
        """Write data to the writer with &, <, " and > escaped."""
        # "&" must be replaced first so that the entities produced by the
        # following replacements are not escaped a second time.
        data = data.replace("&", "&amp;")
        data = data.replace("<", "&lt;")
        data = data.replace("\"", "&quot;")
        data = data.replace(">", "&gt;")
        self.writer.write(data)

    def endDocument(self):
        """Terminate the output with a final newline."""
        self.writer.write("\n")
        # The writer is deliberately not closed here; it defaults to
        # sys.stdout and remains under the caller's control.


--------------2FF7655D01AF62D3F1B5AD1E--