Python-list digest, Vol 1 #455 - 20 msgs

Lars Marius Garshol larsga at ifi.uio.no
Wed Jun 16 12:09:34 EDT 1999


* Matej Cepl
|
| That was exactly my question: how to make filter with htmllib.py
| doing nothing and just copying HTML input on output?

Here's a simple implementation. Note that you'll need to escape all
'<'s and '&'s in element content and in attribute values. And in
attribute values you'll also need to escape '"'s.

import sys,htmllib

class IDTransform(htmllib.HTMLParser):
    """HTML parser that re-serialises the markup it is fed onto `out`.

    Each parser callback writes the construct it received back out as
    HTML text, so feeding a document through this class yields a
    (near-)identity copy.  The copy is not byte-identical: a newline is
    written after every tag and processing instruction, and attribute
    values are always emitted in double quotes.

    `out` is any object with a ``write(string)`` method.
    """

    def __init__(self, out=sys.stdout):
        # The base parser is given no formatter (None); every handler
        # below writes directly to `out` instead.
        htmllib.HTMLParser.__init__(self, None)
        self.out = out

    def _escape(self, text, quote=0):
        """Return `text` with '&' and '<' (and '"' if `quote`) escaped."""
        # '&' must be replaced first so the '&' of the entities produced
        # by the later replacements is not escaped a second time.
        text = text.replace("&", "&amp;").replace("<", "&lt;")
        if quote:
            text = text.replace('"', "&quot;")
        return text

    def _write_starttag(self, tag, attributes):
        """Write ``<tag name="value" ...>`` followed by a newline."""
        self.out.write("<%s" % tag)
        for name, value in attributes:
            # The leading space separates the attribute from the tag name
            # and from the previous attribute.
            self.out.write(" %s=\"%s\"" % (name, self._escape(value, 1)))
        self.out.write(">\n")

    def handle_pi(self, data):
        """Write a processing instruction back out as ``<?data>``."""
        self.out.write("<?%s>\n" % data)

    def handle_starttag(self, tag, method, attributes):
        """Write a start tag for which the parser has a handler.

        `method` (the base class's handler for this tag) is deliberately
        not called: only the tag text is copied to `out`.
        """
        self._write_starttag(tag, attributes)

    def handle_endtag(self, tag, method):
        """Write an end tag for which the parser has a handler."""
        self.out.write("</%s>\n" % tag)

    def unknown_starttag(self, tag, attributes):
        """Write a start tag the base parser has no handler for.

        sgmllib routes such tags here rather than to handle_starttag();
        without this override they would be missing from the copy.
        """
        self._write_starttag(tag, attributes)

    def unknown_endtag(self, tag):
        """Write an end tag the base parser has no handler for."""
        self.out.write("</%s>\n" % tag)

    def unknown_entityref(self, name):
        """Write an unresolved entity reference back out as ``&name;``."""
        self.out.write("&%s;" % name)

    def unknown_charref(self, no):
        """Write an unresolved character reference back out as ``&#no;``."""
        self.out.write("&#%s;" % no)

    def handle_data(self, data):
        """Write character data, escaping '&' and '<'.

        Escaping is needed because the text arriving here is written
        into HTML again; a raw '<' or '&' would be read as markup.
        """
        self.out.write(self._escape(data))


--Lars M.




More information about the Python-list mailing list