htmllib & do_meta question

Marcus Schopen marcus.schopen at uni-bielefeld.de
Thu May 2 23:13:13 EDT 2002


Hi,

I use the code below to parse an HTML file and extract the contents of
its "description" and "keywords" META tags. It seems to work fine. Did I
miss something, or is there a better way to do this?

Thanks
Marcus

class WPage(htmllib.HTMLParser):
    """HTML parser that records a page's META description and keywords.

    After a document has been fed to the parser, ``description`` and
    ``keywords`` hold the whitespace-stripped ``content`` values of the
    META tags whose ``name`` is "description" / "keywords" (compared
    case-insensitively).  Both stay "" if no such tag was seen; if a tag
    occurs more than once, the last occurrence wins.
    """

    def __init__(self, verbose=0):
        """Set up the result attributes and the underlying HTMLParser.

        verbose -- passed through unchanged to htmllib.HTMLParser.
        """
        self.description = ""
        self.keywords = ""
        # A NullFormatter is passed because only the META attributes are
        # of interest here, not formatted output.
        htmllib.HTMLParser.__init__(self, formatter.NullFormatter(), verbose)

    def do_meta(self, attributes):
        """Store the content of a description/keywords META tag.

        attributes -- iterable of (key, value) pairs for one <meta> tag.
        Tags with any other ``name`` (or none) are ignored.
        """
        name = content = ""
        for key, value in attributes:
            if key == "name":
                name = value
            elif key == "content":
                content = value

        # str methods replace the deprecated string.lower()/string.strip()
        # module functions; the name is normalised once for both tests.
        name = name.lower()
        if name == "description":
            self.description = content.strip()
        elif name == "keywords":
            self.keywords = content.strip()



# Read the whole document up front.  try/finally guarantees the handle is
# closed even if read() raises, and is used instead of ``with`` so the
# snippet keeps working on older Python 2 releases.  The handle is not
# called ``file`` because that would shadow the Python 2 builtin.
html_file = open("test.html")
try:
    html = html_file.read()
finally:
    html_file.close()

# Run the document through the parser.
p = WPage()
p.feed(html)
p.close()

# Results collected by WPage.do_meta ("" when the tag was absent).
descr = p.description
keys = p.keywords

-- 
Petition for a Software Patent Free Europe 
http://petition.eurolinux.org



More information about the Python-list mailing list