Newbie, list has no attribute iteritems

rabad ruabag at gmail.com
Fri Jul 4 05:37:38 EDT 2008


Hi,
I've created a custom filter based on HTMLParser, with the following
source:

class Filter(HTMLParser):

    def __init__(self, keyfile):
        HTMLParser.__init__(self)
        mykwfile = open(keyfile, 'r')
        self._keywords = []
        for kw in mykwfile.read().split('\n'):
            self._keywords.append(kw)
            print kw
        mykwfile.close()
        self._toProcess = False
        self.stack = []

    def handle_starttag(self, tag, attrs):
        if 'a' != tag:
            self.stack.append(self.__html_start_tag(tag, attrs))
            return
        attrs = dict(attrs)
        self._toProcess = True
        for key in self._keywords:
            if 'a' == tag:
                p = re.compile(key, re.IGNORECASE)
                if 'href' in attrs:
                    attrs['href'] = p.sub(r'XXXXX',attrs['href'])
        self.stack.append(self.__html_start_tag(tag, attrs))

    def handle_startendtag(self, tag, attrs):
        if 'img' != tag and 'meta' != tag:
            self.stack.append(self.__html_startend_tag(tag, attrs))
            return
        attrs = dict(attrs)
        self._toProcess = True
        for key in self._keywords:
            p = re.compile(key, re.IGNORECASE)
            if 'img' == tag:
                if 'src' in attrs:
                    attrs['src'] = p.sub(r'XXXXX',attrs['src'])
                if 'alt' in attrs:
                    attrs['alt'] = p.sub(r'XXXXX',attrs['alt'])
            if 'meta' == tag:
                if 'description' in attrs:
                    attrs['description'] =
p.sub(r'XXXXX',attrs['description'])
                if 'content' in attrs:
                    attrs['content'] =
p.sub(r'XXXXX',attrs['content'])
        if 'meta' == tag or 'img' == tag:
            self._toProcess = False
        self.stack.append(self.__html_startend_tag(tag, attrs))

    def handle_endtag(self, tag):
        self.stack.append(self.__html_end_tag(tag))
        if self._toProcess:
            self._toProcess = False

    def handle_data(self, data):
        if self._toProcess:
            for key in self._keywords:
                p = re.compile(key,re.IGNORECASE)
                data = p.sub(r'XXXXX',data)
        self.stack.append(data)

    def __html_start_tag(self, tag, attrs):
        return '<%s%s>' % (tag, self.__html_attrs(attrs))

    def __html_startend_tag(self, tag, attrs):
        return '<%s%s/>' % (tag, self.__html_attrs(attrs))

    def __html_end_tag(self, tag):
            return '</%s>' % (tag)

    def __html_attrs(self, attrs):
        _attrs = ''
        if attrs:
            _attrs = ' %s' % (' '.join([('%s="%s"' % (k,v)) for k,v in
attrs.iteritems()]))
        return _attrs

But when I use it, it gives me the following error message:
  ERROR  Processor exception: AttributeError: 'list' object has no
attribute 'it
eritems'
Traceback (most recent call last):
  File "d:\esp\lib\python2.3\processors\DocDumpF.py", line 87, in
Process
    p.feed(document.GetValue("data"))
  File "HTMLParser.py", line 108, in feed
  File "HTMLParser.py", line 148, in goahead
  File "HTMLParser.py", line 281, in parse_starttag
  File "d:\esp\lib\python2.3\processors\DocDumpF.py", line 121, in
handle_startt
ag
    self.stack.append(self.__html_start_tag(tag, attrs))
  File "d:\esp\lib\python2.3\processors\DocDumpF.py", line 167, in
__html_start_
tag
    return '<%s%s>' % (tag, self.__html_attrs(attrs))
  File "d:\esp\lib\python2.3\processors\DocDumpF.py", line 178, in
__html_attrs
    _attrs = ' %s' % (' '.join([('%s="%s"' % (k,v)) for k,v in
attrs.iteritems()
]))

Anybody knows why it says attrs is not a list element?
Thanks,
Rubén



More information about the Python-list mailing list