which data structure for fast sorted insert?

notnorwegian at yahoo.se notnorwegian at yahoo.se
Sun May 25 21:42:06 EDT 2008


Traceback (most recent call last):
  File "C:/Python25/Progs/WebCrawler/spider2.py", line 47, in <module>
    x = scrapeSites("http://www.yahoo.com")
  File "C:/Python25/Progs/WebCrawler/spider2.py", line 31, in
scrapeSites
    site = iterator.next()
RuntimeError: Set changed size during iteration
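
For reference, the same error can be reproduced in a few lines: a built-in set
iterator refuses to continue once the set has changed size underneath it. The
values below are purely illustrative:

s = set([1, 2, 3])
it = iter(s)
print it.next()   # fine, the set has not changed yet
s.add(4)          # the set grows while the iterator is still live
print it.next()   # RuntimeError: Set changed size during iteration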


import re
import urllib

# 'url' (a compiled link regex) is assumed to be defined elsewhere in spider2.py

def joinSets(set1, set2):
    # equivalent to set1.update(set2)
    for i in set2:
        set1.add(i)
    return set1

def scrapeSites(startAddress):
    site = startAddress
    sites = set()
    iterator = iter(sites)          # iterator over the set that is grown below
    pos = 0
    while pos < 10:  # len(sites):
        newsites = scrapeSite(site)
        joinSets(sites, newsites)   # adds to 'sites' while 'iterator' is live
        pos += 1
        site = iterator.next()      # line 31 in the traceback above
    return sites

def scrapeSite(address):
    toSet = set()
    site = urllib.urlopen(address)
    for row in site:
        obj = url.search(row)
        if obj is not None:
            toSet.add(obj.group())
    return toSet
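
One common way around this is not to iterate over the set being grown at all:
keep the set purely for "have I seen this?" tests and drive the crawl from a
separate work queue. A rough sketch along those lines, assuming scrapeSite as
above; the deque, the 'seen'/'queue' names and the 10-site cap are illustrative,
not the original code:

from collections import deque

def scrapeSites(startAddress):
    seen = set([startAddress])      # membership tests only, never iterated while growing
    queue = deque([startAddress])   # stable work queue that is safe to consume
    scraped = 0
    while queue and scraped < 10:   # same 10-site limit as the loop above
        site = queue.popleft()
        scraped += 1
        for link in scrapeSite(site):
            if link not in seen:
                seen.add(link)
                queue.append(link)
    return seen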


wtf? I'm not multithreading or anything, so how can the size change here?
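
No threads are needed for this: joinSets(sites, newsites) keeps growing the very
set that 'iterator' was created from at the top of scrapeSites, and a set iterator
raises this RuntimeError as soon as it notices the size has changed, whether or
not the change came from another thread.

On the subject-line question, the usual standard-library option is bisect.insort
(a binary search finds the slot, though the underlying list insert is still O(n));
heapq is the alternative when only the smallest item needs to come out quickly.
A minimal sketch with made-up values:

import bisect

addresses = []
for a in ("c", "a", "b"):
    bisect.insort(addresses, a)   # binary search for the slot, then a list insert
print addresses                   # prints ['a', 'b', 'c']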


