get links?

Julius Welby jwelby at waitrose.com
Fri May 4 18:16:04 EDT 2001


This might help. I've been learning how to do this myself in the last few
days.

Try running this and entering http://www.python.org at the prompt.

#!/usr/bin/env python
import formatter
import htmllib
import urllib

# Build the HTML parser once at module level. A NullWriter discards all
# rendered output, so feeding a page only populates the parser's own
# attributes (the script below reads ``title`` and ``anchorlist``).
writer = formatter.NullWriter()
# Bound to its own name so the imported ``formatter`` module is not hidden.
null_formatter = formatter.AbstractFormatter(writer)
parser = htmllib.HTMLParser(null_formatter)
def readfile(filepath):
    """Return the full text of the file at ``filepath``, or None on failure.

    If the file cannot be opened or read, a "Could not open" message is
    printed and None is returned instead of raising.
    """
    try:
        sourcefile = open(filepath, 'r')
        try:
            return sourcefile.read()
        finally:
            # Close the handle even if read() fails part-way through.
            sourcefile.close()
    except (IOError, OSError):
        # Only I/O failures are reported here; a bare ``except`` would also
        # swallow KeyboardInterrupt and genuine programming errors.
        print("\nCould not open " + filepath + ".")
        return None

def connect(url):
    """Fetch ``url`` and return the response body, or None on failure.

    Prints "Connecting." before the request is made. If the request raises
    IOError, a "Could not connect" message is printed and None is returned.
    """
    # Announce the attempt up front; the fetch below blocks until the whole
    # body has been downloaded.
    print("Connecting.\n")
    try:
        response = urllib.urlopen(url)
        try:
            return response.read()
        finally:
            # Release the connection whether or not read() succeeded.
            response.close()
    except IOError:
        print("\nCould not connect to " + url + ".")
        return None

# Ask for a source, load it, then list the page title and every link found.
locator = raw_input("Enter URL or file path --> ")

# Anything beginning with "http" (case-insensitive) is fetched over the
# network; everything else is treated as a local file path.
if locator.lower().startswith("http"):
    page_source = connect(locator)
else:
    page_source = readfile(locator)

# Both loaders return None on failure (after printing their own message),
# and an empty document has nothing to parse, so either case is skipped.
if page_source:
    parser.feed(page_source)
    parser.close()
    print("%s \n" % (parser.title,))
    for link in parser.anchorlist:
        print(link)
    print("\nThe page has %d links.\n" % len(parser.anchorlist))







More information about the Python-list mailing list