get links?
Julius Welby
jwelby at waitrose.com
Fri May 4 18:16:04 EDT 2001
This might help. I've been learning how to do this myself in the last few
days.
Try running this and entering http://www.python.org at the prompt.
#!/usr/bin/env python
import formatter
import htmllib
import urllib
# Build the parsing pipeline: a NullWriter (which performs no output actions),
# a formatter that drives it, and the HTML parser on top of both.
writer = formatter.NullWriter()
# Bound to its own name so that the imported ``formatter`` module is not
# rebound to an instance and remains usable after this point.
page_formatter = formatter.AbstractFormatter(writer)
parser = htmllib.HTMLParser(page_formatter)
def readfile(filepath):
    """Return the full contents of the file at *filepath*.

    If the file cannot be opened or read, a "Could not open" message is
    printed and None is returned.
    """
    try:
        # ``with`` closes the file even when read() raises.
        with open(filepath, 'r') as sourcefile:
            return sourcefile.read()
    except (IOError, OSError):
        # Only I/O failures are reported here; a bare ``except`` would also
        # swallow KeyboardInterrupt and genuine programming errors.
        # The single parenthesised argument prints the same text whether
        # ``print`` is a statement or a function.
        print("\nCould not open " + filepath + ".")
        return None
def connect(url):
    """Fetch the document at *url* and return its body.

    Prints "Connecting." once the download has succeeded. If the fetch
    raises IOError, a "Could not connect" message is printed and None is
    returned.
    """
    try:
        response = urllib.urlopen(url)
        try:
            content = response.read()
        finally:
            # Release the connection even if the read fails part-way.
            response.close()
    except IOError:
        # The single parenthesised argument prints the same text whether
        # ``print`` is a statement or a function.
        print("\nCould not connect to " + url + ".")
        return None
    print("Connecting.\n")
    return content
def is_http_url(locator):
    """Return True when *locator* starts with an http:// or https:// scheme.

    The comparison is case-insensitive. Requiring the "://" keeps local
    paths that merely begin with "http" (for example "httpd.conf") from
    being treated as URLs.
    """
    return locator.lower().startswith(("http://", "https://"))


def main():
    """Prompt for a URL or file path and print the links found in the page.

    The page is fetched with connect() when the input is an HTTP(S) URL and
    read with readfile() otherwise. If content was obtained, it is fed to
    the module-level ``parser``; the page title, every collected anchor and
    the total link count are then printed.
    """
    locator = raw_input("Enter URL or file path --> ")
    # ``content`` rather than ``file`` so the builtin is not shadowed.
    if is_http_url(locator):
        content = connect(locator)
    else:
        content = readfile(locator)
    if not content:
        # Both loaders return None after reporting a failure; an empty page
        # has nothing to parse either.
        return
    parser.feed(content)
    parser.close()
    # Each print takes one pre-built string so the output is the same
    # whether ``print`` is a statement or a function.
    print(str(parser.title) + " \n")
    for link in parser.anchorlist:
        print(link)
    print("\nThe page has %d links.\n" % len(parser.anchorlist))


if __name__ == "__main__":
    main()
More information about the Python-list
mailing list