Recursive functions
Nachiket
nachiket_gole at hotmail.com
Fri Oct 19 21:50:41 EDT 2001
I have this code to create a map of webpages.
Can I put it in a recursive function that calls itself a specific
number of times instead of writing multiple loops.
I tried to put it in a method called say handlelinks but when I called
it it gives me a attribute error. the code is as follows
#!/usr/bin/env python^M
import sys
from os.path import walk
from htmllib import HTMLParser
from formatter import NullFormatter
import urllib
#---read in URL as first argument on command line
try:
inf = urllib.urlopen(sys.argv[1])
except IndexError:
inf = sys.stdin
#---extracts all the links from html pages
counter = 3
class myparser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self,NullFormatter())
self.linktext = {}
self.lasturl = None
def start_a(self,attr):
for key,val in attr:
if key == 'href': self.lasturl = val
#self.linktext.append(val)
HTMLParser.save_bgn(self)
def end_a(self):
self.linktext[self.lasturl] = HTMLParser.save_end(self)
p = myparser()
p.feed(inf.read())
depth = 2
#-----------------------------------------------------------
print inf
for link in p.linktext.keys():
if(link[:8] == 'https://'):
print 'Secure Website found...cannot open'
elif(link[:7] == 'http://'):
print link[:20],'-->',(p.linktext[link[:25]])
temp = urllib.urlopen(link)
depth-1
if depth == 0:
print 'exit'
p.feed(temp.read())
for link in p.linktext.keys():
if(link[:8] == 'https://'):
print 'Secure Website found...cannot open'
elif(link[:7] == 'http://'):
print '','|',link[:25],'-->',p.linktext[link]
temp1 = urllib.urlopen(link)
depth-1
if depth == 0:
print 'exit'
p.feed(temp1.read())
else:return 'unknown format'
for link in p.linktext.keys():
if(link[:8] == 'https://'):
print 'Secure Website found...cannot open'
elif(link[:7] == 'http://'):
print ' ','|','+',link[:25],'-->',p.linktext[link]
temp1 = urllib.urlopen(link)
depth-1
if depth == 0:
print 'exit'
p.feed(temp1.read())
for link in p.linktext.keys():
if(link[:8] == 'https://'):
print 'Secure Website found...cannot open'
elif(link[:7] == 'http://'):
print ' ','|','-',link[:25],'-->',p.linktext[link]
temp1 = urllib.urlopen(link)
depth-1
if depth == 0:
print 'exit'
p.feed(temp1.read())
elif(link[:8] == 'https://'):
print 'Secure Website'
elif(link[:7] == 'https:/'):
print 'Secure Website found...cannot open'
else:return 'unknow
More information about the Python-list
mailing list