Recursive functions

Nachiket nachiket_gole at hotmail.com
Fri Oct 19 21:50:41 EDT 2001


I have this code to create a map of web pages.
Can I put it in a recursive function that calls itself a specific
number of times, instead of writing multiple nested loops?
I tried to put it in a method called, say, handlelinks, but when I called
it, it gave me an AttributeError. The code is as follows:
#!/usr/bin/env python^M
import sys
from os.path import walk
from htmllib import HTMLParser
from formatter import NullFormatter
import urllib

#---pick the HTML source: the URL given as the first command-line
#---argument, or standard input when no argument was supplied
if len(sys.argv) > 1:
  inf = urllib.urlopen(sys.argv[1])
else:
  inf = sys.stdin

#---module-level setting; not referenced anywhere in the code shown here
counter = 3
#---the parser class below extracts all the links from html pages
class myparser(HTMLParser):
  """Collect the hyperlinks of an HTML document together with their text.

  After feed() has been called, ``linktext`` maps every ``href`` value seen
  on an ``<a>`` tag to the text that the tag enclosed.
  """

  def __init__(self):
    # A NullFormatter is passed because only the tag callbacks below are
    # of interest here, not any formatted output.
    HTMLParser.__init__(self, NullFormatter())
    self.linktext = {}    # href -> text found between <a ...> and </a>
    self.lasturl = None   # href of the most recently opened <a>, if any

  def start_a(self, attr):
    """Handle an opening ``<a>`` tag.

    attr is iterated as (name, value) pairs; the value of ``href`` (when
    present) is remembered and capturing of the anchor text begins.
    """
    # Forget the href of any earlier anchor first.  Without this reset an
    # <a> tag that has no href (e.g. <a name="top">) would reuse the
    # previous link's URL and overwrite that link's text in end_a().
    self.lasturl = None
    for key, val in attr:
      if key == 'href':
        self.lasturl = val
    HTMLParser.save_bgn(self)

  def end_a(self):
    """Handle a closing ``</a>`` tag: store the captured text under its href."""
    # save_end() is always called so the capture started in start_a() is
    # closed, even when there is nothing to record.
    text = HTMLParser.save_end(self)
    # Anchors without an href are not links; do not file them under None.
    if self.lasturl is not None:
      self.linktext[self.lasturl] = text

# Crawl script: parse the start page, then follow its http:// links through
# four hand-copied nested loops, printing one line per link.
# NOTE(review): this listing is reproduced as posted.  Its indentation is
# inconsistent and the last line is cut off, so it does not run as-is; the
# comments below describe what each piece would do and flag the problems.
p = myparser()
p.feed(inf.read())
# Intended as the remaining crawl depth; see the notes on `depth-1` below.
depth = 2
#-----------------------------------------------------------
# Prints the file-like object itself, not the page contents or the URL.
print inf
for link in p.linktext.keys():
  if(link[:8] == 'https://'):
     print 'Secure Website found...cannot open'
  elif(link[:7] == 'http://'):
    # NOTE(review): linktext is keyed by the full href, so looking it up with
    # the truncated key link[:25] will normally raise KeyError for any URL
    # longer than 25 characters.  The deeper loops use p.linktext[link].
    print link[:20],'-->',(p.linktext[link[:25]])
    temp = urllib.urlopen(link)
    # NOTE(review): `depth-1` computes a value and throws it away; `depth`
    # is never changed, so the `depth == 0` test below can never become true.
    # An assignment (depth = depth - 1) was presumably intended.
    depth-1
    # Even if this test were true it only prints; nothing stops the loop.
    if depth == 0:    
      print 'exit'
    # The same parser object is fed again and linktext is never cleared, so
    # its keys now include the links of every page fed so far.
    p.feed(temp.read())    
    # NOTE(review): the inner loop reuses the name `link`, rebinding the
    # outer loop's variable.
    for link in p.linktext.keys():
      if(link[:8] == 'https://'):
        print 'Secure Website found...cannot open'
      elif(link[:7] == 'http://'):
       print '','|',link[:25],'-->',p.linktext[link]
       temp1 = urllib.urlopen(link)
       # Same discarded expression as above; depth is unchanged.
       depth-1
       if depth == 0: 
         print 'exit'
       p.feed(temp1.read())
   # NOTE(review): this `else` is indented to a column that matches no open
   # block, and `return` is used outside any function; both are syntax
   # errors at module level.
   else:return 'unknown format'
       # Third copy of the same loop, one level deeper.
       for link in p.linktext.keys():
        if(link[:8] == 'https://'):
         print 'Secure Website found...cannot open'
        elif(link[:7] == 'http://'):
         print ' ','|','+',link[:25],'-->',p.linktext[link]
         temp1 = urllib.urlopen(link)
         # Same discarded expression as above; depth is unchanged.
         depth-1
         if depth == 0: 
          print 'exit'
         p.feed(temp1.read())        
         # Fourth copy of the same loop.
         for link in p.linktext.keys():
          if(link[:8] == 'https://'):
           print 'Secure Website found...cannot open'
          elif(link[:7] == 'http://'):
           print '   ','|','-',link[:25],'-->',p.linktext[link]
           temp1 = urllib.urlopen(link)
           # Same discarded expression as above; depth is unchanged.
           depth-1
           if depth == 0: 
            print 'exit'
           p.feed(temp1.read())
     # NOTE(review): this `elif` does not line up with any preceding `if`.
     elif(link[:8] == 'https://'):
       print 'Secure Website'
  # NOTE(review): a link starting with 'https://' is already caught by the
  # first test of the outer loop, so for such links this branch is not reached.
  elif(link[:7] == 'https:/'):
    print 'Secure Website found...cannot open'
  # NOTE(review): the posting is cut off in the middle of this string
  # literal; as above, `return` is not valid outside a function.
  else:return 'unknow



More information about the Python-list mailing list