[Tutor] create 1000 000 variables
Сергій
kyxaxa at gmail.com
Sat Jul 15 22:11:39 CEST 2006
>
> But again, like others have suggested, you should rethink your problem and
> your solution before starting down your path. What are you really
> capturing?
>
Rethinking the problem...
I am trying to use sgmllib to collect all the text inside the "h1" ... "h6" tags.
I've created the file lister.py:
"from sgmllib import SGMLParser
class Lister(SGMLParser):
def reset(self):
SGMLParser.reset(self)
self.h1 = []
self.h2 = []
self.h3 = []
self.h4 = []
self.h5 = []
self.h6 = []
self.in_h1 = False
self.in_h2 = False
self.in_h3 = False
self.in_h4 = False
self.in_h5 = False
self.in_h6 = False
def handle_data(self, text):
if self.in_h1 == True:
self.h1.append(text)
elif self.in_h2 == True:
self.h2.append(text)
elif self.in_h3 == True:
self.h3.append(text)
elif self.in_h4 == True:
self.h4.append(text)
elif self.in_h5 == True:
self.h5.append(text)
elif self.in_h6 == True:
self.h6.append(text)
#AND NOW "BAD CODE1":
def start_h1(self, attrs):
self.in_h1 = True
def end_h1(self):
self.in_h1 = False
def start_h2(self, attrs):
self.in_h2 = True
def end_h2(self):
self.in_h2 = False
def start_h3(self, attrs):
self.in_h3 = True
def end_h3(self):
self.in_h3 = False
def start_h4(self, attrs):
self.in_h4 = True
def end_h4(self):
self.in_h4 = False
def start_h5(self, attrs):
self.in_h5 = True
def end_h5(self):
self.in_h5 = False
def start_h6(self, attrs):
self.in_h6 = True
def end_h6(self):
self.in_h6 = False
"
And now I want to print all the text in these tags.
file use_lister.py:
"
import urllib, lister
f = open('_1.html', 'r')
text = f.read()
f.close()
parser = urllister.Lister()
parser.feed(text)
parser.close()
#AND NOW "BAD CODE2":
Show_step('h1')
for i in parser.h1:
print i
Show_step('h2')
for i in parser.h2:
print i
Show_step('h3')
for i in parser.h3:
print i
Show_step('h4')
for i in parser.h4:
print i
Show_step('h5')
for i in parser.h5:
print i
Show_step('h6')
for i in parser.h6:
print i
"
I don't like "BAD CODE1" and "BAD CODE2".
How can I rewrite this bad code?
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.python.org/pipermail/tutor/attachments/20060715/8347edca/attachment-0001.htm
More information about the Tutor
mailing list