make a simple search function for homepage

HYRY zhangry at feng.co.jp
Tue Oct 31 01:11:25 EST 2006


I want to add a simple search function to my homepage. It needs to
search through all the HTML files of my homepage (about 300 pages) and
highlight the search words.

I ran some tests with HTMLParser; it works, but it is slow. So my question is:
how can I improve its speed?

import re
from HTMLParser import HTMLParser

class HightLightParser(HTMLParser):
    """Copy an HTML document to ``outfile``, wrapping search words in red.

    Every piece of markup the parser reports (tags, entity and character
    references, comments, declarations, processing instructions) is written
    back out, and each occurrence of a search word in ordinary page text is
    wrapped in ``<font color=red>...</font>``.

    Parameters:
        outfile: object with a ``write(str)`` method receiving the output.
        words:   iterable of strings to highlight (case-sensitive substring
                 match). Empty strings are ignored.
    """

    # Text inside these elements is not ordinary page text; injecting
    # markup there would break scripts/styles or show up as literal tags.
    _NO_HIGHLIGHT_TAGS = ("script", "style", "title", "textarea")

    # \g<0> is the matched word itself.
    _HIGHLIGHT = r"<font color=red>\g<0></font>"

    def __init__(self, outfile, words):
        try:
            # Newer parsers decode character references into the text by
            # default; keep them separate so they are written back verbatim.
            HTMLParser.__init__(self, convert_charrefs=False)
        except TypeError:
            # Older HTMLParser versions do not accept that option.
            HTMLParser.__init__(self)
        self.outfile = outfile
        self.words = words
        self.found = False  # kept for interface compatibility; never set here

        # One alternation pattern replaces all words in a single pass, so a
        # word can never match inside markup inserted for another word.
        # Longest words come first so "then" wins over its prefix "the".
        candidates = sorted([w for w in words if w], key=len, reverse=True)
        if candidates:
            self._pattern = re.compile(
                "|".join([re.escape(w) for w in candidates]))
        else:
            self._pattern = None
        # Name of the no-highlight element currently open, if any.
        self._skip_tag = None

    def handle_starttag(self, tag, attrs):
        """Write the start tag exactly as it appeared in the input."""
        if tag in self._NO_HIGHLIGHT_TAGS:
            self._skip_tag = tag
        self.outfile.write(self.get_starttag_text())

    def handle_startendtag(self, tag, attrs):
        """Write a self-closing tag once, without a synthetic end tag."""
        self.outfile.write(self.get_starttag_text())

    def handle_endtag(self, tag):
        """Write the end tag and leave no-highlight mode if it closes it."""
        if tag == self._skip_tag:
            self._skip_tag = None
        self.outfile.write("</%s>" % tag)

    def handle_data(self, data):
        """Write text, highlighting search words unless inside a skipped tag."""
        if self._pattern is not None and self._skip_tag is None:
            data = self._pattern.sub(self._HIGHLIGHT, data)
        self.outfile.write(data)

    def handle_entityref(self, name):
        """Write a named entity reference (e.g. ``&amp;``) back out."""
        self.outfile.write("&%s;" % name)

    def handle_charref(self, name):
        """Write a numeric character reference (e.g. ``&#38;``) back out."""
        self.outfile.write("&#%s;" % name)

    def handle_comment(self, data):
        """Write an HTML comment back out."""
        self.outfile.write("<!--%s-->" % data)

    def handle_decl(self, decl):
        """Write a declaration such as the doctype back out."""
        self.outfile.write("<!%s>" % decl)

    def handle_pi(self, data):
        """Write a processing instruction back out."""
        self.outfile.write("<?%s>" % data)

class SearchParser(HTMLParser):
    """Parser that records whether any search word occurs in a page's text.

    After a document has been fed in, ``found`` is True if at least one
    entry of ``words`` appeared as a substring of a text chunk passed to
    ``handle_data``. Tags and attributes are not examined.
    """

    def __init__(self, words):
        HTMLParser.__init__(self)
        self.found = False
        self.words = words

    def handle_data(self, data):
        """Set ``found`` when any search word is a substring of ``data``."""
        matches = [term for term in self.words if term in data]
        if matches:
            self.found = True


# Demo driver: scan input.htm for the search words and, only when at least
# one of them occurs in the page text, write a highlighted copy to output.htm.
words = ["the"]
x = SearchParser(words)
with open("input.htm") as infile:
    data = infile.read()
x.feed(data)
x.close()  # process any input the parser is still buffering
if x.found:
    # The with-block guarantees the output is flushed and closed even if
    # parsing raises part-way through.
    with open("output.htm", "w") as outfile:
        y = HightLightParser(outfile, words)
        y.feed(data)
        y.close()  # emit any trailing text before the file is closed




More information about the Python-list mailing list