HTMLParser and write

Stephen Ferg steve at ferg.org
Fri Mar 5 15:04:55 EST 2004


You're right.  The example is REALLY feeble.  Maybe this will help:

"""HTMLParserDemoProgram
Use HTMLParser to read in an HTML file and write it out again.
This will put all tag and attribute names into lowercase.
"""

"""
REVISION HISTORY
2 2004-01-05 added handle_pi and improved attribute processing
"""

from HTMLParser import HTMLParser

class CustomizedParser(HTMLParser):
	"""HTMLParser subclass that writes each parsed construct to an output file.

	Call setOutfileName() before feed(), and closeOutfile() when finished.
	"""

	def setOutfileName(self, argOutfileName):
		"""Remember the output file, so it is easy to write to it.

		The file named argOutfileName is opened for writing.
		"""
		self.OutfileName = argOutfileName
		self.Outfile     = open(self.OutfileName, "w")

	def closeOutfile(self):
		"""Close the output file opened by setOutfileName()."""
		self.Outfile.close()

	def write(self, argString):
		"""Write argString to the output file."""
		self.Outfile.write(argString)

	def _formatAttributes(self, argAttrs):
		"""Return argAttrs rendered as the attribute part of a tag.

		argAttrs is a list of tuples.
		Each tuple is a pair of (attribute_name, attribute_value).
		Every rendered attribute is preceded by a single space, so the
		result can be placed directly after the tag name.
		"""
		parts = []
		for key, value in argAttrs:
			if value is None:
				# Attribute given without a value (e.g. <input disabled>):
				# write the bare name rather than the text ="None".
				parts.append(" %s" % key)
			else:
				# The parser hands over values with character references
				# already decoded, so re-encode what would break a
				# double-quoted value.  "&" goes first so the "&" of
				# "&quot;" is not escaped a second time.
				value = value.replace("&", "&amp;").replace('"', "&quot;")
				parts.append(' %s="%s"' % (key, value))
		return "".join(parts)

	def handle_starttag(self, argTag, argAttrs):
		""" Write a start tag, e.g. <a href="x">.

		argAttrs is a list of tuples.
		Each tuple is a pair of (attribute_name, attribute_value)
		"""
		self.write("<%s%s>" % (argTag, self._formatAttributes(argAttrs)))

	def handle_startendtag(self, argTag, argAttrs):
		""" Write an empty-element tag, e.g. <br/>.

		argAttrs is a list of tuples.
		Each tuple is a pair of (attribute_name, attribute_value)
		"""
		self.write("<%s%s/>" % (argTag, self._formatAttributes(argAttrs)))

	def handle_endtag(self, argTag):
		"""Write an end tag, e.g. </a>."""
		self.write("</%s>" % argTag)

	def handle_data(self, argString):
		"""Write text data unchanged."""
		self.write(argString)

	def handle_charref(self, argString):
		"""Write a numeric character reference, e.g. &#160;."""
		self.write("&#%s;" % argString)

	def handle_entityref(self, argString):
		"""Write a named entity reference, e.g. &amp;."""
		self.write("&%s;" % argString)

	def handle_comment(self, argString):
		"""Write a comment, restoring the <!-- and --> delimiters."""
		self.write("<!--%s-->" % argString)

	def handle_decl(self, argString):
		"""Write a declaration (e.g. a DOCTYPE), restoring the <! and > delimiters."""
		self.write("<!%s>" % argString)

	def handle_pi(self, argString):
		"""Write a processing instruction, restoring the <? and > delimiters."""
		self.write("<?%s>" % argString)
		
def main(myInfileName, myOutfileName):
	"""Read the HTML file myInfileName and write it back out to myOutfileName.

	The input is parsed with CustomizedParser, which writes each construct
	it recognises to the output file.  Both files are closed even if
	reading or parsing raises.
	"""
	# Read the input first, so a missing input file fails before the
	# output file is opened for writing.
	myInfile = open(myInfileName, "r")
	try:
		myText = myInfile.read()
	finally:
		myInfile.close()

	myParser = CustomizedParser()
	myParser.setOutfileName(myOutfileName)
	try:
		myParser.feed(myText)
		# close() makes the parser process anything feed() left buffered
		# (e.g. trailing text); without it that tail never reaches the file.
		myParser.close()
	finally:
		myParser.closeOutfile()


def dq(s):
	"""Return the string s wrapped in a pair of double-quote characters."""
	quote = '"'
	return quote + s + quote

if __name__ == "__main__":
	# Demo run: copy one HTML file to another through the parser.
	# A parenthesised single-argument print behaves the same whether print
	# is a statement or a function.
	print("Starting HTMLParserDemoProgram")
	# Raw strings keep the backslashes of the Windows paths literal instead
	# of relying on "\j" and "\s" not being recognised escape sequences.
	main(r"c:\junk\slide01.html",  r"c:\junk\slide01a.html")
	print("Ending   HTMLParserDemoProgram")



More information about the Python-list mailing list