Htmlizing text
Bjorn Pettersen
bjorn at roguewave.com
Mon Nov 29 16:49:24 EST 1999
> Is there a function in the standard Python library to HTML-ize text,
> i.e. to replace 'a > b & c < d' with 'a &gt; b &amp; c &lt; d'?
This is the one I use (look for text2html below). It does a little more
than what you asked for, such as turning URLs into clickable links and
rendering *word* in italics.
(and yes, it's extracted almost verbatim from the faqwizard :-)
-- bjorn
import string
import re
# Matches http/ftp/https URLs and bare e-mail addresses.  Compiled once at
# import time instead of on every translate() call.
translate_prog = re.compile(
    r'\b(http|ftp|https)://\S+(\b|/)|\b[-.\w]+@[-.\w]+')


def translate(text, pre=0):
    """Escape *text* for HTML and turn URLs and e-mail addresses into links.

    Text between matches is passed through escape().  Each match has its
    trailing punctuation trimmed (so "see http://x.org." does not swallow the
    full stop) and is then wrapped in an <A HREF=...> anchor.  A match that
    contains no ':' is treated as an e-mail address: it is linked with a
    mailto: URL and displayed inside angle brackets.

    Args:
        text: the plain-text string to convert.
        pre: when true, matches are escaped but NOT wrapped in anchors.

    Returns:
        The converted HTML fragment as a single string.
    """
    pieces = []
    pos = 0
    while True:
        m = translate_prog.search(text, pos)
        if not m:
            break
        start = m.start()
        # Everything between the previous match and this one is plain text.
        pieces.append(escape(text[pos:start]))
        # Trailing punctuation almost always belongs to the surrounding
        # sentence, not to the URL/address itself.
        url = m.group(0).rstrip('();:,.?\'"<>')
        # Resume right after the trimmed match so the stripped punctuation
        # is emitted (escaped) as ordinary text on the next pass.
        pos = start + len(url)
        url = escape(url)
        if pre:
            repl = url
        else:
            # escape() leaves double quotes alone; encode them here so a
            # quote inside the URL cannot terminate the HREF attribute.
            href = url.replace('"', '&quot;')
            if ':' in url:
                repl = '<A HREF="%s">%s</A>' % (href, url)
            else:
                repl = '<A HREF="mailto:%s">&lt;%s&gt;</A>' % (href, url)
        pieces.append(repl)
    pieces.append(escape(text[pos:]))
    return ''.join(pieces)
def escape(s):
    """Return *s* with the HTML-special characters &, < and > replaced by
    the entities &amp;, &lt; and &gt;.

    Double quotes are left untouched; use escapeq() for attribute values.
    """
    # '&' must be handled first, otherwise the ampersands introduced by the
    # '<' and '>' replacements would themselves be escaped again.
    s = s.replace('&', '&amp;')
    s = s.replace('<', '&lt;')
    s = s.replace('>', '&gt;')
    return s
def escapeq(s):
    """Return *s* escaped with escape(), with double quotes additionally
    replaced by &quot; so the result can sit inside a double-quoted
    attribute value.
    """
    s = escape(s)
    s = s.replace('"', '&quot;')
    return s
def emphasize(line):
    """Return *line* with every *word* rendered as <I>word</I>.

    Only a run of ASCII letters directly enclosed by asterisks is converted;
    anything else between asterisks (digits, spaces, punctuation) is left
    unchanged.
    """
    return re.sub(r'\*([a-zA-Z]+)\*', r'<I>\1</I>', line)
def text2html(body):
    """Convert the plain-text *body* to an HTML fragment, line by line.

    Rules applied to each line:
      * Lines between a line reading <html> and one reading </html>
        (case-insensitive, trailing whitespace ignored) are copied through
        verbatim; the marker lines themselves are dropped.
      * A blank line closes an open <PRE> block, otherwise it emits <P>.
      * A line starting with whitespace opens a <PRE> block (if none is
        open); a line starting with anything else closes it.
      * Lines containing '/' or '@' go through translate(), which escapes
        them and links URLs / e-mail addresses (links are suppressed inside
        <PRE>); other lines containing '<', '>' or '&' go through escape().
      * Outside <PRE>, *word* is italicised via emphasize().

    Returns:
        The generated lines joined with newlines.
    """
    res = []
    pre = 0   # true while inside an open <PRE> block
    raw = 0   # true while inside an <html> ... </html> pass-through section
    for line in body.split('\n'):
        tag = line.rstrip().lower()
        if tag == '<html>':
            raw = 1
            continue
        if tag == '</html>':
            raw = 0
            continue
        if raw:
            res.append(line)
            continue
        if not line.strip():
            if pre:
                res.append('</PRE>')
                pre = 0
            else:
                res.append('<P>')
            continue
        # Non-blank line: leading whitespace decides whether we are in <PRE>.
        if line[0] not in string.whitespace:
            if pre:
                res.append('</PRE>')
                pre = 0
        elif not pre:
            res.append('<PRE>')
            pre = 1
        if '/' in line or '@' in line:
            # Cheap pre-filter: only such lines can contain a URL or address.
            line = translate(line, pre)
        elif '<' in line or '>' in line or '&' in line:
            line = escape(line)
        if not pre and '*' in line:
            line = emphasize(line)
        res.append(line)
    if pre:
        res.append('</PRE>')
    # Join with newlines, not spaces: preformatted and raw-HTML lines must
    # keep their line structure.
    return '\n'.join(res)
More information about the Python-list
mailing list