HTML to LaTeX RE pattern and sub
cepl@surfbest.net
ceplma at gmail.com
Fri Sep 23 16:37:09 EDT 2005
Hi,
trying to make a small script which would translate wiki file into
LaTeX and when trying to translate possible HTML elements into LaTeX I
did this:
def latexEnvironments(matchobj):
print >>sys.stderr,str(matchobj.groups())
inStr = matchobj.group(1)
translDict = {'dl': ('\\begin{description}','\\end{description}'),
'ol': ('\\begin{enumerate}','\\end{enumerate}'),
'ul': ('\\begin{itemize}','\\end{itemize}'),
'blockquote': ('\\begin{quote}','\\end{quote}'),
'center': ('\\begin{center}','\\end{center}'),
'li': ('\\item',''),
'pre': ('\\begin{ttfamily}','\\end{ttfamily}')}
if translDict.has_key(inStr):
retTuple = translDict[inStr]
else:
retTuple = ('','')
if matchobj.group(0) == '/':
return retTuple[1]
else:
return retTuple[0]
def latexHeadings(matchobj):
print >>sys.stderr,str(matchobj.groups())
inStr = matchobj.groups[1]
translDict = {'h1': '\\section{}',
'h2': '\\subsection{}',
'h3': '\\subsubsection{}',
'h4': '\\subsubsubsection{}',
'h5': '\\paragraph{}'}
if translDict.has_key(inStr) and (matchobj.groups[0] != '/'):
return translDict[inStr]
else:
return ''
def latexEmptyElements(matchobj):
retString = ""
print >>sys.stderr,str(matchobj.groups())
inStr = matchobj.group(0)
translDict = {'br//': '\\\\',
'hr': '\\par{}\\hrulefill{}\\par{}'}
if translDict.has_key(inStr):
return translDict[inStr]
else:
return ''
[... snip ...]
# Remove superfluous HTML elements
reEnvironments =
re.compile(r'<(/?)(dl|ol|ul|address|blockquote|center|del\
|ins|div|isindex|noscript|p|pre)>',re.IGNORE)
reHeadings = re.compile(r'<(/?)(h1|h2|h3|h4|h5|h6)>',re.IGNORE)
reEmpty = re.compile(r'<(hr|br)\s*/?>',re.IGNORE)
body = reEnvironments.sub(latexEnvironments,body)
body = reHeadings.sub(latexHeadings,body)
body = reEmpty.sub(latexEmptyElements,body)
The problem is that apparently RE never matches (and no function is
thus called). Can anybody tell me what's wrong with my REs, please?
Thanks for any help,
Matej
More information about the Python-list
mailing list