HTML to LaTeX RE pattern and sub

cepl@surfbest.net ceplma at gmail.com
Fri Sep 23 16:37:09 EDT 2005


Hi,

trying to make a small script which would translate wiki file into
LaTeX and when trying to translate possible HTML elements into LaTeX I
did this:

def latexEnvironments(matchobj):
    print >>sys.stderr,str(matchobj.groups())
    inStr = matchobj.group(1)
    translDict = {'dl': ('\\begin{description}','\\end{description}'),
        'ol': ('\\begin{enumerate}','\\end{enumerate}'),
        'ul': ('\\begin{itemize}','\\end{itemize}'),
        'blockquote': ('\\begin{quote}','\\end{quote}'),
        'center': ('\\begin{center}','\\end{center}'),
        'li': ('\\item',''),
        'pre': ('\\begin{ttfamily}','\\end{ttfamily}')}
    if translDict.has_key(inStr):
        retTuple = translDict[inStr]
    else:
        retTuple = ('','')
    if matchobj.group(0) == '/':
        return retTuple[1]
    else:
        return retTuple[0]

def latexHeadings(matchobj):
    print >>sys.stderr,str(matchobj.groups())
    inStr = matchobj.groups[1]
    translDict = {'h1': '\\section{}',
        'h2': '\\subsection{}',
        'h3': '\\subsubsection{}',
        'h4': '\\subsubsubsection{}',
        'h5': '\\paragraph{}'}
    if translDict.has_key(inStr) and (matchobj.groups[0] != '/'):
        return translDict[inStr]
    else:
        return ''

def latexEmptyElements(matchobj):
    retString = ""
    print >>sys.stderr,str(matchobj.groups())
    inStr = matchobj.group(0)
    translDict = {'br//': '\\\\',
        'hr': '\\par{}\\hrulefill{}\\par{}'}
    if translDict.has_key(inStr):
        return translDict[inStr]
    else:
        return ''



[... snip ...]

# Remove superfluous HTML elements
reEnvironments =
re.compile(r'<(/?)(dl|ol|ul|address|blockquote|center|del\
|ins|div|isindex|noscript|p|pre)>',re.IGNORE)
reHeadings = re.compile(r'<(/?)(h1|h2|h3|h4|h5|h6)>',re.IGNORE)
reEmpty = re.compile(r'<(hr|br)\s*/?>',re.IGNORE)
body = reEnvironments.sub(latexEnvironments,body)
body = reHeadings.sub(latexHeadings,body)
body = reEmpty.sub(latexEmptyElements,body)

The problem is that apparently RE never matches (and no function is
thus called). Can anybody tell me what's wrong with my REs, please?

Thanks for any help,

Matej




More information about the Python-list mailing list