# Python CGI syntax highlighter
#
# Rafal Smotrzyk rav at smo3k.z.pl
# Tue Mar 6 06:39:11 EST 2001


# Rafal Smotrzyk
# 24/11/2000
# mailto:rsmotrzyk at mikroplan.com.pl

import tokenize
import token
import keyword
import string
import sys
import os

# Identifiers that the highlighter colours as builtins.  Only key membership
# is ever tested, so every name simply maps to the flag value 1.
d_builtins = dict.fromkeys((
   '__debug__', '__doc__', '__import__', '__name__',
   'abs', 'apply', 'buffer', 'callable', 'chr', 'cmp', 'coerce',
   'compile', 'complex', 'delattr', 'dir', 'divmod', 'eval', 'execfile',
   'exit', 'filter', 'float', 'getattr', 'globals', 'hasattr', 'hash',
   'hex', 'id', 'input', 'int', 'intern', 'isinstance', 'issubclass',
   'len', 'list', 'locals', 'long', 'map', 'max', 'min', 'oct', 'open',
   'ord', 'pow', 'quit', 'range', 'raw_input', 'reduce', 'reload',
   'repr', 'round', 'setattr', 'slice', 'str', 'tuple', 'type',
   'unload', 'vars', 'xrange',
), 1)

class PrettyPyPrinter:
   """Render Python source as HTML, colouring tokens with <font> tags.

   Typical use is ``PrettyPyPrinter().Process(infile, outfile)``.  The work
   is split in two steps that can also be driven separately:

   * ``Tokenize(fin)`` records every token of the source and keeps the
     source lines in ``self.lines``.
   * ``Generate(fout)`` escapes the lines for HTML, wraps the tokens in
     markup and writes the result.

   The code deliberately sticks to constructs that run on both Python 2
   (2.2 or later) and Python 3.
   """

   def __init__(self,aspage=1):
      """Create a printer.

      aspage -- when true, Generate() wraps its output in
                <html><body> ... </body></html>.
      """
      self.AsPage=aspage
      # Filled in by Tokenize(); initialised here so that the attributes
      # always exist.
      self.token_list=[]
      self.lines=[]

   def TokenEater(self,atokentype,atokenstring,abeginrowcolumntuple,aendrowcolumntuple,aline):
      """Record one token as [brow, bcol, erow, ecol, type, string].

      The signature matches the 5-tuples produced by the tokenize module;
      the physical line (aline) is not needed and is ignored.
      """
      brow,bcol=abeginrowcolumntuple
      erow,ecol=aendrowcolumntuple
      self.token_list.append([brow,bcol,erow,ecol,atokentype,atokenstring])

   def Process(self,fnamein,fnameout):
      """Read the Python file fnamein and write its HTML form to fnameout.

      The input is read completely and closed before the output file is
      opened.
      """
      fin=open(fnamein,'r')
      try:
         self.Tokenize(fin)
      finally:
         fin.close()
      fout=open(fnameout,'w')
      try:
         self.Generate(fout)
      finally:
         fout.close()

   def InsertTag(self,arow,acol,s):
      """Insert the text s into line arow (1-based) at column acol (0-based)."""
      line=self.lines[arow-1]
      self.lines[arow-1]=line[:acol]+s+line[acol:]

   def Tokenize(self,fin):
      """Tokenize the open text file fin and remember its lines.

      fin must support readline(), seek(0) and readlines().  Afterwards
      self.token_list holds the tokens sorted from the END of the source
      to its start, which lets Generate() insert tags right-to-left
      without invalidating the column offsets of tokens still to come.
      """
      self.token_list=[]
      # generate_tokens() exists on Python 2.2+ and Python 3, unlike the
      # callback form tokenize.tokenize(readline, tokeneater).
      for atoken in tokenize.generate_tokens(fin.readline):
         self.TokenEater(*atoken)
      fin.seek(0)
      self.lines=fin.readlines()
      self.token_list.sort()
      self.token_list.reverse()

   def _EscapeHtml(self,text):
      """Return text with &, < and > replaced by HTML entities."""
      text=text.replace('&','&amp;')
      text=text.replace('<','&lt;')
      return text.replace('>','&gt;')

   def _TokenText(self,index):
      """Return the string of token_list[index], or '' if index is out of range.

      Negative indexes count as out of range instead of wrapping around.
      """
      if 0<=index<len(self.token_list):
         return self.token_list[index][5]
      return ''

   def _EscapedCol(self,rawlines,arow,acol):
      """Map a column of the raw source line to the escaped line.

      Escaping works character by character, so the escaped form of the
      prefix rawlines[arow-1][:acol] is exactly the prefix of the escaped
      line; its length is the wanted column.
      """
      return len(self._EscapeHtml(rawlines[arow-1][:acol]))

   def _WrapToken(self,rawlines,atoken,opentag,closetag):
      """Surround one token with opentag/closetag inside self.lines."""
      brow,bcol,erow,ecol=atoken[:4]
      # Close tag first: it sits to the right of (or below) the open tag,
      # so inserting it cannot shift the open tag's position.
      self.InsertTag(erow,self._EscapedCol(rawlines,erow,ecol),closetag)
      self.InsertTag(brow,self._EscapedCol(rawlines,brow,bcol),opentag)

   def Generate(self,fout):
      """Write the highlighted HTML for the tokenized source to fout.

      fout only needs a write() method.  self.lines is replaced by the
      escaped, tagged text, so call Tokenize() again before generating a
      second time.  Token types that are neither highlighted nor known
      layout tokens are reported on standard output.
      """
      rawlines=self.lines
      # Escape first, then insert markup, so only source text is escaped.
      self.lines=[self._EscapeHtml(aline) for aline in rawlines]
      # Layout tokens that carry no text worth colouring.
      ignored=[token.NEWLINE,token.INDENT,token.DEDENT,token.ENDMARKER,tokenize.NL]
      for i in range(len(self.token_list)):
         atoken=self.token_list[i]
         brow,bcol,erow,ecol,atokentype,atokenstring=atoken
         opentag=closetag=None
         if atokentype==token.OP:
            opentag,closetag='<font color="PURPLE">','</font>'
         elif atokentype==token.NAME:
            # token_list runs backwards through the source: i+1 is the
            # token BEFORE this one, i-1 the token AFTER it.
            prevtext=self._TokenText(i+1)
            nexttext=self._TokenText(i-1)
            if keyword.iskeyword(atokenstring):
               opentag,closetag='<font color="NAVY"><b>','</b></font>'
            elif atokenstring in d_builtins and prevtext!='.':
               # A builtin name used as an attribute (x.len) is not a builtin.
               opentag,closetag='<font color="NAVY">','</font>'
            elif prevtext in ['def','class']:
               # Name being defined.
               opentag,closetag='<font color="BLUE"><B>','</B></font>'
            elif nexttext=='(':
               # Name being called.
               opentag,closetag='<font color="PURPLE">','</font>'
            elif atokenstring=='None':
               opentag,closetag='<font color="RED">','</font>'
            else:
               opentag,closetag='<font color="BLACK">','</font>'
         elif atokentype==token.NUMBER:
            opentag,closetag='<font color="RED">','</font>'
         elif atokentype==token.STRING:
            opentag,closetag='<font color="MAGENTA">','</font>'
         elif atokentype==tokenize.COMMENT:
            opentag='<font color="NAVY" style="background:YELLOW;"><I>'
            closetag='</I></font>'
         elif atokentype not in ignored:
            # Diagnostic for token types this highlighter does not know.
            sys.stdout.write('%s %5d%5d%5d%5d | %d%20s >%s<\n'%(
               ' ',brow,bcol,erow,ecol,atokentype,
               token.tok_name[atokentype],repr(atokenstring)))
         if opentag is not None:
            self._WrapToken(rawlines,atoken,opentag,closetag)
      if self.AsPage:
         fout.write('<html><body>\n')
      fout.write('<pre>')
      for aline in self.lines:
         # Inside <pre> the newlines are kept as they are; no <br> needed.
         # NOTE(review): chr(255) is mapped to a blank as in the original;
         # the reason is not visible here -- confirm it is still wanted.
         fout.write(aline.replace(chr(255),' '))
      fout.write('</pre>')
      if self.AsPage:
         fout.write('\n</body></html>')

if __name__=='__main__':
   # Command line: py2html input.py [output.html]
   # Without an explicit output name the input's extension is replaced
   # by '.html'.
   if len(sys.argv)==3:
      fname1=sys.argv[1]
      fname2=sys.argv[2]
   elif len(sys.argv)==2:
      fname1=sys.argv[1]
      fname2=os.path.splitext(fname1)[0]+'.html'
   else:
      # Wrong argument count: show the usage on stderr and exit with a
      # non-zero status so calling scripts can detect the failure.
      sys.stderr.write('py2html input.py [output.html]\n')
      sys.exit(2)
   aprinter=PrettyPyPrinter()
   aprinter.Process(fname1,fname2)






# More information about the Python-list mailing list