Python Documentation generation
Matthias Baas
baas at ira.uka.de
Thu Jun 27 17:05:40 EDT 2002
On Thu, 20 Jun 2002 14:49:14 -0400, "Mahrt, Dallas"
<dallasm at aiinet.com> wrote:
>I am looking for an application which can generate HTML documentation from
>Python modules.
>(Note: some examples based on JavaDoc syntax)
You can use doxygen (http://www.doxygen.org) to create such
documentation. Basically, it's a tool like JavaDoc, but for the C/C++
languages. It has no built-in support for Python, but it allows you to pass
files through filters before they're actually processed.
I've written a filter that converts Python code into something that
looks like a C++ header file (see below). That way doxygen can also
be used to document Python code.
Every comment starting with ## and every docstring is literally
translated into a doxygen comment block.
>I have a few desires from such a program:
> - Ability to exclude elements based on naming conventions or
>keywords
> (ex. Exclude names starting with a single underscore '_foo'
>or containing the keyword @private)
My filter understands the special comments:
## public:
## protected:
## private:
which will insert the corresponding keyword in the C++ code. Thus, you
can mark methods as protected or private and tell doxygen to exclude
them from the documentation. (However, this won't work for functions).
> - Ability to have inter module linking. (If module A uses module B,
>then there should be an '@see B' like link [JavaDoc-ish])
You can add such commands into the doxygen comments (just like
JavaDoc). Here you should know that my filter puts everything into a
namespace with the same name as the file name. This means, if you
manually link to other parts of the documentation you have to include
the namespace.
> - The program should also allow for either:
> - generating module documentation from C extension modules
>(Should be tricky)
Well, that's what doxygen was actually written for. :)
> - allow for other documentation sections to be linked
>automatically by registration of module name
Uhm, I'm not sure if I know what you mean here. Maybe it's this: If
you mention a class name in your documentation this class name will
automatically appear as a link to the documentation of the class
(however, the "namespace feature" often prevents the automatic linking
from working properly).
- Matthias -
---------------------------------------------------------------------------------------------------
Here's the filter (pythfilter.py), I hope the word wrapping didn't
mangle the script....
#! /usr/bin/python
# Doxygen filter which can be used to document Python source code.
# Classes (incl. methods) and functions can be documented.
# Every comment that begins with ## is literally turned into a
# Doxygen comment. Consecutive comment lines are turned into
# comment blocks (-> /** ... */).
# All the stuff is put inside a namespace with the same name as
# the source file.
# Conversions:
# ============
# ##-blocks -> /** ... */
# "class name(base): ..." -> "class name : public base {...}"
# "def name(params): ..." -> "name(params) {...}"
# Written by Matthias Baas (baas at ira.uka.de)
# Changelog:
# 18.06.2002: There is now also a "## public:" marker
# 21.01.2002: "from ... import" is translated into "using namespace ...;"
# TODO: "from ... import *" vs "from ... import names"
# TODO: For a plain import: name.name -> name::name
# 20.01.2002: #includes are placed BEFORE the namespace
######################################################################
# The program is written as a state machine with the following states:
#
# - OUTSIDE The current position is outside any comment,
# class definition or function.
#
# - BUILD_COMMENT Begins with first "##".
# Ends with the first token that is no "##"
# at the same column as before.
#
# - BUILD_CLASS_DECL  Begins with "class".
#                     Ends with ":"
# - BUILD_CLASS_BODY  Begins just after BUILD_CLASS_DECL.
#                     The first following token (which is no comment)
#                     determines indentation depth.
#                     Ends with a token that has a smaller indentation.
#
# - BUILD_DEF_DECL    Begins with "def".
#                     Ends with ":".
# - BUILD_DEF_BODY    Begins just after BUILD_DEF_DECL.
#                     The first following token (which is no comment)
#                     determines indentation depth.
#                     Ends with a token that has a smaller indentation.
import sys,os.path,string,token,tokenize
# Parser states, numbered 0..8 in the order listed.
(OUTSIDE,
 BUILD_COMMENT,
 BUILD_CLASS_DECL,
 BUILD_CLASS_BODY,
 BUILD_DEF_DECL,
 BUILD_DEF_BODY,
 IMPORT,
 IMPORT_OP,
 IMPORT_APPEND) = range(9)

# Buffered output strings and the current output position.
outbuffer = []
out_row = out_col = 0

# Scratch variables filled in by rec_name_n_param().
name = param = doc_string = ""
record_state = 0

# Start positions of the current class / def / import, each (row, column).
class_spos = def_spos = import_spos = (0, 0)

# Keyword that introduced the current import ("import" or "from").
import_token = ""

# Pending comment block lines and the "block is complete" flag.
comment_block = []
comment_finished = 0

# Module names collected from the current import statement.
modules = []

# Stack of parser states; the last entry is the current state.
stateStack = [OUTSIDE]
######################################################################
def output(s, spos, immediate=0):
    """Emit the string s so that it starts at column spos[1].

    The string is left-padded with blanks from the current output column
    (out_col) up to spos[1]; no padding is added if the output is already
    at or past that column.  A '\\n' may only appear at the end of s,
    never in the middle, because the column bookkeeping only looks at
    the last character.

    In: s         - String to emit
        spos      - Start position as (row, column); only the column is used
        immediate - If true, write to stdout right away instead of
                    appending to outbuffer
    """
    global outbuffer, out_row, out_col

    padded = s.rjust(spos[1] - out_col + len(s))
    if immediate:
        sys.stdout.write(padded)
    else:
        outbuffer.append(padded)

    # The position is tracked for immediate output as well.
    if s[-1:] == "\n":
        out_row = out_row + 1
        out_col = 0
    else:
        out_col = spos[1] + len(s)
######################################################################
def rec_name_n_param(type, tok):
    """Feed one token to the name/parameter recorder.

    Records a name (class or function name) in the global "name", the
    text between its parentheses (base class or function parameters) in
    "param", and a doc string that directly follows the header in
    "doc_string".  "record_state" holds the state of this internal state
    machine; recording is started by start_recording() and is finished
    when record_state is 0 again.

    In: type - token type
        tok  - token string
    """
    global record_state, name, param, doc_string, param_paren_depth

    state = record_state

    # State 0: Do nothing.
    if state == 0:
        return

    # State 1: Remember name.
    elif state == 1:
        name = tok
        record_state = 2

    # State 2: Wait for opening bracket or colon.
    elif state == 2:
        if tok == "(":
            param_paren_depth = 1
            record_state = 3
        elif tok == ":":
            record_state = 4

    # State 3: Store parameter (or base class) text until the bracket
    # that matches the opening one.  Nested brackets, e.g. a tuple used
    # as default value, are part of the parameter text.
    elif state == 3:
        if tok == "(":
            param_paren_depth = param_paren_depth + 1
        elif tok == ")":
            param_paren_depth = param_paren_depth - 1
            if param_paren_depth == 0:
                record_state = 4
                return
        param = param + tok

    # State 4: Look for doc string.
    elif state == 4:
        # NOTE(review): token.SLASHEQUAL is kept from the original; it
        # looks like a stand-in for another token type - confirm.
        if type in (token.NEWLINE, token.INDENT, token.SLASHEQUAL):
            return
        if tok == ":":
            return
        if type == token.STRING:
            # Drop a string prefix (r, u, b) and the surrounding quotes,
            # whichever quote character is used.
            text = tok.lstrip("rRuUbB")
            doc_string = text.strip(text[:1])
        # Any other token ends the recording without a doc string.
        record_state = 0
######################################################################
def start_recording():
    """Start recording a name & parameter part.

    Clears "name", "param" and "doc_string" and puts the recorder into
    state 1.  Afterwards rec_name_n_param() has to be fed with tokens;
    once enough tokens were fed, the results can be found in the global
    variables "name" and "param".
    """
    global record_state, param, name, doc_string

    name = param = doc_string = ""
    record_state = 1
######################################################################
def is_recording_finished():
    """Return true if the recorder is in state 0, i.e. not recording."""
    return 0 == record_state
######################################################################
def gather_comment(type, tok, spos):
    """Gather comment block.

    A token that is no comment marks the current block as finished.
    A comment token first flushes a finished block via print_comment();
    its text (without the leading "##") is then appended to
    comment_block if it starts with exactly two '#' characters.

    In: type - token type
        tok  - token string
        spos - start position of the token
    """
    global comment_block, comment_finished

    if type != tokenize.COMMENT:
        comment_finished = 1
        return

    # Output old comment block if a new one is started.
    if comment_finished:
        print_comment(spos)
        comment_finished = 0

    is_doc_comment = tok.startswith("##") and not tok.startswith("###")
    if is_doc_comment:
        comment_block.append(tok[2:])
######################################################################
def print_comment(spos):
    """Output comment block and empty buffer.

    A non-empty comment_block is written as "/**", one line per entry,
    and "*/", all starting at column spos[1].  The buffer and the
    comment_finished flag are reset even if nothing was written.

    In: spos - start position used for every emitted line
    """
    global comment_block, comment_finished

    if comment_block:
        output("/**\n", spos)
        for entry in comment_block:
            # Entries may lack the line end (doc strings, or comment
            # tokens delivered without their newline); add it so that
            # lines do not run into each other or into the closing "*/".
            if entry[-1:] != "\n":
                entry = entry + "\n"
            output(entry, spos)
        output("*/\n", spos)
    comment_block = []
    comment_finished = 0
######################################################################
def set_state(s):
    """Replace the current state (top of stateStack) with s."""
    stateStack[-1] = s
######################################################################
def get_state():
    """Return the current state (top of stateStack)."""
    return stateStack[-1]
######################################################################
def push_state(s):
    """Enter state s, keeping the previous state below it on stateStack."""
    stateStack.append(s)
######################################################################
def pop_state():
    """Leave the current state and return to the one below it."""
    del stateStack[-1]
######################################################################
def tok_eater(type, tok, spos, epos, line):
    """Process one token and advance the filter's state machine.

    Every token is first passed to rec_name_n_param().  The special
    comments "## private:", "## protected:" and "## public:" (blanks
    are ignored) are turned into the corresponding C++ keyword; every
    other token goes to gather_comment().  Afterwards the token is
    handled according to the current state.

    In: type - token type
        tok  - token string
        spos - start position (row, column)
        epos - end position (unused)
        line - source line of the token (unused)
    """
    global class_spos, def_spos, import_spos, modules, import_token

    rec_name_n_param(type, tok)

    marker = tok.strip().replace(" ", "")
    if marker in ("##private:", "##protected:", "##public:"):
        output(marker[2:] + "\n", spos)
    else:
        gather_comment(type, tok, spos)

    state = get_state()

    # Token types that never end a class or function body.
    # NOTE(review): 40 is a hard-coded token number kept from the
    # original - presumably a version-specific token code; confirm.
    layout_types = (token.INDENT, token.NEWLINE, 40, tokenize.NL)

    # OUTSIDE
    if state == OUTSIDE:
        if tok == "class":
            start_recording()
            class_spos = spos
            push_state(BUILD_CLASS_DECL)
        elif tok == "def":
            start_recording()
            def_spos = spos
            push_state(BUILD_DEF_DECL)
        elif tok in ("import", "from"):
            import_token = tok
            import_spos = spos
            modules = []
            push_state(IMPORT)

    # IMPORT
    elif state == IMPORT:
        if type == token.NAME:
            modules.append(tok)
            set_state(IMPORT_OP)

    # IMPORT_OP
    elif state == IMPORT_OP:
        if tok == ".":
            set_state(IMPORT_APPEND)
        elif tok == ",":
            set_state(IMPORT)
        else:
            # Any other token ends the module list.  The #include lines
            # are written immediately, i.e. ahead of the buffered output.
            for m in modules:
                output('#include "' + m + '.py"\n', import_spos, immediate=1)
                if import_token == "from":
                    output('using namespace ' + m + ';\n', import_spos)
            pop_state()

    # IMPORT_APPEND
    elif state == IMPORT_APPEND:
        if type == token.NAME:
            modules[-1] += "." + tok
            set_state(IMPORT_OP)

    # BUILD_CLASS_DECL
    elif state == BUILD_CLASS_DECL:
        if is_recording_finished():
            decl = "class " + name
            if param != "":
                decl = decl + " : public " + param
            if doc_string != "":
                comment_block.append(doc_string)
            print_comment(class_spos)
            output(decl + "\n", class_spos)
            output("{\n", (class_spos[0] + 1, class_spos[1]))
            output(" public:\n", (class_spos[0] + 2, class_spos[1]))
            set_state(BUILD_CLASS_BODY)

    # BUILD_CLASS_BODY
    elif state == BUILD_CLASS_BODY:
        # The body ends with the first token that is neither layout nor
        # a comment and does not start right of the "class" keyword.
        if (type not in layout_types
                and type != tokenize.COMMENT
                and spos[1] <= class_spos[1]):
            output("}; // end of class\n", (out_row + 1, class_spos[1]))
            pop_state()
        elif tok == "def":
            start_recording()
            def_spos = spos
            push_state(BUILD_DEF_DECL)

    # BUILD_DEF_DECL
    elif state == BUILD_DEF_DECL:
        if is_recording_finished():
            decl = name + "(" + param + ");\n"
            if doc_string != "":
                comment_block.append(doc_string)
            print_comment(def_spos)
            # Only a declaration is emitted; the body is skipped.
            output(decl, def_spos)
            set_state(BUILD_DEF_BODY)

    # BUILD_DEF_BODY
    elif state == BUILD_DEF_BODY:
        # Unlike the class body, comments are not excluded here.
        if type not in layout_types and spos[1] <= def_spos[1]:
            pop_state()
def dump(filename):
    """Copy the file "filename" unchanged to stdout.

    In: filename - Name of the file to copy

    Errors from opening or reading the file are not caught here.
    """
    f = open(filename)
    try:
        for text_line in f.readlines():
            sys.stdout.write(text_line)
    finally:
        # Release the file even if writing fails.
        f.close()
def filter(filename):
    """Convert the Python file "filename" and write the result to stdout.

    The converted code is wrapped in a namespace named after the file
    (without directory and extension).  A progress message is written
    to stderr.

    In: filename - Name of the Python source file

    Errors from opening or reading the file are not caught here.
    """
    root = os.path.splitext(os.path.basename(filename))[0]
    output("namespace " + root + " {\n", (0, 0))

    sys.stderr.write('Filtering "' + filename + '"...')
    f = open(filename)
    try:
        # Same as the callback form tokenize.tokenize(readline, eater),
        # written as an explicit loop over the token generator.
        for tok_info in tokenize.generate_tokens(f.readline):
            tok_eater(*tok_info)
    finally:
        f.close()

    # Flush a comment block that is still pending at the end of the file.
    print_comment((0, 0))
    output("\n", (0, 0))
    output("} // end of namespace\n", (0, 0))

    for s in outbuffer:
        sys.stdout.write(s)
######################################################################
######################################################################
######################################################################
# Script entry: the command line arguments, joined with blanks, form
# the file name.  Files ending in ".py" are filtered, all others are
# copied unchanged.
try:
    filename = " ".join(sys.argv[1:])
    root, ext = os.path.splitext(filename)
    if ext == ".py":
        filter(filename)
    else:
        dump(filename)
    sys.stderr.write("OK\n")
except IOError:
    # sys.exc_info() avoids the version-specific "except X, e" syntax.
    e = sys.exc_info()[1]
    # Fall back to the full message if the error carries no strerror.
    sys.stderr.write((e.strerror or str(e)) + "\n")
More information about the Python-list
mailing list