Python/Perl Popularity (Re: A Mountain of Perl...)
Janko Hauser
jhauser at ifm.uni-kiel.de
Wed Apr 12 01:21:59 EDT 2000
Use docindex.py!
This module is definitely no substitute for perldoc, but it is a start that
answers your question. It gets the relevant information from the HTML
documentation tree, which needs to be installed on your system.
A benefit over docstrings is that one does not need to import a
module. This is a prototype, but it shows that it is possible. Not that
this will change your mind about Python :-)
HTH,
__Janko
#!/usr/bin/env python
#
# docindex -- get HTML documentation for an index of keywords.
#
__version__ = '0.1.0'
__author__ = 'Janko Hauser <jhauser at starship.python.net>'
import re, htmllib, formatter
import os, string, cPickle
import glob
# A simple help system which builds on top of docreader.py from
# Jim Eggleston <jimegg at gol.com>. These classes are included here to
# have only one file. The interface is changed a little bit and the
# indices are generated once and can then be stored in a pickle.
#
#
## START docreader.py -- read Python HTML docs from the command line.
## docreader builds a dictionary of terms from the index of the Python Library
## HTML documentation. The user is prompted to enter a term. If there is only
## one entry for the term, the section of the relevant document node is displayed.
## If there is more than one entry, a numbered list of index entries for that term
## is displayed. Individual entries can be read by entering the list item number.
## docreader will exit if a null string is entered as a search term.
## Setup:
## The global variables docdir, contents_doc and pager must be set according to
## your particular configuration.
## Changes:
## 14-Mar-1998 Created
## 15-Mar-1998 Added MyWriter class taken from Tony Ibbs 'html2text.py'
## Jim Eggleston <jimegg at gol.com>
#
# Change these according to your configuration
# (This is now done in the shell class.)
#
# directory where the 'lib' HTML doc files are kept
# docdir = '/home/hauser/public_html/doc/python/lib'
# the index document
# index_doc = 'genindex.html' # 'node268.html'
# Command name of an external pager program (e.g. 'more', 'less') that
# reads the text to display from its standard input.
# (Set to None if no pager is available.)
pager = 'less'
#
# Sub-class HTMLParser to add minimal support for tables, and also for
# (future) support of links.
#
class MyHTMLParser(htmllib.HTMLParser):
    """HTML parser that numbers links and starts a new line per table row.

    Every anchor that carries an href is recorded as an ``(href, name)``
    pair in ``self.anchorlist`` (the list itself is expected to be created
    by the htmllib base class) and is followed in the output by a ``[n]``
    marker, where n is the number of links recorded so far.
    """

    def __init__(self, formatter, verbose=0):
        htmllib.HTMLParser.__init__(self, formatter, verbose)

    def anchor_bgn(self, href, name, type):
        """Remember the anchor being opened; record it if it is a link."""
        self.anchor = href
        self.anchorname = name
        if self.anchor:
            # list.append() takes exactly one argument, so the pair has to
            # be wrapped in a tuple (the two-argument form is a TypeError
            # from Python 2.0 on).
            self.anchorlist.append((href, name))

    def anchor_end(self):
        """Emit the ``[n]`` reference marker after the text of a link."""
        if self.anchor:
            self.handle_data("[%d]" % len(self.anchorlist))
        # TODO: anchors that only have a name could be rendered as
        # "[#name]" using self.anchorname.

    def do_tr(self, attr):
        """Send a line break before each table row."""
        self.formatter.add_line_break()
#
# Based on text2html.py, by Tony Ibbs (Tibbs).
#
# Set to a true value to make MyWriter print a trace of every call it
# receives.
DEBUG = 0
class MyWriter(formatter.NullWriter):
    """Formatter writer that produces word-wrapped, indented plain text.

    Flowing text is written to ``file`` and wrapped before a line would
    reach ``maxcol`` columns.  Margins requested by the formatter are
    turned into a left indentation of ``indent`` columns per step, and
    list labels add an ``extra_indent`` for the text that follows them.
    When the module-level DEBUG flag is true every call is traced with
    print.
    """

    def __init__(self, file=None, maxcol=72):
        """Set up the writer.

        file   -- object with a write() method; any false value selects
                  sys.stdout
        maxcol -- column at which flowing text is wrapped
        """
        # Imported locally: the module header does not import sys, so the
        # default file=None used to fail with a NameError.
        import sys
        self.file = file or sys.stdout
        self.maxcol = maxcol
        self.margin = 0         # margin
        self.extra_margin = 0   # extra margin in a list element
        self.indent = 3         # increment for margin
        self.extra_indent = 2   # indent for extra margin
        self.level = {}         # margin tag -> level it was last seen with
        formatter.NullWriter.__init__(self)
        self.reset()

    def reset(self):
        """Forget the position on the current output line."""
        self.col = 0        # current column
        self.atbreak = 0    # is this a good place to linebreak?

    def new_alignment(self, align):
        if DEBUG: print("new_alignment(%s)" % repr(align))

    def new_font(self, font):
        if DEBUG: print("new_font(%s)" % repr(font))

    def new_margin(self, margin, level):
        """Adjust the left margin for a margin change of the formatter.

        margin -- margin tag, or None to drop back to column 0
        level  -- level reported together with the tag

        A tag seen for the first time is recorded and indents by one step.
        A tag seen before indents by one step at level 1, dedents by one
        step (never below 0) at level 2 and leaves the margin alone at any
        other level.  The extra list margin is always cleared.
        """
        if DEBUG: print("new_margin(%s, %d)" % (repr(margin), level))
        if margin is None:
            self.margin = 0
        elif margin in self.level:
            if level == 1:
                self.level[margin] = level
                self.margin = self.margin + self.indent
            elif level == 2:
                self.level[margin] = level
                self.margin = self.margin - self.indent
                if self.margin < 0: self.margin = 0
        else:
            self.level[margin] = level
            self.margin = self.margin + self.indent
        if DEBUG: print(" margin = %s" % self.margin)
        self.extra_margin = 0

    def new_spacing(self, spacing):
        if DEBUG: print("new_spacing(%s)" % repr(spacing))

    def new_styles(self, styles):
        if DEBUG: print("new_styles(%s)" % repr(styles))

    def send_paragraph(self, blankline):
        """End the paragraph: write max(1, blankline) newlines."""
        if DEBUG: print("send_paragraph(%s)" % repr(blankline))
        if blankline > 1:
            self.file.write('\n' * (blankline - 1))
        self.reset()
        self.file.write("\n")

    def send_line_break(self):
        """End the current output line."""
        if DEBUG: print("send_line_break()")
        self.reset()
        self.file.write("\n")

    def send_hor_rule(self, *args, **kw):
        """Write a rule of maxcol dashes on a line of its own."""
        if DEBUG: print("send_hor_rule(%s,%s)" % (args, kw))
        self.file.write('\n')
        self.file.write('-' * self.maxcol)
        self.file.write('\n')
        self.reset()

    def start_margin(self):
        """Write the indentation of the current margin plus extra margin."""
        if DEBUG: print(" start_margin()")
        self.file.write(" " * self.margin + " " * self.extra_margin)
        self.col = self.col + self.margin

    def start_line(self):
        """Begin a new output line and indent it."""
        if DEBUG: print(" start_line()")
        self.file.write("\n")
        self.start_margin()

    def send_label_data(self, data):
        """Write a list label on a fresh line, then enable the extra margin."""
        if DEBUG: print("send_label_data(%s)" % repr(data))
        # The label itself is written without the extra margin ...
        self.extra_margin = 0
        self.start_line()
        self.file.write(data + " ")
        # ... which only applies to the lines that follow the label.
        self.extra_margin = self.extra_indent

    def send_flowing_data(self, data):
        """Write text word by word, wrapping before maxcol is reached."""
        if DEBUG: print("send_flowing_data(%s)" % repr(data))
        if not data: return
        atbreak = self.atbreak or data[0] in string.whitespace
        col = self.col
        if col == 0:
            self.start_margin()
        maxcol = self.maxcol
        write = self.file.write
        for word in data.split():
            if atbreak:
                if col + len(word) >= maxcol:
                    col = 0
                    self.start_line()
                else:
                    write(' ')
                    col = col + 1
            write(word)
            if DEBUG: print("  %s" % word)
            col = col + len(word)
            atbreak = 1
        self.col = col
        self.atbreak = data[-1] in string.whitespace

    def send_literal_data(self, data):
        """Write preformatted text unchanged and track the column."""
        if DEBUG: print("send_literal_data(%s)" % repr(data))
        self.file.write(data)
        i = data.rfind('\n')
        if i >= 0:
            # Only the text after the last newline is on the current line.
            self.col = 0
            data = data[i+1:]
        data = data.expandtabs()
        self.col = self.col + len(data)
        self.atbreak = 0
######
# Helper functions for building the index datastructure
######
# Scan through all nodes and build a dictionary mapping node file names
# to document titles.
#
def build_std_nodes(docdir):
    """Map each HTML node file in *docdir* to its document title.

    Every ``*.html`` file directly inside *docdir* is read line by line
    until a line starts with ``<TITLE>`` (or ``<title>``), followed by an
    optional section number, one blank and the title text.  The title ends
    at the closing tag or, if the line has none, at the last ``--``.
    Files without such a line are left out of the result.

    Returns a dictionary keyed by file name (without directory) whose
    values are the titles with surrounding whitespace removed.
    """
    title_re = re.compile(
        r'(<TITLE>|<title>)([0-9\.]*) (.*)((--)|(</TITLE>|</title>))')
    node_titles = {}
    for path in glob.glob(os.path.join(docdir, "*.html")):
        node = os.path.basename(path)
        # "with" closes each file as soon as its title has been found.
        with open(path) as f:
            for line in f:
                m = title_re.match(line)
                if m:
                    # Group 1 is the tag and group 2 the section number;
                    # the title text itself is group 3.
                    node_titles[node] = m.group(3).strip()
                    break
    return node_titles
#
# Create a dictionary from the index document that maps index terms
# to node documents and anchors.
#
def build_std_index(docdir, docindex):
    """Build the keyword index from the index document of a doc tree.

    docdir   -- directory that holds the HTML documentation
    docindex -- file name of the index document inside *docdir*

    A line starting with ``<dt><a href="FILE#l2h-N">TERM</a>`` defines an
    entry; its key is TERM cut at the first blank or parenthesis.  A line
    starting with ``<A NAME=... HREF="FILE#N">`` adds one more location to
    the most recently seen key.

    Returns a dictionary mapping each key to a list of (file, anchor)
    tuples in the order they appear in the index document.
    """
    index_re = re.compile(
        r'<dt><a href=["|\']([^#]*)#(l2h-(\d*))["|\']>(.*)</a>')
    extra_re = re.compile(r'<A NAME=.*HREF="([^#]*)#(\d*)">')
    term_re = re.compile(r'([^ ()]*)')
    index = {}
    nterm = ''
    # "with" makes sure the index document is closed again.
    with open(os.path.join(docdir, docindex)) as f:
        for line in f:
            m = index_re.match(line)
            if m:
                nterm = term_re.match(m.group(4)).group(1)
                index.setdefault(nterm, []).append((m.group(1), m.group(2)))
            m = extra_re.match(line)
            if nterm and m and nterm in index:
                index[nterm].append((m.group(1), m.group(2)))
    return index
#
# Extract a section of a node referenced by an anchor.
#
class docreader:
    """Get documentation from an index of keywords.

    The indices are either loaded from a pickle file or built from the
    HTML documentation directories handed to the constructor.  Calling an
    instance with a keyword displays the matching documentation.
    """

    def __init__(self, stdroot, docdb=''):
        """Create the reader and load or build its indices.

        stdroot -- sequence of documentation directories; a single
                   directory may also be given as a plain string
        docdb   -- path of the pickle file with saved indices; when empty,
                   '.ipphelp' in the user's home directory is used
        """
        if stdroot and isinstance(stdroot, str):
            # Iterating over a bare string would yield single characters.
            stdroot = (stdroot,)
        self.std_docdir = stdroot
        self.std_index_file = 'genindex.html'
        self.pager = 'less'
        self.use_pager = 1
        self.page_size = 1000
        self.docdb = docdb
        self.ask_fullview = 1
        self.show_url = 1
        self.load_db()

    def show(self, data):
        """Format HTML and write it to the screen.

        A pager is used if the documentation text is too long."""
        if self.pager and self.use_pager:
            out = os.popen(self.pager, 'w')
            piped = 1
        else:
            import sys
            out = sys.stdout
            piped = 0
        fmtr = formatter.AbstractFormatter(MyWriter(out))
        p = MyHTMLParser(fmtr)
        try:
            p.feed(data)
            p.close()
        except IOError:
            # Deliberately ignored, e.g. the pager was quit before all of
            # the text had been written to it.
            pass
        finally:
            if piped:
                # Closing the pipe lets the pager see the end of the text.
                try:
                    out.close()
                except IOError:
                    pass

    def load_db(self):
        """Get the indices.

        Sets ``hasdb`` when they were loaded from the pickle file and
        ``help_hasdb`` when they were built from the HTML directories.
        """
        # Start with an empty index so that lookups work even when there
        # is neither a pickle nor a documentation directory.
        self.index_cont = {}
        self.hasdb = 0
        self.help_hasdb = 0
        # Look first if there is already a pickle of the indices, currently
        # no global place but in the home of the user. Needs to be fixed!
        if not self.docdb:
            self.docdb = os.path.join(os.path.expanduser('~'), '.ipphelp')
        if os.path.isfile(self.docdb):
            # NOTE: unpickling can execute code stored in the file; only
            # use index files that you created yourself.
            with open(self.docdb, 'r') as f:
                self.index_cont = cPickle.load(f)[0]
            self.hasdb = 1
        elif self.std_docdir:
            # Build an index for every directory
            for help_dir in self.std_docdir:
                cont = {}
                cont['directory'] = help_dir
                cont['nodes'] = build_std_nodes(help_dir)
                cont['indices'] = build_std_index(help_dir,
                                                  self.std_index_file)
                self.index_cont[help_dir] = cont
            self.help_hasdb = 1

    def save(self):
        """Saves the indices for later use"""
        with open(self.docdb, 'w') as f:
            cPickle.dump((self.index_cont,), f)

    def __call__(self, name):
        self.get(name)

    def get(self, name):
        """Get info for the given name.

        Prints a message and returns when no index knows *name*.  With
        several matches the user is asked to pick one.
        """
        entries = []
        for dsource in self.index_cont.values():
            for entry in dsource['indices'].get(name, ()):
                entries.append([dsource['directory'], entry[0], entry[1]])
        if not entries:
            print('No help available')
            # Nothing to display; going on would use unbound variables.
            return
        if len(entries) > 1:
            sdir, node, anchor = self.display_node_select(name, entries)
        else:
            sdir, node, anchor = entries[0]
        if anchor:
            self.display_node_section(sdir, node, anchor)
            if self.ask_fullview:
                resp = raw_input('View entire node [yN]? ')
                if resp and resp[0] in 'yY':
                    self.display_node(sdir, node)
        else:
            self.display_node(sdir, node)

    def display_node_section(self, sdir, node, anchor):
        """Display the part of a node that follows the given anchor.

        The section runs from ``<a name="ANCHOR">`` up to the next
        ``<a name``, ``<HR>`` or ``</BODY>``.  The pager is only used when
        the text is longer than ``page_size`` characters.
        """
        node_path = os.path.join(sdir, node)
        # The anchor is escaped so that it is always matched literally.
        anchor_re = re.compile(
            r'<a name=["|\']%s["|\']>(.*?)(?:<a name|<HR>|</BODY>)'
            % re.escape(anchor), re.DOTALL)
        with open(node_path) as f:
            data = f.read()
        m = anchor_re.search(data)
        if m:
            if self.show_url:
                # node_path instead of sdir + node: also correct when the
                # directory has no trailing separator.
                data = 'file:%s#%s<P>%s' % (node_path, anchor, m.group(1))
            else:
                data = 'Lookup in %s<P>%s' % (node, m.group(1))
        else:
            data = 'No help available'
        if len(data) > self.page_size:  # page_size in characters
            self.use_pager = 1
        else:
            self.use_pager = 0
        self.show(data)

    def display_node(self, sdir, node):
        """Display an entire node"""
        self.use_pager = 1  # always use the pager for a full node
        with open(os.path.join(sdir, node)) as f:
            data = f.read()
        self.show(data)

    def display_node_select(self, name, entries):
        """List all entries for *name* and let the user choose one.

        Returns the (directory, node, anchor) of the chosen entry.  Empty
        or unusable input selects the last listed entry.
        """
        print("\nEntries for '%s':" % name)
        i = 0
        for entry in entries:
            sdir, node, anchor = entry
            i = i + 1
            fnode = node
            if fnode[:7] == 'module-':
                # 'module-NAME.html' is listed as just NAME
                fnode = fnode[7:-5]
            print(" %s: %s" % (i, fnode))
        print("")
        item = raw_input('Lookup in module? ')
        if item:
            try:
                number = int(item)
            except ValueError:
                number = 0
            if 1 <= number <= len(entries):
                sdir, node, anchor = entries[number - 1]
            else:
                print("Invalid selection '%s', using entry %d"
                      % (item, len(entries)))
        return sdir, node, anchor
if __name__ == '__main__':
    # Demo: index the documentation trees below and look up one keyword.
    # Put the paths to the HTML library documentation on your system here.
    doc_roots = (
        '/home/user/Doc/html/NumPy/',
        '/home/user/PYHTML/lib/',
    )
    help = docreader(doc_roots)
    help('round')
--
Institut fuer Meereskunde phone: 49-431-597 3989
Dept. Theoretical Oceanography fax : 49-431-565876
Duesternbrooker Weg 20 email: jhauser at ifm.uni-kiel.de
24105 Kiel, Germany
--
Institut fuer Meereskunde phone: 49-431-597 3989
Dept. Theoretical Oceanography fax : 49-431-565876
Duesternbrooker Weg 20 email: jhauser at ifm.uni-kiel.de
24105 Kiel, Germany
More information about the Python-list
mailing list