Python/Perl Popularity (Re: A Mountain of Perl...)

Wed Apr 12 01:21:59 EDT 2000

Use docindex.py!

This module is definitely no substitute for perldoc, but it is a start that
answers your question. It gets the relevant information from the HTML
tree, which needs to be installed on your system.

A benefit over docstrings is that one does not need to import a
module. This is a prototype, but it shows that it is possible. Not that
this will change your mind about Python :-)

HTH,
__Janko

#!/usr/bin/env python
#
# docindex -- get HTML documentation for an index of keywords.
#

# Module metadata: release version and author contact.
__version__ = '0.1.0'
__author__ = 'Janko Hauser <jhauser at starship.python.net>'

import re, htmllib, formatter
import os, string, cPickle
import glob

# A simple help system which builds on top of docreader.py from
# Jim Eggleston  <jimegg at gol.com>. These classes are included here to
# have only one file. The interface is changed a little bit and the
# indices are generated once and can then be stored in a pickle.
#
#
## START docreader.py -- read Python HTML docs from the command line.

## docreader builds a dictionary of terms from the index of the Python Library
## HTML documentation. The user is prompted to enter a term. If there is only
## one entry for the term, the section of the relevant document node is displayed.
## If there is more than one entry, a numbered list of index entries for that term
## is displayed. Individual entries can be read by entering the list item number.

## docreader will exit if a null string is entered as a search term.

## Setup: 
## The global variables docdir, contents_doc and pager must be set according to
## your particular configuration.

## Changes:
## 14-Mar-1998 Created
## 15-Mar-1998 Added MyWriter class taken from Tony Ibbs 'html2text.py'

## Jim Eggleston  <jimegg at gol.com>
#
# Change these according to your configuration.
# (This is now done in the shell class.)
#

# directory where the 'lib' HTML doc files are kept
# docdir = '/home/hauser/public_html/doc/python/lib'

# the index document
# index_doc = 'genindex.html' # 'node268.html'

# Command of an external pager program (e.g. 'more', 'less'). The pager
# must read the text to display from its standard input.
# (Set to None if no pager is available.)
pager = 'less'

#
# Sub-class HTMLParser to add minimal support for tables, and also for
# (future) support of links.
#
class MyHTMLParser(htmllib.HTMLParser):

    def __init__(self, formatter, verbose=0):
        htmllib.HTMLParser.__init__(self, formatter, verbose)

    def anchor_bgn(self, href, name, type):
        self.anchor = href
        self.anchorname = name
        if self.anchor:
            self.anchorlist.append(href, name)

    def anchor_end(self):
        if self.anchor:
            self.handle_data("[%d]" % len(self.anchorlist))

        # (Look at handling anchor names later)
        # elif self.anchorname:
        #     self.handle_data("[#%s]" % self.anchorname)

    # Send a line break before each table row       
    def do_tr(self, attr):
        self.formatter.add_line_break()

#
# Based on html2text.py, by Tony Ibbs (Tibbs).
#
DEBUG = 0   # set to a true value to trace every writer call on stdout


class MyWriter(formatter.NullWriter):
    """Writer that renders formatter events as word-wrapped plain text.

    Output goes to ``file``.  Flowing data is wrapped once a word would
    reach column ``maxcol``; every new line starts with ``margin`` plus
    ``extra_margin`` spaces.
    """

    def __init__(self, file=None, maxcol=72):
        """Create a writer on *file*; a false *file* means ``sys.stdout``."""
        if not file:
            # Imported locally: the module has no top-level ``import sys``.
            import sys
            file = sys.stdout
        self.file = file
        self.maxcol = maxcol
        self.margin = 0         # margin
        self.extra_margin = 0   # extra margin in a list element
        self.indent = 3         # increment for margin
        self.extra_indent = 2   # indent for extra margin
        self.level = {}         # last level seen for each margin tag
        formatter.NullWriter.__init__(self)
        self.reset()

    def reset(self):
        """Forget the current column and pending break position."""
        self.col = 0            # current column
        self.atbreak = 0        # is this a good place to linebreak?

    def new_alignment(self, align):
        if DEBUG:
            print("new_alignment(%s)" % repr(align))

    def new_font(self, font):
        if DEBUG:
            print("new_font(%s)" % repr(font))

    def new_margin(self, margin, level):
        """Adjust the left margin for the margin tag *margin*.

        ``None`` resets the margin to 0.  A tag seen for the first time
        increases the margin by ``indent``.  For a tag seen before, level 1
        increases and level 2 decreases the margin (never below 0); other
        levels leave it unchanged.  ``extra_margin`` is always cleared.
        """
        if DEBUG:
            print("new_margin(%s, %d)" % (repr(margin), level))

        if margin is None:
            self.margin = 0
        elif margin in self.level:
            if level == 1:
                self.level[margin] = level
                self.margin = self.margin + self.indent
            elif level == 2:
                self.level[margin] = level
                self.margin = max(self.margin - self.indent, 0)
        else:
            self.level[margin] = level
            self.margin = self.margin + self.indent
        if DEBUG:
            print("   margin = %s" % self.margin)
        self.extra_margin = 0

    def new_spacing(self, spacing):
        if DEBUG:
            print("new_spacing(%s)" % repr(spacing))

    def new_styles(self, styles):
        if DEBUG:
            print("new_styles(%s)" % repr(styles))

    def send_paragraph(self, blankline):
        """End the paragraph, writing *blankline* newlines (at least one)."""
        if DEBUG:
            print("send_paragraph(%s)" % repr(blankline))
        if blankline > 1:
            self.file.write('\n' * (blankline - 1))
        self.reset()
        self.file.write("\n")

    def send_line_break(self):
        if DEBUG:
            print("send_line_break()")
        self.reset()
        self.file.write("\n")

    def send_hor_rule(self, *args, **kw):
        """Write a rule of ``maxcol`` dashes on a line of its own."""
        if DEBUG:
            print("send_hor_rule(%s,%s)" % (args, kw))
        self.file.write('\n')
        self.file.write('-' * self.maxcol)
        self.file.write('\n')
        self.reset()

    def start_margin(self):
        """Write the indentation for the current margin settings."""
        if DEBUG:
            print("   start_margin()")
        self.file.write(" " * self.margin + " " * self.extra_margin)
        self.col = self.col + self.margin

    def start_line(self):
        """Start a new, indented output line."""
        if DEBUG:
            print("   start_line()")
        self.file.write("\n")
        self.start_margin()

    def send_label_data(self, data):
        """Write a list label on a new line; following text gets extra indent."""
        if DEBUG:
            print("send_label_data(%s)" % repr(data))
        self.extra_margin = 0
        self.start_line()
        self.file.write(data + " ")
        self.extra_margin = self.extra_indent

    def send_flowing_data(self, data):
        """Write *data* word by word, wrapping before ``maxcol``."""
        if DEBUG:
            print("send_flowing_data(%s)" % repr(data))
        if not data:
            return
        atbreak = self.atbreak or data[0] in string.whitespace
        # NOTE(review): ``col`` is read before start_margin() runs and is
        # reset to 0 on every wrap, so the margin width is not counted
        # against ``maxcol``.  Kept as is to preserve the existing layout.
        col = self.col

        if col == 0:
            self.start_margin()

        maxcol = self.maxcol
        write = self.file.write
        for word in data.split():
            if atbreak:
                if col + len(word) >= maxcol:
                    col = 0
                    self.start_line()
                else:
                    write(' ')
                    col = col + 1
            write(word)
            if DEBUG:
                print("    %s" % word)
            col = col + len(word)
            atbreak = 1
        self.col = col
        self.atbreak = data[-1] in string.whitespace

    def send_literal_data(self, data):
        """Write *data* verbatim and update the column from its last line."""
        if DEBUG:
            print("send_literal_data(%s)" % repr(data))
        self.file.write(data)
        i = data.rfind('\n')
        if i >= 0:
            self.col = 0
            data = data[i + 1:]
        data = data.expandtabs()
        self.col = self.col + len(data)
        self.atbreak = 0

######
# Helper functions for building the index datastructure
######
# Scan through all nodes and build a dictionary mapping node file names
# to document titles.
#
def build_std_nodes(docdir):
    """Map every ``*.html`` file name in *docdir* to its document title.

    Each file is scanned line by line for the first line that starts with
    a ``<TITLE>`` tag of the form ``<TITLE>1.2 Some title</TITLE>``; the
    text after the section number is stored, stripped of surrounding
    whitespace.  Files without such a line are left out of the result.

    Returns a dictionary ``{file name: title}``.
    """
    title_re = re.compile(
        r'(<TITLE>|<title>)([0-9\.]*) (.*)((--)|(</TITLE>|</title>))')
    node_titles = {}
    for path in glob.glob(os.path.join(docdir, "*.html")):
        node = os.path.basename(path)
        with open(path) as f:
            for line in f:
                m = title_re.match(line)
                if m:
                    # group(2) is the section number; the title is group(3)
                    node_titles[node] = m.group(3).strip()
                    break
    return node_titles

#
# Create a dictionary from the index document that maps index terms
# to node documents and anchors.
#
def build_std_index(docdir, docindex):
    """Build the keyword index from the index document *docindex* in *docdir*.

    A line starting with ``<dt><a href="FILE#l2h-N">TERM</a>`` defines an
    entry; the key is TERM up to its first space or parenthesis.  A later
    line starting with ``<A NAME=... HREF="FILE#N">`` adds a further
    location to the most recently seen non-empty key.

    Returns a dictionary ``{term: [(file name, anchor), ...]}``.
    """
    # ["\'] accepts either quote character (the former ["|\'] also
    # accepted a literal '|').
    index_re = re.compile(
        r'<dt><a href=["\']([^#]*)#(l2h-(\d*))["\']>(.*)</a>')
    extra_re = re.compile(r'<A NAME=.*HREF="([^#]*)#(\d*)">')
    term_re = re.compile(r'([^ ()]*)')

    index = {}
    nterm = ''
    with open(os.path.join(docdir, docindex)) as f:
        for line in f:
            m = index_re.match(line)
            if m:
                nterm = term_re.match(m.group(4)).group(1)
                index.setdefault(nterm, []).append((m.group(1), m.group(2)))
            m = extra_re.match(line)
            if m and nterm and nterm in index:
                index[nterm].append((m.group(1), m.group(2)))
    return index

#
# Look up keywords in the indices and display the matching documentation.
#
class docreader:
    """Get documentation from an index of keywords.

    Attributes (all may be changed after construction):
      std_docdir      directory, or sequence of directories, with HTML docs
      std_index_file  name of the index document inside each directory
      pager           pager command reading from stdin, or a false value
      use_pager       whether the next show() call goes through the pager
      page_size       size in characters above which a section is paged
      docdb           path of the pickle file that caches the indices
      ask_fullview    offer to view the whole node after showing a section
      show_url        prefix a section with a ``file:`` URL of its source
      index_cont      {directory: {'directory', 'nodes', 'indices'}}
      hasdb           1 if the indices were loaded from the pickle, else 0
      help_hasdb      1 if the indices were built from the HTML, else 0
    """
    def __init__(self, stdroot, docdb=''):
        self.std_docdir = stdroot
        self.std_index_file = 'genindex.html'
        self.pager = 'less'
        self.use_pager = 1
        self.page_size = 1000
        self.docdb = docdb
        self.ask_fullview = 1
        self.show_url = 1
        self.load_db()

    def show(self, data):
        """Format HTML and write it to the screen.

        A pager is used if one is configured and ``use_pager`` is set;
        otherwise the text goes to ``sys.stdout``.
        """
        use_pipe = self.pager and self.use_pager
        if use_pipe:
            out = os.popen(self.pager, 'w')
        else:
            import sys
            out = sys.stdout
        parser = MyHTMLParser(formatter.AbstractFormatter(MyWriter(out)))
        try:
            try:
                parser.feed(data)
                parser.close()
            finally:
                if use_pipe:
                    # closing the pipe waits for the pager to exit
                    out.close()
        except IOError:
            # e.g. the user quit the pager before all text was written
            pass

    def load_db(self):
        """Get the indices, from the pickle if present, else from the HTML."""
        # Look first if there is already a pickle of the indices, currently no
        # global place but in the home of the user. Needs to be fixed!
        if not self.docdb:
            self.docdb = os.path.join(os.path.expanduser('~'), '.ipphelp')
        self.index_cont = {}
        self.hasdb = 0
        self.help_hasdb = 0
        if os.path.isfile(self.docdb):
            # NOTE: unpickling can execute arbitrary code; only point docdb
            # at files you created yourself with save().
            with open(self.docdb, 'rb') as f:
                self.index_cont = cPickle.load(f)[0]
            self.hasdb = 1
        elif self.std_docdir:
            roots = self.std_docdir
            if isinstance(roots, str):
                # a single directory given as a plain string
                roots = (roots,)
            # Build an index for every directory
            for help_dir in roots:
                self.index_cont[help_dir] = {
                    'directory': help_dir,
                    'nodes': build_std_nodes(help_dir),
                    'indices': build_std_index(help_dir,
                                               self.std_index_file),
                }
            self.help_hasdb = 1

    def save(self):
        """Save the indices to ``docdb`` for later use."""
        with open(self.docdb, 'wb') as f:
            cPickle.dump((self.index_cont,), f)

    def __call__(self, name):
        self.get(name)

    def get(self, name):
        """Show the documentation for *name*.

        If several entries match, the user is asked to pick one.  Prints
        'No help available' and returns when nothing matches.
        """
        entries = []
        for dsource in self.index_cont.values():
            for entry in dsource['indices'].get(name, ()):
                entries.append([dsource['directory'], entry[0], entry[1]])
        if not entries:
            print('No help available')
            return

        if len(entries) > 1:
            sdir, node, anchor = self.display_node_select(name, entries)
        else:
            sdir, node, anchor = entries[0]

        if anchor:
            self.display_node_section(sdir, node, anchor)
            if self.ask_fullview:
                resp = raw_input('View entire node [yN]? ')
                if resp and resp[0] in 'yY':
                    self.display_node(sdir, node)
        else:
            self.display_node(sdir, node)

    def display_node_section(self, sdir, node, anchor):
        """Show the part of *node* that follows the anchor named *anchor*.

        The section ends at the next ``<a name``, ``<HR>`` or ``</BODY>``.
        The pager is used only if the text is longer than ``page_size``.
        """
        node_path = os.path.join(sdir, node)
        # escape the anchor so it is matched literally
        anchor_re = re.compile(
            r'<a name=["\']%s["\']>(.*?)(?:<a name|<HR>|</BODY>)'
            % re.escape(anchor), re.DOTALL)
        with open(node_path) as f:
            data = f.read()
        m = anchor_re.search(data)
        if m:
            if self.show_url:
                # node_path rather than sdir + node: sdir need not end
                # with a path separator
                data = 'file:%s#%s<P>%s' % (node_path, anchor, m.group(1))
            else:
                data = 'Lookup in %s<P>%s' % (node, m.group(1))
        else:
            data = 'No help available'
        if len(data) > self.page_size:  # page_size in characters
            self.use_pager = 1
        else:
            self.use_pager = 0

        self.show(data)

    def display_node(self, sdir, node):
        """Display an entire node"""
        self.use_pager = 1  # always use the pager for a full node
        with open(os.path.join(sdir, node)) as f:
            data = f.read()
        self.show(data)

    def display_node_select(self, name, entries):
        """List *entries* for *name* and let the user choose one.

        Returns the chosen ``(directory, node, anchor)`` tuple.  An empty
        answer selects the last listed entry, as it always did; anything
        that is not a listed number is rejected and asked for again.

        Raises ValueError if *entries* is empty.
        """
        if not entries:
            raise ValueError('no entries to select from')
        print("\nEntries for '%s':" % name)
        for i, (sdir, node, anchor) in enumerate(entries):
            fnode = node
            if fnode[:7] == 'module-':
                # 'module-NAME.html' -> 'NAME'
                fnode = fnode[7:-5]
            print("    %s: %s" % (i + 1, fnode))
        print("")
        while 1:
            item = raw_input('Lookup in module? ')
            if not item:
                return tuple(entries[-1])
            try:
                choice = int(item)
            except ValueError:
                choice = 0
            if 1 <= choice <= len(entries):
                return tuple(entries[choice - 1])
            print('Please enter a number between 1 and %d' % len(entries))

if __name__ == '__main__':

    # Put the path to the HTML library documentation on your system here.
    doc_roots = (
        '/home/user/Doc/html/NumPy/',
        '/home/user/PYHTML/lib/',
    )
    reader = docreader(doc_roots)

    # Example lookup: calling the reader shows the help for a keyword.
    reader('round')

-- 
  Institut fuer Meereskunde             phone: 49-431-597 3989
  Dept. Theoretical Oceanography        fax  : 49-431-565876
  Duesternbrooker Weg 20                email: jhauser at ifm.uni-kiel.de
  24105 Kiel, Germany

-- 
  Institut fuer Meereskunde             phone: 49-431-597 3989
  Dept. Theoretical Oceanography        fax  : 49-431-565876
  Duesternbrooker Weg 20                email: jhauser at ifm.uni-kiel.de
  24105 Kiel, Germany