organize, annotate, bookmark personal web page collection

Huaiyu Zhu hzhu at yahoo.com
Tue Sep 5 18:25:08 EDT 2000


On Tue, 05 Sep 2000 09:35:07 GMT, bayinnaung at my-deja.com
<bayinnaung at my-deja.com> wrote: 
>
>
>Is there any work in Python out there for organizing, annotating, and
>bookmarking...(searching would also be nice) one's own personal
>collection of web pages?

Enclosed is a "quick and dirty indexing" tool in python, as its name says.
You create a bunch of directories (or use existing ones), place a couple of
text files in it (00annot.in, 00index.in, 00tabs.in) and call python qdindex
dir1 dir2, etc to generate dir1/index.html, dir2/index.html, etc which can
link to any url. I use this to annotate my directories.  The home page of
MatPy (in my sig) is a simple example application.

I'm interested in hearing about a better tool.  Is there a parser for Netscape
bookmarks?  How do people generate fancy title fonts?


Huaiyu
-- 
Huaiyu Zhu                       hzhu at users.sourceforge.net
Matrix for Python Project        http://MatPy.sourceforge.net 


#!/usr/bin/env python
#----------------------------------------------------------------------------
# Name:         qdindex.py (Quick and dirty indexing)
# Purpose:      Generates index.html per directory from input files
# Author:       Huaiyu Zhu 
# Copyright:    (C) 2000 Huaiyu Zhu,  License:      GNU GPL
# Changes:
#        2000-02-13 Initial version
#        2000-03-09 public version 0.1.0
# Bugs:
#        Silently overwrites index.html if 00index.in and 00annot.in exist
#        Any unrecognized input line counts as empty line
# $Id: qdindex.py,v 1.6 2000-04-03 11:55:54-07 hzhu Exp hzhu $
#----------------------------------------------------------------------------

""" This program is intended as a simple annotation tool for organizing
directory listings.  It accepts a list of directory names as arguments
(defaults to '.' if empty).  For each directory dir it generates a file
dir/index.html if the input files dir/00annot.in and dir/00index.in exist.

The input files contain paragraphs separated by empty lines.  For
00annot.in, the first line becomes title, the second line contains the
directory where 00tabs.in is located, the rest are introductory texts.

The file 00tabs.in contains line-separated list of white-space-separated
pairs of names and links to be displayed before title.

For 00index.in, each paragraph becomes a table with a different bgcolor.
Within each table each input line of the form
"Topic" url url ...
(where url can be a local path) becomes one row where the urls are
hyperlinked.  If url/index.html exists it will be substituted as the link.

It uses html style-sheets (css) for syntax highlighting.  Any html tag can
be embedded in the input files.  """

#------------------------------------------------------------------
"All the formats"

class ColorRotate:
	"""Callable that hands out background colors in a fixed, repeating order.

	Every call returns the next name from `colors`; after the last name
	has been returned the sequence starts over with the first one.
	"""
	# Other pastel candidates, currently disabled:
	#   mintcream, lavenderblush, beige, aliceblue, linen, lightcyan
	colors = [
		"papayawhip", "lavender", "mistyrose", "palegoldenrod", "wheat",
		"gainsboro", "bisque", "peachpuff", "powderblue", "moccasin",
	]

	i = 0            # position of the color the next call will return
	n = len(colors)  # length of the cycle, computed once at class creation

	def __call__(self):
		picked = self.colors[self.i]
		following = self.i + 1
		if following >= self.n:
			following = 0
		# Assigning through self gives each instance its own counter; the
		# class attribute `i` stays at 0 as the starting value.
		self.i = following
		return picked

# Shared module-level rotator
color = ColorRotate()

def print_head(title):
	"""Print the opening <html><head> part: embedded style sheet and <title>.

	title -- text placed inside <title>.  It is inserted verbatim (no HTML
	         escaping), so markup contained in it passes through unchanged.

	The <head> element is left open on purpose; the closing </head> is
	emitted by print_body().
	"""
	print('<html><head>\n <style><!--')
	print(' h1 {color:#3300aa}')
	# Was "text-align: jestify", which is not a valid value and is ignored
	# by browsers.
	print(' p.intro {color:teal; text-align: justify}')
	# Was "fontweight:bold"; the CSS property name is "font-weight".
	print(' p code, code, pre {color:darkviolet; font-weight:bold}')
	print(' b, strong {color:teal}')
	print(' i, em {color:limegreen}')
	print(' table {border: 2}')
	print(' .comment {color:blue}')
	print(' --></style>')
	print(' <title>%s</title>' % title)

def print_body(title):
	"""Close <head>, open <body> and print the centered page heading.

	title -- heading text, inserted verbatim into the <H1> element.
	An intro paragraph (<p class=intro>) is left open after the heading.
	"""
	body_open = '</head><body bgcolor="#f2ffff">\n'
	print(body_open)
	heading = '<H1 align=center>%s</H1>\n<p class=intro>' % title
	print(heading)

def print_tabs(itemlist = []):
	"""Print the navigation tabs as a table of fixed-width cells.

	itemlist -- sequence of (name, ref) pairs; each pair becomes one cell
	            showing name as a hyperlink to ref.
	"""
	print('<table bgcolor=#aaffcc>')
	for item in itemlist:
		(name, ref) = item
		cell = ' <td width=60><a href="%s">%s</a>' % (ref, name)
		print(cell)
	print('</table>')

def print_startindex():
	"End the intro paragraph, draw a rule and open the first index table"
	table_open = "</p><hr>\n\n<table bgcolor=%s>" % color()
	print(table_open)

def print_par():
	"Unrecognized lines change paragraph: close the table, open the next one"
	print("</table>")
	# Each new table takes the next background color from the rotator
	next_table = "<table bgcolor=%s>" % color()
	print(next_table)

def print_tailer(date):
	"""Close the last index table and finish the page.

	date -- value shown in the "Index generated at" footer line.
	"""
	print("</table>")
	footer = "<hr>Index generated at <code>%s</code>" % date
	print(footer)
	print("</body></html>")

#------------------------------------------------------------------
"All the data"

"RE for parsing input data"
import re
# Index entry line: must begin with a double-quoted topic that is followed
# by at least one more character (the whitespace-separated links).
# Group 1 is the topic, group 2 everything after the closing quote.
# [^"]+ is greedy, so blanks just before the closing quote stay in group 1
# and the \s* that follows matches nothing.
entry = re.compile(r'"([^"]+)\s*"(.+)') # Cant use [!"]
# Paragraph separator: a line that is empty or holds only whitespace.
par = re.compile(r'^\s*$')

"Files and directories"
import os.path
# Short module-level aliases for the two os.path helpers used by the script
pathjoin, isfile = os.path.join, os.path.isfile

def existfile(path, dir, name):
	"""Return dir/name if path/dir/name is an existing file, else None.

	path -- directory being indexed; only used for the existence test
	dir  -- directory (or link) as written in the input file
	name -- file name to look for inside dir

	The returned value does not include path, so it can be used as a link
	relative to the directory being indexed.
	"""
	link = pathjoin(dir, name)
	if not isfile(pathjoin(path, link)):
		return None
	return link

class Entry:
	"""One index row built from a matched input line.

	subj    -- first group of the match (the quoted topic)
	dirlist -- second group of the match, split on whitespace
	"""
	def __init__(self, entrymatch):
		subject, links = entrymatch.groups()
		self.subj = subject
		self.dirlist = string.split(links)

	def show(self, path):
		"""Print the row: topic cell, then one hyperlink per listed item.

		An item that names a directory under path containing index.html
		(or, failing that, index.htm) is linked to that file instead.
		"""
		print(' <td width="250"><b>%s</b><td width="350">' % self.subj)
		for target in self.dirlist:
			link = existfile(path, target, "index.html")
			if not link:
				link = existfile(path, target, "index.htm")
			if not link:
				# Not a local directory with an index page: use it as given
				link = target
			print('\t<a href="%s">%s</a>' % (link, link))
		print(" <tr>")

"Get time" # date '+%Y-%m-%d %H:%M:%S'
import time
# Local-time stamp with minute resolution, taken once when the module loads
# and echoed right away.
date = time.strftime('%Y-%m-%d %H:%M', time.localtime(time.time()))
print(date)

class Tabs:
	"""Global clickable tabs, read from path/dir/00tabs.in.

	After construction, self.tabs is a list of (name, link) tuples, one per
	usable line of the tabs file, in file order.  It is empty when the file
	does not exist (a warning is printed in that case).
	"""
	def __init__(self, path, dir):
		"""Read and resolve the tabs.

		path -- directory being indexed
		dir  -- location of 00tabs.in, relative to path
		"""
		tabsfile = pathjoin(path, pathjoin(dir, "00tabs.in"))
		if isfile(tabsfile):
			infile = open(tabsfile)
			try:
				lines = infile.readlines()
			finally:
				infile.close()
		else:
			print("Warning: File %s does not exist" % tabsfile)
			lines = []

		tabs = []
		for line in lines:
			fields = string.split(line)
			# A tab needs a name and a link.  Blank or one-word lines used
			# to raise IndexError here; they are skipped instead.  Fields
			# beyond the second are ignored, as before.
			if len(fields) < 2:
				continue
			name, link = fields[0], fields[1]
			# Deal with relative links: prefer an existing file under dir,
			# then an index.html inside the linked directory, and fall
			# back to the link exactly as written (e.g. a full URL).
			resolved = (existfile(path, dir, link)
						or existfile(path, dir, pathjoin(link, "index.html"))
						or link)
			tabs.append((name, resolved))
		self.tabs = tabs

#------------------------------------------------------------------
import  string, sys
def dirindex(path):
	"""Generate path/index.html from path/00annot.in and path/00index.in.

	path -- directory to index.

	The path is echoed to standard output first.  If either input file is
	missing nothing is written.  Otherwise index.html is (over)written with
	the page head, the tabs, the title, the introductory text and one table
	per paragraph of 00index.in.  Always returns None.

	The page is produced by print statements, so sys.stdout is pointed at
	the output file while writing.  It is restored (and the file closed)
	afterwards even if writing fails; otherwise later console messages,
	such as the progress lines for the next directory, would be appended
	to this directory's index.html.
	"""
	print(path)
	annotfile = pathjoin(path, "00annot.in")
	idxfile = pathjoin(path, "00index.in")
	if not isfile(idxfile): return None
	if not isfile(annotfile): return None

	# Read annotation and index completely before touching index.html, so
	# that a read problem cannot leave a half-written page behind.
	annot = open(annotfile)
	try:
		title = string.strip(annot.readline())    # line 1: page title
		tabsdir = string.strip(annot.readline())  # line 2: where 00tabs.in is
		tabs = Tabs(path, tabsdir).tabs
		intro = annot.readlines()                 # rest: introductory text
	finally:
		annot.close()
	index = open(idxfile)
	try:
		lines = index.readlines()
	finally:
		index.close()

	outfile = pathjoin(path, "index.html")
	out = open(outfile, "w")
	saved_stdout = sys.stdout
	sys.stdout = out
	try:
		# Write annotation
		print_head(title)
		print_tabs(tabs)
		print_body(title)
		for line in intro:
			# A blank line starts a new intro paragraph
			if par.match(line): line = par.sub("</p><p class=intro>", line)
			else: line = " " + line
			print(string.rstrip(line))

		# Write index as tables: every line that is not an entry (blank or
		# unrecognized) closes the current table and opens the next one.
		print_startindex()
		for line in lines:
			entrymatch = entry.match(line)
			if entrymatch:
				Entry(entrymatch).show(path)
			else:
				print_par()

		# Finishing up
		print_tailer(date)
	finally:
		sys.stdout = saved_stdout
		out.close()

#------------------------------------------------------------------
if __name__ == "__main__":
	# Directories to index come from the command line; with no arguments
	# the current directory is indexed.
	from sys import argv
	dirs = argv[1:] or ["."]
	print("Updating index in directories %s" % dirs)
	for target in dirs:
		print("---> Indexing %s" % target)
		dirindex(target)



More information about the Python-list mailing list