[Python-checkins] commit of r41633 - sandbox/trunk/seealso sandbox/trunk/seealso/make-seealso.py sandbox/trunk/seealso/parse-seealso.py

Wed Dec 7 17:21:49 CET 2005

Author: andrew.kuchling
Date: Wed Dec  7 17:21:47 2005
New Revision: 41633

Added:
   sandbox/trunk/seealso/
   sandbox/trunk/seealso/make-seealso.py   (contents, props changed)
   sandbox/trunk/seealso/parse-seealso.py   (contents, props changed)
Log:
Add strawman implementation of /F's seealso idea

Added: sandbox/trunk/seealso/make-seealso.py
==============================================================================

--- (empty file)
+++ sandbox/trunk/seealso/make-seealso.py	Wed Dec  7 17:21:47 2005
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+# Read a pickled dictionary, and output a bunch of *.tex files
+# into the specified directory.
+
+import os, sys
+import pickle
+
+def main ():
+    """Rewrite example-dir with one <module>.tex file per database entry.
+
+    Usage: make-seealso.py database-filename example-dir.  Every existing
+    *.tex file in example-dir is deleted first.  The database is a pickled
+    dictionary mapping a module name to a list of (title, url, excerpt)
+    tuples; excerpt may be None.
+    """
+    if len(sys.argv) < 3:
+        print 'Usage: %s database-filename example-dir' % sys.argv[0]
+        sys.exit(1)
+
+    db_file = sys.argv[1]
+    example_dir = sys.argv[2]
+
+    def tex_escape (t):
+        # Only % and $ are escaped; other TeX specials pass through.
+        return t.replace('%', r'\%').replace('$', r'\$')
+
+    # Delete all *.tex files
+    for fn in os.listdir(example_dir):
+        if fn.endswith('.tex'):
+            p = os.path.join(example_dir, fn)
+            print p
+            os.remove(p)
+
+    # Read dictionary.  NOTE: pickle is unsafe on untrusted input.
+    infile = open(db_file, 'rb')
+    try:
+        db = pickle.load(infile)
+    finally:
+        infile.close()
+
+    # Write file containing examples for each module
+    # XXX sort examples in some way?
+    for module, examples in db.items():
+        output = open(os.path.join(example_dir, module + '.tex'), 'w')
+        try:
+            for title, url, excerpt in examples:
+                text = tex_escape(title)
+                if excerpt is not None:
+                    text += "\n\n" + tex_escape(excerpt)
+                output.write("\\seeurl{%s}{%s}\n" % (tex_escape(url), text))
+        finally:
+            output.close()
+	
+
+if __name__ == '__main__':
+    main()    # run only when this file is executed as a script
+    

Added: sandbox/trunk/seealso/parse-seealso.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/seealso/parse-seealso.py	Wed Dec  7 17:21:47 2005
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+
+# Parse a seealso file, and add its contents to a pickled dictionary.
+
+import os, sys
+import urllib, pickle
+from xml.dom import minidom
+
+def main ():
+    """Fetch a seealso XML file and merge its items into the database.
+
+    Usage: parse-seealso.py URL database-filename.  The database is a
+    pickled dictionary mapping 'module name' -> list of
+    (title, url, excerpt) tuples; it is created if it does not exist.
+    """
+    if len(sys.argv) < 3:
+        print 'Usage: %s URL database-filename' % sys.argv[0]
+        sys.exit(1)
+
+    url = sys.argv[1]
+    db_file = sys.argv[2]
+
+    # Fetch XML data
+    f = urllib.urlopen(url)
+    try:
+        data = f.read()
+    finally:
+        f.close()
+
+    # Parse XML data
+    dom = minidom.parseString(data)
+    L = []
+    def get_text (node):
+        # Concatenate all text below node, descending into child elements.
+        t = ""
+        for c in node.childNodes:
+            if c.nodeType == c.TEXT_NODE:
+                t += c.nodeValue
+            elif c.nodeType == c.ELEMENT_NODE:
+                t += get_text(c)
+        return t
+
+    for item in dom.getElementsByTagNameNS(None, 'item'):
+        href = item.getAttributeNS(None, 'href')
+        title_node = item.getElementsByTagNameNS(None, 'title')[0]
+        title = get_text(title_node)
+
+        # XXX markup such as emphasis is dropped; only its text is kept
+        excerpt_nodes = item.getElementsByTagNameNS(None, 'excerpt')
+        if excerpt_nodes:
+            excerpt = get_text(excerpt_nodes[0])
+        else:
+            excerpt = None
+
+        # Multiple target elements are allowed
+        for t in item.getElementsByTagNameNS(None, 'target'):
+            L.append((get_text(t), title, href, excerpt))
+
+    # Update database.  NOTE: only unpickle files you trust.
+    if not os.path.exists(db_file):
+        db = {}
+    else:
+        infile = open(db_file, 'rb')
+        try:
+            db = pickle.load(infile)
+        finally:
+            infile.close()
+    for module, title, url, excerpt in L:
+        # If the URL is already listed, drop the old entry, then append.
+        exlist = [t for t in db.get(module, []) if t[1] != url]
+        exlist.append((title, url, excerpt))
+        db[module] = exlist
+
+    output = open(db_file, 'wb')
+    try:
+        pickle.dump(db, output)
+    finally:
+        output.close()
+
+if __name__ == '__main__':
+    main()    # run only when this file is executed as a script
+