[XML-SIG] XBEL DTD

Marc van Grootel bwaumg@urc.tue.nl
Sun, 13 Sep 1998 02:31:50 +0200


Hi,

I attached a modified version of Lars's nsparse.py and bookmarks.py. I
changed nsparse to use htmllib since I thought it could cause problems 
when xmlproc gets HTML (empty elements: <hr> vs. <hr/> ?). I didn't
check that though.

I changed bookmarks.py to output xbel XML according to the dtd I
recently sent (oops, that's a lie -- it doesn't output an info element,
and in the dtd I defined it as a required element). Oh, and I removed the
dump_adr methods 'cause I didn't know how to implement the new
features for Opera.

  cat bookmark.htm | nsparse.py -ns >bookmark2.html
  cat bookmark.htm | nsparse.py >bookmark.xml

I only ran this one time on my big bookmark file and it worked. Don't
hit me if it blows up. It's just an illustration for getting stuff
into the new dtd. The code may be a bit messy too. I'm a recent Python
convert, so I hope it's not too much baby-talk ;)

I also thought that it would be nice to be able to store extra info in
the xbel file on different levels. This could be done by borrowing the
HTML meta tag idea:

  <xbel>
    <info>
      <meta name="generator" content="grail:?)"/>
      <meta name="created" content="123456789"/>
    </info>
    <folder>
      <info>
        <meta name="x" content="10"/>
      </info>
      <bookmark>...</bookmark>
      <bookmark>
        <info><meta name="y" content="20"/></info>
        ...
      </bookmark>

  etc.

We could then store arbitrary data with the major elements
(xbel,folder,bookmark). It's an easy enough addition without adding
much complexity. And if you don't need it, just ignore the info
elements. Maybe it could be used in web-maintenance tools like
linbot.

What bookmark formats should be supported? I would like to see
excerpts of different kinds (like Lynx, Opera) and see if any of those
makes changes to the dtd necessary. It would be nice if xbel could
be used to express most of these without loss of information.


Oh,well...

Marc

Here are the two scripts:

#
# nsparse.py
#
from htmllib import *
from formatter import NullFormatter
import bookmark

class NSBookmarkParser(HTMLParser):
    """Netscape bookmark-file parser that fills a bookmark.Bookmarks tree.

    The tag handlers only record what they see (title, description, dates,
    alias ids, ...) in instance attributes.  flush() turns that pending
    record into a folder, bookmark or alias on self.bms and then clears
    the record again.
    """

    # Pending-record attributes; all of them are None between entries.
    _RECORD_FIELDS = ('added', 'folded', 'desc', 'title', 'url_href',
                      'url_modified', 'url_visited', 'id', 'ref')

    # <A> attribute name -> pending-record attribute that stores its value.
    _ANCHOR_FIELDS = {
        'href': 'url_href',
        'add_date': 'added',
        'last_visit': 'url_visited',
        'last_modified': 'url_modified',
        'aliasid': 'id',
        'aliasof': 'ref',
    }

    def __init__(self):
        HTMLParser.__init__(self, NullFormatter())
        # 0/1 flags telling flush() what kind of entry is pending.
        self.inBookmark = self.inDesc = self.inFolder = 0
        self._clear_record()
        self.bms = bookmark.Bookmarks()

    def _clear_record(self):
        """Set every pending-record attribute back to None."""
        for field in self._RECORD_FIELDS:
            setattr(self, field, None)

    # --- folder headings: <H1> is the file title, <H3> a sub-folder

    def start_h1(self, attrs):
        self.inFolder = 1
        self.save_bgn()

    def end_h1(self):
        self.title = self.save_end()

    def start_h3(self, attrs):
        self.inFolder = 1
        for name, value in attrs:
            if name == 'folded':
                # only the presence of the attribute matters
                self.folded = 'yes'
            elif name == 'add_date':
                self.added = value
        self.save_bgn()

    def end_h3(self):
        self.title = self.save_end()

    # --- list structure

    def start_dl(self, attrs):
        # a <DL> opens the folder whose heading was just collected
        self.flush()

    def end_dl(self):
        # emit the last entry of the folder before closing it
        self.flush()
        self.bms.leave_folder()
        self.inFolder = 0

    def do_hr(self, attrs):
        self.flush()
        self.bms.add_separator()

    def do_dt(self, attrs):
        # a new entry starts, so the previous one is complete
        self.flush()

    def do_dd(self, attrs):
        # description text is collected until the next flush()
        self.inDesc = 1
        self.save_bgn()

    # --- bookmarks

    def start_a(self, attrs):
        for name, value in attrs:
            field = self._ANCHOR_FIELDS.get(name)
            if field is not None:
                setattr(self, field, value)
        self.inBookmark = 1
        self.save_bgn()

    def end_a(self):
        self.title = self.save_end()

    # --- output, delegated to the Bookmarks container

    def dump_xbel(self):
        self.bms.dump_xbel()

    def dump_netscape(self):
        self.bms.dump_netscape()

    def flush(self):
        """Hand the pending record (if any) to self.bms and clear it."""
        if self.inDesc == 1:
            self.desc = self.save_end()
            self.inDesc = 0
        if self.inBookmark == 1:
            if not self.ref:
                self.bms.add_bookmark(self.added, self.title, self.desc,
                                      self.id, self.url_href,
                                      self.url_visited, self.url_modified,
                                      None, None)
            else:
                # an ALIASOF anchor only refers to another bookmark
                self.bms.add_alias(self.ref)
            self.inBookmark = 0
        elif self.inFolder == 1:
            self.bms.add_folder(self.title, self.desc, self.added, self.folded)
            self.inFolder = 0
        self._clear_record()
        
if __name__ == '__main__':
    # Filter script: read a Netscape bookmark file from stdin and write it
    # to stdout, as Netscape HTML when "-ns" is given and as XBEL otherwise.

    # nsparse.py never imports sys explicitly, so bring it into scope here
    # instead of relying on another module's star-import to provide it.
    import sys

    p = NSBookmarkParser()
    p.feed(sys.stdin.read())
    p.close()

    if "-ns" in sys.argv:
        p.dump_netscape()
    else:
        p.dump_xbel()


#
# bookmark.py
#
#
"""
Classes to store bookmarks and dump them to XBEL.
"""

import sys,string

# --- maintain a store for id objects
# Module-level registry: maps an id to the object registered under it.
IDs = {}

def StoreID(id,obj):
    """Register obj under id, replacing any object stored there before."""
    IDs[id] = obj

def GetID(id):
    """Return the object registered under id (KeyError if there is none)."""
    return IDs[id]
    
# --- Class for bookmark container

class Bookmarks:
    """Container for a whole bookmark collection.

    folders       -- the top-level items, in the order they were added
    folder_stack  -- the chain of currently open folders; the last element
                     is the innermost one and receives newly added items
    """

    def __init__(self):
        self.folders=[]
        self.folder_stack=[]

    def _add_item(self,item):
        """Append item to the innermost open folder, or to the top level
        when no folder is open."""
        if self.folder_stack:
            self.folder_stack[-1].add_child(item)
        else:
            self.folders.append(item)

    def add_folder(self,title,desc,added,folded):
        """Create a folder, add it at the current position and open it, so
        that following items land inside it until leave_folder() is called."""
        nf = Folder(title,desc,added,folded)
        self._add_item(nf)
        self.folder_stack.append(nf)

    def add_bookmark(self,added,title,desc,id,href,visited,modified,checked,response):
        """Create a bookmark at the current position; when it carries an id
        it is also registered with StoreID so aliases can look it up."""
        nb = Bookmark(added,title,desc,id,href,visited,modified,checked,response)
        if id: StoreID(id,nb)
        self._add_item(nb)

    def add_separator(self):
        """Add a separator at the current position."""
        self._add_item(Separator())

    def add_alias(self,ref):
        """Add an alias pointing at the bookmark whose id is ref."""
        self._add_item(Alias(ref))

    def leave_folder(self):
        """Close the innermost open folder; a no-op when none is open."""
        if self.folder_stack:
            del self.folder_stack[-1]

    def dump_xbel(self,out=sys.stdout):
        """Write the whole collection to out as an XBEL document."""
        # The XML declaration must be the very first thing in the document,
        # so it has to come before the DOCTYPE declaration.
        out.write("<?xml version=\"1.0\"?>\n")
        out.write("<!DOCTYPE xbel SYSTEM \"xbel.dtd\">\n")
        out.write("<xbel version=\"0.1\">\n")
        for folder in self.folders:
            folder.dump_xbel(out)
        out.write("</xbel>\n")

    def dump_netscape(self,out=sys.stdout):
        """Write the collection to out as a Netscape bookmark file.

        Only the first top-level item is written; it supplies the file's
        <TITLE>/<H1> and its children become the entries.  With no
        top-level items only the file header is written.
        """
        out.write("<!DOCTYPE NETSCAPE-Bookmark-file-1>\n")
        out.write("<!-- This is an automatically generated file.\n")
        out.write("It will be read and overwritten.\n")
        out.write("Do Not Edit! -->\n")
        if not self.folders:
            # nothing was parsed; avoid an IndexError on folders[0]
            return
        # output first folder specially
        f = self.folders[0]
        out.write("<TITLE>%s</TITLE>\n" % f.title)
        out.write("<H1>%s</H1>\n" % f.title)
        # skip the description entirely instead of printing "None"
        if f.desc: out.write("<DD>%s\n" % f.desc)
        out.write("<DL><p>\n")
        for folder in f.children:
            folder.dump_netscape(out)
        out.write("  </DL><p>\n")
                  
class Folder:
    """A bookmark folder: a title, optional description and date, a
    'folded' flag and an ordered list of children."""

    def __init__(self,title,desc,added,folded):
        self.added=added
        self.folded=folded
        self.title=title
        self.desc=desc
        # valid children are folders,bookmarks,separators and aliases
        self.children=[]

    def add_child(self,child):
        """Append child to this folder."""
        self.children.append(child)

    def dump_xbel(self,out):
        """Write this folder and, recursively, its children to out as an
        XBEL <folder> element."""
        # Local import keeps the class independent of module-level imports.
        from xml.sax.saxutils import escape, quoteattr
        # Text and attribute values are escaped so that '&', '<' or quotes
        # in a title cannot produce malformed XML.  '"%s" % value' keeps
        # the original string coercion for non-string values.
        out.write("  <folder")
        if self.added: out.write(" added=%s" % quoteattr("%s" % self.added))
        if self.folded: out.write(" folded=%s" % quoteattr("%s" % self.folded))
        out.write(">\n")
        out.write("    <title>%s</title>\n" % escape("%s" % self.title))
        if self.desc: out.write("    <desc>%s</desc>\n" % escape("%s" % self.desc))
        for child in self.children:
            child.dump_xbel(out)
        out.write("  </folder>\n\n")

    def dump_netscape(self,out):
        """Write this folder and, recursively, its children to out in
        Netscape bookmark-file markup (a <DT><H3> heading plus a <DL>)."""
        out.write("    <DT><H3")
        if self.folded: out.write(" FOLDED")
        # keep the creation date so it is not lost when writing the file back
        if self.added: out.write(" ADD_DATE=\"%s\"" % self.added)
        out.write(">%s</H3>\n" % self.title)
        # end the description line so the <DL> starts on a line of its own
        if self.desc: out.write("  <DD>%s\n" % self.desc)
        out.write("  <DL><p>\n")
        for child in self.children:
            child.dump_netscape(out)
        out.write("  </DL><p>\n")

# --- Class for bookmarks
        
class Bookmark:
    """A single bookmark: its title, description and URL plus the optional
    id and the added/visited/modified/checked/response values."""

    def __init__(self,added,title,desc,id,href,visited,modified,checked,response):
        self.id=id
        self.added=added
        self.title=title
        self.desc=desc
        self.href=href
        self.visited=visited
        self.modified=modified
        self.checked=checked
        self.response=response

    def dump_xbel(self,out):
        """Write this bookmark to out as an XBEL <bookmark> element."""
        # Local import keeps the class independent of module-level imports.
        from xml.sax.saxutils import escape, quoteattr
        # All text and attribute values are escaped: URLs routinely contain
        # '&', which would otherwise make the document malformed.
        # '"%s" % value' keeps the original string coercion.
        out.write("    <bookmark")
        if self.id: out.write(" id=%s" % quoteattr("%s" % self.id))
        if self.added: out.write(" added=%s" % quoteattr("%s" % self.added))
        out.write(">\n")
        out.write("      <title>%s</title>\n" % escape("%s" % self.title))
        if self.desc:  out.write("      <desc>%s</desc>\n" % escape("%s" % self.desc))
        out.write("      <url")
        # NOTE(review): the id is written on both <bookmark> and <url>, as
        # in the original code -- confirm against the DTD that this is meant.
        url_attrs = (("modified", self.modified),
                     ("visited", self.visited),
                     ("id", self.id),
                     ("checked", self.checked),
                     ("response", self.response))
        for name, value in url_attrs:
            if value: out.write(" %s=%s" % (name, quoteattr("%s" % value)))
        out.write(">%s</url>\n" % escape("%s" % self.href))
        out.write("    </bookmark>\n")

    def dump_netscape(self,out):
        """Write this bookmark to out as a Netscape <DT><A ...> entry."""
        out.write("    <DT><A HREF=\"%s\"" % self.href)
        if self.id:
            out.write(" ALIASID=\"%s\"" % self.id)
        if self.added:
            out.write(" ADD_DATE=\"%s\"" % self.added)
        else:
            # ADD_DATE is always written; 0 stands in for an unknown date
            out.write(" ADD_DATE=\"0\"")
        if self.visited:
            out.write(" LAST_VISIT=\"%s\"" % self.visited)
        if self.modified:
            out.write(" LAST_MODIFIED=\"%s\"" % self.modified)
        out.write(">%s</A>\n" % self.title)
        if self.desc:
            # end the line so the next entry starts on a line of its own
            out.write("    <DD>%s\n" % self.desc)
        
class Alias:
    """A reference to another bookmark, identified by that bookmark's id."""

    def __init__(self,ref):
        self.ref=ref

    def dump_xbel(self,out):
        """Write this alias to out as an empty XBEL <alias> element."""
        # Local import keeps the class independent of module-level imports.
        from xml.sax.saxutils import quoteattr
        # Escape the reference and end the line, like the other elements do.
        out.write("    <alias ref=%s/>\n" % quoteattr("%s" % self.ref))

    def dump_netscape(self,out):
        """Write this alias to out as a Netscape <DT><A ... ALIASOF=...>
        entry, copying URL, dates, title and description from the bookmark
        it refers to.

        The target is looked up with GetID, so a KeyError is raised when no
        bookmark was registered under self.ref.
        """
        bookref=GetID(self.ref)
        out.write("    <DT><A HREF=\"%s\"" % bookref.href)
        out.write(" ALIASOF=\"%s\"" % self.ref)
        if bookref.added:
            out.write(" ADD_DATE=\"%s\"" % bookref.added)
        else:
            # ADD_DATE is always written; 0 stands in for an unknown date
            out.write(" ADD_DATE=\"0\"")
        if bookref.visited:
            out.write(" LAST_VISIT=\"%s\"" % bookref.visited)
        if bookref.modified:
            out.write(" LAST_MODIFIED=\"%s\"" % bookref.modified)
        out.write(">%s</A>\n" % bookref.title)
        if bookref.desc:
            # end the line so the next entry starts on a line of its own
            out.write("    <DD>%s\n" % bookref.desc)

class Separator:
    """A separator line between bookmark entries; it carries no data."""

    def dump_xbel(self,out):
        """Write the empty XBEL <separator/> element to out."""
        markup = "      <separator/>\n"
        out.write(markup)

    def dump_netscape(self,out):
        """Write the Netscape form of a separator, an <HR> tag, to out."""
        markup = "<HR>\n"
        out.write(markup)