[Archiver-dev] Pipermail archive ifc to UpLib

Mon Nov 15 21:35:34 CET 2010

I thought I'd see how hard it would be to mock up an UpLib extension
which would provide a Pipermail-like view of an UpLib email archive.
Easy to get started:

  import sys, os, re, urllib, time

  from uplib.plibUtil import note, format_date
  from uplib.webutils import htmlescape
  from uplib.basicPlugins import show_title, STANDARD_BACKGROUND_COLOR
  from uplib.emailParser import Thread, figure_thread_elements, SHOW_BLOCK

  def render_list (repo, response, params):
      """Write a Pipermail-style monthly index page for one mailing list.

      Queries the repository for documents in the category
      "email/<list>", where <list> is the "list" request parameter, and
      writes an HTML table with one row for each month that has at least
      one message, newest month first.  Each row carries "by_thread" and
      "by_date" links for that month plus the month's message count.

      repo -- repository object; only do_query() is used here
      response -- response object; only open() is used here
      params -- request parameters; "list" names the mailing list
      """

      elist = params.get("list")

      hits = repo.do_query('+categories:"email/%s"' % elist)
      dates = sorted([doc.get_date() for score, doc in hits])
      fp = response.open()
      fp.write("<body><h1>%s</h1>\n" % htmlescape(elist))
      if not dates:
          # Nothing matched: indexing dates[0] below would raise
          # IndexError, so emit an empty summary instead.
          fp.write("<p>0 msgs\n</body>")
          return

      earliest = dates[0]
      latest = dates[-1]

      # Tally messages per (year, month) in one pass rather than rescanning
      # the whole date list for every month.  Dates are indexed as
      # (year, month, ...).
      counts = {}
      for date in dates:
          key = (date[0], date[1])
          counts[key] = counts.get(key, 0) + 1

      fp.write("<p>%s to %s, %d msgs\n" % (earliest, latest, len(hits)))
      fp.write("<p><table border=1>\n")
      for year in range(latest[0], earliest[0]-1, -1):
          # The stop value must be 0, not 1: range() excludes its stop, and
          # January (month 1) needs a row too.
          for month in range(12, 0, -1):
              count = counts.get((year, month))
              if not count:
                  continue
              fp.write("<tr>")
              fp.write("<td>%s:</td><td>" % htmlescape(format_date("%s/%s" % (month, year))))
              # NOTE(review): day 32 presumably serves as an upper bound past
              # the end of any month -- confirm against UpLib's date query syntax.
              query = 'categories:"email/%s" AND date:[%s/1/%s TO %s/32/%s]' % (elist, month, year, month, year)
              query = urllib.quote_plus(query)
              fp.write('<a href="by_thread?query=%s">[Threads]</a> ' % query)
              fp.write('<a href="by_date?query=%s">[Date]</a></td>' % query)
              fp.write("<td> %d messages</td>" % count)
              fp.write("</tr>\n")
      fp.write("</table></body>")

  def by_thread (repo, response, params):
      """Redirect to the basic email_threads action for the "query" parameter.

      The query is URL-encoded and passed along with
      sort-order=by-latest-inverted.  repo is not used.
      """

      encoded = urllib.quote_plus(params.get("query"))
      target = "/action/basic/email_threads?query=%s&sort-order=by-latest-inverted" % encoded
      response.redirect(target)

  def by_date (repo, response, params):
      """Redirect to the basic email_threads action for the "query" parameter.

      The query is URL-encoded and passed along with sort-order=by-start.
      repo is not used.
      """

      encoded = urllib.quote_plus(params.get("query"))
      target = "/action/basic/email_threads?query=%s&sort-order=by-start" % encoded
      response.redirect(target)

This assumes that the email is already labelled with UpLib categories of
the form "email/LISTNAME", and the parameter "list" specifies which
LISTNAME you are looking for.

Adding search by person would be interesting, because UpLib has a
built-in notion of Person, with support for name aliases and such, so
you could slice that several different ways.  Here's one way:

  # The Person extension is optional: `person` stays None when it cannot be
  # imported, and callers check it for truth before using it.
  # NOTE(review): find_and_load_extension is not among the imports shown in
  # this message -- confirm where it should be imported from.
  person = None
  try:
      person = find_and_load_extension("Person")
  except ImportError:
      # Deliberate best-effort: fall back to the None default set above.
      pass

  def by_author (repo, response, params):
      """Write an HTML page listing the threads of a query, grouped by author.

      Runs the "query" request parameter against the repository, collects
      each hit's "authors" metadata (falling back to "email-sender"), and
      for every distinct author writes a heading followed by one
      <div class=thread> per distinct thread that author's messages
      belong to.

      repo -- repository object; do_query() is used, and repo is passed
              on to Thread.find_thread()
      response -- response object; only open() is used here
      params -- request parameters; "query" holds the search query
      """

      def display_name(author):
          # Use the Person extension's rendering of the name when the
          # extension is loaded and considers the parsed name reasonable.
          if person:
              parsed = person.Name(author)
              if parsed.reasonable():
                  return unicode(parsed)
          return author

      def written_by(doc, who):
          # A document counts for `who` when either metadata field matches.
          return (doc.get_metadata("authors") == who) or (doc.get_metadata("email-sender") == who)

      query = params.get("query")
      hits = [doc for score, doc in repo.do_query(query)]
      hits.sort(key=lambda doc: doc.get_metadata("date"))

      # Distinct author strings, then (shown name, raw string) pairs ordered
      # case-insensitively by the shown name.
      raw_authors = list(set((doc.get_metadata("authors") or doc.get_metadata("email-sender"))
                             for doc in hits))
      authors = [(display_name(raw), raw) for raw in raw_authors if raw]
      authors.sort(key=lambda pair: pair[0].lower())

      # Thread rows alternate between these two backgrounds.
      colors = ("#ffffff", STANDARD_BACKGROUND_COLOR)

      fp = response.open()
      fp.write("<head><title>%s</title>\n" % htmlescape(query))
      fp.write('<meta http-equiv="Content-Script-Type" content="text/javascript">\n')
      fp.write('<link rel="shortcut icon" href="/favicon.ico">\n')
      fp.write('<link rel="icon" type="image/ico" href="/favicon.ico">\n')
      fp.write(SHOW_BLOCK)
      fp.write('</head>\n')
      fp.write('<body bgcolor="%s" onload="javascript:show_all_threads();">\n' % STANDARD_BACKGROUND_COLOR)
      for shown_name, sender in authors:
          fp.write("<p>%s:<UL>" % htmlescape(shown_name))

          # Collect each thread once, in the order its first message appears.
          threads = []
          for doc in hits:
              if not written_by(doc, sender):
                  continue
              found = Thread.find_thread(repo, doc.id, doc.get_metadata())
              if (found not in threads) and found:
                  threads.append(found)

          now = time.time()
          for index, thread in enumerate(threads):
              doc_id, title, count, first_date, last_date, initiator, summary = figure_thread_elements(thread, now=now)
              fp.write('<div style="background: %s" id="%s" msg_count="%d" summary="%s" doc_id="%s" threadtitle="%s" first_date="%s" initiator="%s" last_date="%s" content="" class=thread></div>\n'
                       % (colors[index % 2], thread.id, count, htmlescape(summary, True),
                          doc_id, htmlescape(title, True), htmlescape(first_date, True),
                          htmlescape(initiator, True), htmlescape(last_date or "", True)))
          fp.write("</UL>\n")
      fp.write("</body>\n")