[Python-checkins] r56524 - tracker/importer/config.py tracker/importer/xmlexport2handlers.py tracker/importer/xmlexport2toroundup.py
erik.forsberg
python-checkins at python.org
Tue Jul 24 16:40:11 CEST 2007
Author: erik.forsberg
Date: Tue Jul 24 16:40:10 2007
New Revision: 56524
Added:
tracker/importer/config.py
tracker/importer/xmlexport2handlers.py
tracker/importer/xmlexport2toroundup.py
- copied, changed from r56508, tracker/importer/sfxml2roundup.py
Log:
Importer for the "new" format produced by xml_export2.php
Added: tracker/importer/config.py
==============================================================================
--- (empty file)
+++ tracker/importer/config.py Tue Jul 24 16:40:10 2007
@@ -0,0 +1,26 @@
+mappings = {'category':
+ {"Demos and tools":"Demos and Tools",
+ "Distutils and setup.py":"Distutils",
+ "Python Interpreter Core":"Interpreter Core",
+ "Core (C code)":"Interpreter Core",
+ "Python Library":"Library (Lib)",
+ "Modules":"Extension Modules",
+ "Parser/Compiler":"Interpreter Core",
+ "Performance":"Interpreter Core",
+ "Threads":"Interpreter Core",
+ "Type/class unification":"Interpreter Core"},
+
+ 'priority':
+ {'1':'low',
+ '2':'low',
+ '3':'low',
+ '4':'low',
+ '5':'normal',
+ '6':'high',
+ '7':'high',
+ '8':'immediate',
+ '9':'urgent'
+ },
+ }
+
+
Added: tracker/importer/xmlexport2handlers.py
==============================================================================
--- (empty file)
+++ tracker/importer/xmlexport2handlers.py Tue Jul 24 16:40:10 2007
@@ -0,0 +1,429 @@
+import time, os, urllib, socket, mimetools, stat, re
+
+from config import mappings
+import time
+
+import BeautifulSoup as BS
+
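+# ElementTree lives in different places depending on the installation: try the stdlib package first, then the standalone cElementTree / elementtree packages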
+try:
+ # import xml.etree.cElementTree as ET # may crash in 2.5b2 !?
+ import xml.etree.ElementTree as ET
+except ImportError:
+ try:
+ import cElementTree as ET
+ except ImportError:
+ import elementtree.ElementTree as ET
+
+import htmlentitydefs
+
+from roundup.support import ensureParentsExist
+from roundup.date import Date
+
+class XMLExport2Handler:
+ def __init__(self, db, source, target):
+ self.db = db
+ self.source = source
+ self.target = target
+
+ def handle(self, item, roundupdata):
+ raise NotImplementedError
+
+
+class TextValueHandler(XMLExport2Handler):
+ def handle(self, item, roundupdata):
+ roundupdata[self.target] = item.find(self.source).text.encode('utf-8')
+
+class StatusHandler(XMLExport2Handler):
+ def __init__(self, db, source, target, statuses):
+ XMLExport2Handler.__init__(self, db, source, target)
+ self.statuses = statuses
+
+ def handle(self, item, roundupdata):
+ status = self.statuses[item.find(self.source).text].lower()
+
+ if "deleted" == status:
+ status = "closed"
+
+ roundupdata[self.target] = self.db.status.lookup(status)
+
+class ComponentHandler(XMLExport2Handler):
+ def __init__(self, db, source, target, categories):
+ XMLExport2Handler.__init__(self, db, source, target)
+ self.categories = categories
+
+ def handle(self, item, roundupdata):
+ category = self.categories[item.find(self.source).text]
+ category = mappings['category'].get(category, category)
+
+ try:
+ component_id = self.db.component.lookup(category)
+ roundupdata[self.target] = [component_id]
+ except KeyError:
+ roundupdata[self.target] = \
+ [self.db.component.create(name=category)]
+
+class GroupHandler(XMLExport2Handler):
+ def __init__(self, db, source, target, groups):
+ XMLExport2Handler.__init__(self, db, source, target)
+ self.groups = groups
+
+ def handle(self, item, roundupdata):
+ roundupdata[self.target] = []
+ group = self.groups[item.find(self.source).text]
+
+ if group in ["None", "Irreproducible", "AST", "Not a Bug"]:
+ return
+ elif "Feature Request" == group:
+ roundupdata['type'] = self.db.issue_type.lookup("rfe")
+ return
+ elif "Python 3000" == group:
+ roundupdata['keywords'].append(self.db.keyword.lookup('py3k'))
+ try:
+ # Merge as specified in http://psf.upfronthosting.co.za/roundup/meta/issue101
+ if group.startswith("Python 2.1"):
+ group = "Python 2.1"
+ elif group.startswith("Python 2.2"):
+ group = "Python 2.2"
+ version = self.db.version.lookup(group)
+ roundupdata[self.target] = version
+ return
+ except KeyError:
+ pass
+
+class ResolutionHandler(XMLExport2Handler):
+ def __init__(self, db, source, target, resolutions):
+ XMLExport2Handler.__init__(self, db, source, target)
+ self.resolutions = resolutions
+
+ def handle(self, item, roundupdata):
+ resolution = self.resolutions[item.find(self.source).text].lower()
+ if "none" == resolution:
+ roundupdata[self.target] = None
+ else:
+ roundupdata[self.target] = self.db.resolution.lookup(resolution)
+
+
+class UserlinkHandler(XMLExport2Handler):
+ def __init__(self, db, source, target, pmembers):
+ XMLExport2Handler.__init__(self, db, source, target)
+ self.pmembers = pmembers
+
+ def handle(self, item, roundupdata):
+ username = item.find(self.source).text
+
+ if "nobody" == username and \
+ "assignee" == self.target :
+ roundupdata[self.target] = None
+ return
+
+ if "nobody" == username:
+ username = "anonymous"
+
+ roundupdata[self.target] = self.getauthor(username)
+
+ # Add user to nosy
+ if roundupdata[self.target] not in roundupdata['nosy'] and \
+ roundupdata[self.target] != self.getauthor("anonymous"):
+ roundupdata['nosy'].append(roundupdata[self.target])
+
+ def unescape(self, string):
+ # work around oddities in BeautifulSoup's entity handling
+ def unescape_entity(m, defs=htmlentitydefs.entitydefs):
+ try:
+ return defs[m.group(1)]
+ except KeyError:
+ return m.group(0) # use as is
+ pattern = re.compile("&(\w+);")
+ return pattern.sub(unescape_entity, string)
+
+
+ def loadauthorfile(self, file):
+ def emit(soup):
+ if isinstance(soup, BS.NavigableString):
+ bob.data(self.unescape(soup))
+ else:
+ bob.start(soup.name, dict((k, self.unescape(v)) for k, v in soup.attrs))
+ for s in soup:
+ emit(s)
+ bob.end(soup.name)
+ # determine encoding (the document charset is not reliable)
+ text = open(file).read()
+ try:
+ encoding = "utf-8"
+ unicode(text, encoding)
+ except UnicodeError:
+ encoding = "iso-8859-1"
+ soup = BS.BeautifulSoup(
+ text, convertEntities="html", fromEncoding=encoding
+ )
+ # build the tree
+ bob = ET.TreeBuilder()
+ for s in soup:
+ emit(s)
+ return bob.close()
+
+ def getnonprojectmember(self, username):
+ address = "%s@users.sourceforge.net" % username
+
+ authorfile = os.path.join("authordata", username)
+ if not os.path.exists(authorfile) or 0 == os.stat(authorfile)[stat.ST_SIZE]:
+ print "Fetching user information for %s" % username
+ u = urllib.urlopen("http://sourceforge.net/users/" + username)
+ open(authorfile, 'w').write(u.fp.read())
+
+ realname = None
+ authordata = open(authorfile).read()
+ if -1 != authordata.find("That user does not exist or is not yet active."):
+ return ("anonymous", None, None)
+
+ elif -1 != authordata.find("This user account has been deleted"):
+ realname = "Deleted User %s" % username
+ return (username, realname, address)
+
+ tree = self.loadauthorfile(authorfile)
+ try:
+ table = tree.getiterator('table')[0]
+ except TypeError:
+ table = tree.getiterator('table').next()
+
+ alltds = table.findall('.//td')
+ for i in range(len(alltds)):
+ header = alltds[i].text or ""
+ if -1 != header.find("Publicly Displayed Name:"):
+ realname = alltds[i+1].text
+ break
+
+ return (username, realname, address)
+
+
+
+ def getauthor(self, username):
+ try:
+ return self.db.user.lookup(username)
+ except KeyError:
+ print "Creating new user", username
+ roles = ["User"]
+ if not self.pmembers.has_key(username):
+ (username, realname, address) = self.getnonprojectmember(username)
+ if "anonymous" == username:
+ return self.db.user.lookup(username)
+ realname = realname.encode('utf-8')
+ else:
+ realname = self.pmembers[username]['public_name'].encode('utf-8')
+ address = self.pmembers[username]['email']
+ roles.append("Developer")
+ if self.pmembers[username]['admin']:
+ roles.append('Coordinator')
+ return self.db.user.create(username=username,
+ realname=realname,
+ address=address,
+ roles=",".join(roles))
+
+class AssigneeHandler(UserlinkHandler):
+ def handle(self, item, roundupdata):
+ UserlinkHandler.handle(self, item, roundupdata)
+ if None == roundupdata[self.target]:
+ return
+ user = self.db.user.getnode(roundupdata[self.target])
+ roles = user['roles'].split(',')
+ if not "Developer" in roles:
+ roles.append('Developer')
+ user['roles'] = ",".join(roles)
+
+
+class DateHandler(XMLExport2Handler):
+ def handle(self, item, roundupdata):
+ roundupdata[self.target] = time.gmtime(int(item.find(self.source).text))
+class PriorityHandler(XMLExport2Handler):
+ def handle(self, item, roundupdata):
+ priority = item.find(self.source).text
+ roundupdata[self.target] = self.db.priority.lookup(mappings['priority'][priority])
+
+
+class TextstringHandler(XMLExport2Handler):
+ def handle(self, item, roundupdata):
+ roundupdata[self.target] = item.find(self.source).text.encode('utf-8')
+
+class MessagesHandler(UserlinkHandler):
+ def createmessage(self, roundupdata, author, date, content, recipients):
+ messageprops = ['author', 'date', 'files', 'content', 'recipients']
+ messagevals = [repr(self.getauthor(author)),
+ repr(time.gmtime(int(date))),
+ repr([]),
+ repr(content.encode('utf-8')),
+ repr([])]
+
+ if not roundupdata.has_key('activity') or \
+ int(date) > time.mktime(roundupdata['activity']):
+ roundupdata['activity'] = time.gmtime(int(date))
+ roundupdata['actor'] = self.getauthor(author)
+
+ msg_nodeid = int(self.db.msg.import_list(messageprops, messagevals))
+
+ msg_filename = self.db.filename(self.db.msg.classname,
+ msg_nodeid, create=1)
+ ensureParentsExist(msg_filename)
+
+ mo = re.search('^Logged In: (YES |NO )\nuser_id=[0-9]+\nOriginator: (YES|NO)\n', content, re.MULTILINE)
+ if mo:
+ content = content[mo.end():]
+
+ open(msg_filename, 'w').write(content.encode('utf-8'))
+
+ return msg_nodeid
+
+ def handle(self, item, roundupdata):
+ # The item's own 'details' text becomes the first message; followups are appended after it
+ roundupdata[self.target] = [self.createmessage(roundupdata,
+ item.find('submitter').text,
+ item.find('submit_date').text,
+ item.find(self.source).text,
+ [])]
+
+
+ followups = item.find("followups")
+ for fu in followups.findall("followup"):
+ author = fu.find("submitter").text
+ date = fu.find("date").text
+ content = fu.find("details").text
+ roundupdata[self.target].append(self.createmessage(roundupdata, author, date, content, []))
+
+ authorid = self.getauthor(author)
+ if authorid not in roundupdata['nosy'] and \
+ authorid != self.getauthor('anonymous'):
+ roundupdata['nosy'].append(authorid)
+
+
+class AttachmentHandler(UserlinkHandler):
+ def __init__(self, db, source, target, pmembers,
+ project_group_id, tracker):
+ UserlinkHandler.__init__(self, db, source, target, pmembers)
+ self.project_group_id = project_group_id
+ self.tracker = tracker
+
+ def downloadfile(self, url, cachefilename):
+
+ delay = 0
+ backoff = 30
+ while True:
+ print url, "->", cachefilename
+ try:
+ f = urllib.urlopen(url)
+ data = f.read()
+ if data.find("send-email-to-ipblocked-at-sourceforge-dot-net") >= 0:
+ delay+=backoff
+ print "Blocked by Sourceforge. Sleeping %d seconds before trying again" % delay
+
+ out = open(cachefilename + ".tmp", 'w')
+ out.write(str(f.headers))
+ out.write("\n")
+ out.write(data)
+ out.close()
+ try:
+ os.remove(cachefilename)
+ except:
+ pass
+ os.rename(cachefilename + ".tmp", cachefilename)
+ break
+
+ except socket.error, e:
+ print "Error fetching file, retrying", e
+ continue
+ except AttributeError, e:
+ print e, "Probably SF weirdness. Trying again after delay.."
+ delay+=backoff
+ except IOError, e:
+ print e, "Probably SF weirdness. Trying again after delay.."
+ delay+=backoff
+
+ time.sleep(delay)
+
+
+ def handle(self, item, roundupdata):
+
+ tracker_id = self.tracker.find("tracker_id").text
+ aid = roundupdata["id"]
+ files = []
+ issuefiles = []
+
+ attachments = item.find(self.source)
+ for a in attachments.findall("attachment"):
+ url = a.find("url").text + aid
+ date = a.find("date").text
+ author = a.find("submitter").text
+ filetype = a.find("filetype").text
+ file_id = a.find("id").text
+ filename = a.find("filename").text
+
+ files.append((date, url, author, filetype, file_id, filename))
+
+ files.sort(lambda x, y: cmp(x[0], y[0]))
+
+ backoff = 30
+ for timestamp, url, author, filetype, file_id, filename in files:
+ cachefilename = os.path.join("files", "%s-%s-%s-%s.dat" % (tracker_id,
+ aid,
+ file_id,
+ timestamp))
+ if not os.path.exists(cachefilename):
+ self.downloadfile(url, cachefilename)
+
+ datafile = open(cachefilename, 'rb')
+ message = mimetools.Message(datafile)
+
+ fileprops = ['creator', 'creation', 'activity',
+ 'name', 'type']
+
+ filevals = [repr(self.getauthor(author)),
+ repr(time.gmtime(int(timestamp))),
+ repr(time.gmtime(int(timestamp))),
+ repr(filename),
+ repr(filetype)
+ ]
+
+ file_nodeid = int(self.db.file.import_list(fileprops, filevals))
+ file_filename = self.db.filename(self.db.file.classname, file_nodeid,
+ create=1)
+ ensureParentsExist(file_filename)
+ open(file_filename, 'w').write(datafile.read())
+
+ issuefiles.append(file_nodeid)
+
+ if not roundupdata.has_key('activity') or \
+ int(timestamp) > time.mktime(roundupdata['activity']):
+ roundupdata['activity'] = time.gmtime(int(timestamp))
+ roundupdata['actor'] = self.getauthor(author)
+
+ roundupdata[self.target] = issuefiles
+
+
+class SeverityHandler(XMLExport2Handler):
+ def handle(self, item, roundupdata):
+ roundupdata[self.target] = self.db.severity.lookup('normal')
+
+class TypeHandler(XMLExport2Handler):
+ def __init__(self, db, source, target, tracker):
+ XMLExport2Handler.__init__(self, db, source, target)
+ self.tracker = tracker
+
+ def handle(self, item, roundupdata):
+ if "Feature Requests" == self.tracker.find("name").text:
+ roundupdata[self.target] = self.db.issue_type.lookup("rfe")
+ elif "Patches" == self.tracker.find("name").text:
+ roundupdata["keywords"].append(self.db.keyword.lookup("patch"))
+
+
+def handle_journal(db, item, roundupdata, nodeid):
+ journal = []
+ journal.append((nodeid, Date(roundupdata['creation']),
+ roundupdata['creator'],
+ 'create', {}))
+ db.setjournal("issue", nodeid, journal)
+
+
+
+
+
+
+
Copied: tracker/importer/xmlexport2toroundup.py (from r56508, tracker/importer/sfxml2roundup.py)
==============================================================================
--- tracker/importer/sfxml2roundup.py (original)
+++ tracker/importer/xmlexport2toroundup.py Tue Jul 24 16:40:10 2007
@@ -19,62 +19,101 @@
origin_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
sys.path = [os.path.join(origin_dir, "sourceforge")] + sys.path
-
import htmlentitydefs, re
import getopt
-import sfxmlhandlers
+import xmlexport2handlers as x2h
from roundup import instance
-def handle_artifact(db, artifact):
+def handle_idmapping(tracker, name, itemname):
+ print "Reading in '%s'" % name
+ mapping = tracker.find(name)
+ ret = {}
+ for g in mapping.findall(itemname):
+ ret[g.find('id').text] = g.find('%s_name' % itemname).text
+
+ return ret
+
+def handle_namemapping(tracker, name, itemname):
+ resolutions = tracker.find(name)
+ ret = {}
+ for r in resolutions.findall(itemname):
+ ret[r.find('id').text] = r.find('name').text
+ return ret
+
+
+def handle_tracker(db, project_group_id, tracker, pmembers):
+ print "Handling tracker \"%s\"" % tracker.find('name').text
- handlers = [sfxmlhandlers.IDHandler(db, "artifact_id", 'id'),
- sfxmlhandlers.CreationHandler(db, 'open_date', 'creation'),
- # activity and actor is set by CreationHandler, FileHandler and MessagesHandler
- sfxmlhandlers.UserlinkHandler(db, 'submitted_by', 'creator'),
- sfxmlhandlers.TitleHandler(db, 'summary', 'title'),
- sfxmlhandlers.MessagesHandler(db, None, 'messages'),
- sfxmlhandlers.FilesHandler(db, None, 'files'),
- sfxmlhandlers.NosyHandler(db, None, 'nosy'),
- # No handler for superseder
- sfxmlhandlers.ComponentHandler(db, 'category', 'components'),
- sfxmlhandlers.VersionsHandler(db, None, 'versions'),
- sfxmlhandlers.SeverityHandler(db, None, 'severity'),
- sfxmlhandlers.PriorityHandler(db, 'priority', 'priority'),
- sfxmlhandlers.DependencyHandler(db, None, 'dependencies'),
- sfxmlhandlers.AssigneeHandler(db, 'assigned_to', 'assignee'),
- sfxmlhandlers.StatusHandler(db, 'status', 'status'),
- sfxmlhandlers.ResolutionHandler(db, 'resolution', 'resolution'),
- sfxmlhandlers.TypeHandler(db, "artifact_type", "type"),
- sfxmlhandlers.GroupHandler(db, "artifact_group_id", "versions"),
- ]
-
- roundupdata = {'files':[], 'keywords':[]}
- fields = {}
-
- for field in artifact.findall("field"):
- name = field.attrib.get('name')
- if None == name:
- print "field has no name", field.attrib
- continue
- fields[name] = field
-
- aid = int(fields['artifact_id'].text)
-
- for handler in handlers:
- handler.handle(fields, roundupdata)
-
- props = []
- values = []
-
- for key, value in roundupdata.items():
- props.append(key)
- values.append(repr(value))
-
- nodeid = db.issue.import_list(props, values)
- sfxmlhandlers.handle_journal(db, fields, roundupdata, nodeid)
- return nodeid
+ groups = handle_idmapping(tracker, "groups", "group")
+ categories = handle_idmapping(tracker, "categories", "category")
+ categories['100100'] = 'None'
+ categories['100'] = 'None'
+ groups['100100'] = 'None'
+ groups['100'] = 'None'
+
+ print groups
+
+ resolutions = handle_namemapping(tracker, 'resolutions', 'resolution')
+ statuses = handle_namemapping(tracker, 'statuses', 'status')
+
+ handlers = [x2h.TextValueHandler(db, "id", "id"),
+ x2h.StatusHandler(db, "status_id", "status", statuses),
+ x2h.ComponentHandler(db, "category_id", "components",
+ categories),
+ x2h.GroupHandler(db, "group_id", "versions",
+ groups),
+ x2h.ResolutionHandler(db, "resolution_id", "resolution",
+ resolutions),
+ x2h.UserlinkHandler(db, 'submitter', 'creator',
+ pmembers),
+ x2h.AssigneeHandler(db, 'assignee', 'assignee', pmembers),
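+ # FIXME: 'activity' has no handler of its own; it is only set as a side effect of MessagesHandler and AttachmentHandler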
+ x2h.DateHandler(db, 'submit_date', 'creation'),
+ x2h.PriorityHandler(db, 'priority', 'priority'),
+ x2h.TextstringHandler(db, 'summary', 'title'),
+ x2h.MessagesHandler(db, 'details', 'messages', pmembers),
+ x2h.AttachmentHandler(db, 'attachments', 'files',
+ pmembers, project_group_id, tracker),
+ x2h.SeverityHandler(db, None, "severity"),
+ x2h.TypeHandler(db, None, "type", tracker),
+ ]
+
+ for item in tracker.find('tracker_items').findall('tracker_item'):
+ print "Handling \"%s\" item with id %s" % (tracker.find('name').text,
+ item.find('id').text)
+ roundupdata = {'keywords':[], 'files':[],
+ 'messages':[], 'dependencies':[], 'nosy':[]}
+
+ for handler in handlers:
+ handler.handle(item, roundupdata)
+
+ props = []
+ values = []
+
+ for key, value in roundupdata.items():
+ props.append(key)
+ values.append(repr(value))
+
+ nodeid = db.issue.import_list(props, values)
+ x2h.handle_journal(db, item, roundupdata, nodeid)
+
+ db.commit()
+
+
+def handle_projectmembers(tree):
+ ps = tree.find('projectsummary').find("projectmembers")
+ ret = {'nobody':{'public_name':'Nobody/Anonymous', 'admin':False,
+ 'email':''}}
+ for pm in ps.findall("projectmember"):
+ user_name = pm.find('user_name').text
+ ret[user_name] = {'public_name':pm.find('public_name').text,
+ 'email':pm.find('email').text,
+ 'admin':False}
+ if 'Yes' == pm.find('project_admin').text:
+ ret[user_name]['admin'] = True
+ return ret
if "__main__" == __name__:
@@ -92,26 +131,22 @@
trackerhome = optarg
elif "--startat" == opt:
startat = int(optarg)
+
+ rounduptracker = instance.open(trackerhome)
+ db = rounduptracker.open("admin")
tree = ET.parse(xmlfile)
- artifacts = tree.find('artifacts')
- rounduptracker = instance.open(trackerhome)
- db = rounduptracker.open("admin")
+ project_group_id = tree.find("export_details").find("project_group_id").text
- max_id = 0
- allartifacts = artifacts.findall('artifact')
- i=startat
- totalartifacts = len(allartifacts)
- for artifact in allartifacts[startat:]:
- i+=1
- sys.stdout.write("[%5d/%d] " % (i, totalartifacts))
- aid = handle_artifact(db, artifact)
- if max_id < int(aid):
- max_id = int(aid)
- db.commit()
+ pmembers = handle_projectmembers(tree)
+
+ trackers = tree.find("trackers")
+ for tracker in trackers.findall("tracker"):
+ handle_tracker(db, project_group_id, tracker, pmembers)
db.setid('issue', str(999))
- db.commit()
+ db.commit()
+
More information about the Python-checkins
mailing list