[Python-checkins] r56051 - sandbox/trunk/pep0/pep0/constants.py sandbox/trunk/pep0/pep0/output.py sandbox/trunk/pep0/pep0/pep.py

brett.cannon python-checkins at python.org
Wed Jun 20 18:55:13 CEST 2007


Author: brett.cannon
Date: Wed Jun 20 18:55:09 2007
New Revision: 56051

Modified:
   sandbox/trunk/pep0/pep0/constants.py
   sandbox/trunk/pep0/pep0/output.py
   sandbox/trunk/pep0/pep0/pep.py
Log:
Do not statically list authors' names in pep0.constants.  Instead extract them
from the PEPs directly.  Also verify that an author uses only a single email
address.  This all led to the creation of an Author class.

The parsing code still needs to be reworked, though, to use email.parser
instead of doing it by hand.


Modified: sandbox/trunk/pep0/pep0/constants.py
==============================================================================
--- sandbox/trunk/pep0/pep0/constants.py	(original)
+++ sandbox/trunk/pep0/pep0/constants.py	Wed Jun 20 18:55:09 2007
@@ -38,112 +38,3 @@
 fill-column: 70
 coding: utf-8
 End:"""
-
-nicknames = {
-    'Guido van Rossum' : 'GvR',
-    'Just van Rossum' : 'JvR',
-}
-
-# Keep alphabetized!
-email_addresses = (
-    ('Aahz' , 'aahz at pobox.com'),
-    ('James C. Ahlstrom' , 'jim at interet.com'),
-    ('Jim Althoff' , 'james_althoff at i2.com'),
-    ('Kevin Altis' , 'altis at semi-retired.com'),
-    ('David Ascher' , 'davida at activestate.com'),
-    ('Peter Astrand' , 'astrand at lysator.liu.se'),
-    ('Carl Banks' , 'pythondev at aerojockey.com'),
-    ('Paul Barrett' , 'barrett at stsci.edu'),
-    ('Facundo Batista' , 'facundo at taniquetil.com.ar'),
-    ('Anthony Baxter' , 'anthony at interlink.com.au'),
-    ('Thomas Bellman' , 'bellman+pep-divmod at lysator.liu.se'),
-    ('Steven Bethard' , 'steven.bethard at gmail.com'),
-    ('Georg Brandl' , 'g.brandl at gmx.net'),
-    ('Brett Cannon' , 'brett at python.org'),
-    ('Josiah Carlson' , 'jcarlson at uci.edu'),
-    ('W Isaac Carroll' , 'icarroll at pobox.com'),
-    ('Nick Coghlan' , 'ncoghlan at gmail.com'),
-    ('Dave Cole' , 'djc at object-craft.com.au'),
-    ('Christopher Craig' , 'python-pep at ccraig.org'),
-    ('Laura Creighton' , 'lac at strakt.com'),
-    ('Walter Dörwald' , ''),
-    ('Fred L. Drake, Jr.' , 'fdrake at acm.org'),
-    ('Michael P. Dubner' , 'dubnerm at mindless.com'),
-    ('Paul F. Dubois' , 'paul at pfdubois.com'),
-    ('Phillip J. Eby' , 'pje at telecommunity.com'),
-    ('Micah Elliott' , 'mde at tracos.org'),
-    ('Jeff Epler' , 'jepler at unpythonic.net'),
-    ('David Eppstein' , 'eppstein at ics.uci.edu'),
-    ('Clark C. Evans' , 'cce at clarkevans.com'),
-    ('Greg Ewing' , 'greg at cosc.canterbury.ac.nz'),
-    ('Martijn Faassen' , 'faassen at infrae.com'),
-    ('Ben Finney' , 'ben+python at benfinney.id.au'),
-    ('Frédéric B. Giacometti' , 'fred at arakne.com'),
-    ('Scott Gilbert' , 'xscottg at yahoo.com'),
-    ('David Goodger' , 'goodger at python.org'),
-    ('Grant Griffin' , 'g2 at iowegian.com'),
-    ('Mark Hammond' , 'mhammond at skippinet.com.au'),
-    ('Peter Harris' , 'scav at blueyonder.co.uk'),
-    ('Thomas Heller' , 'theller at python.net'),
-    ('Magnus Lie Hetland' , 'magnus at hetland.org'),
-    ('Raymond D. Hettinger' , 'python at rcn.com'),
-    ('Neil Hodgson' , 'neilh at scintilla.org'),
-    ('Michael Hudson' , 'mwh at python.net'),
-    ('Jeremy Hylton' , 'jeremy at zope.com'),
-    ('Jack Jansen' , 'jack at cwi.nl'),
-    ('Jim Jewett' , 'jimjjewett at users.sourceforge.net'),
-    ('Richard Jones' , 'richard at mechanicalcat.net'),
-    ('Stepan Koltsov' , 'yozh at mx1.ru'),
-    ('A.M. Kuchling' , 'amk at amk.ca'),
-    ('Marc-Andre Lemburg' , 'mal at lemburg.com'),
-    ('Gregory Lielens' , 'gregory.lielens at fft.be'),
-    ('Björn Lindqvist' , 'bjourne at gmail.com'),
-    ('Martin von Löwis' , 'loewis at informatik.hu-berlin.de'),
-    ('Tony Lownds' , 'tony at pagedna.com'),
-    ('Alex Martelli' , 'aleax at aleax.it'),
-    ('Andrew McClelland' , 'eternalsquire at comcast.net'),
-    ('Gordon McMillan' , 'gmcm at hypernet.com'),
-    ('Andrew McNamara' , 'andrewm at object-craft.com.au'),
-    ('Trent Mick' , 'trentm at activestate.com'),
-    ('Mike Meyer' , 'mwm at mired.org'),
-    ('Skip Montanaro' , 'skip at pobox.com'),
-    ('Paul Moore' , 'gustav at morpheus.demon.co.uk'),
-    ('Ben North' , 'ben at redfrontdoor.org'),
-    ('Neal Norwitz' , 'nnorwitz at gmail.com'),
-    ('Travis Oliphant' , 'oliphant at ee.byu.edu'),
-    ('Jason Orendorff' , 'jason.orendorff at gmail.com'),
-    ('Samuele Pedroni' , 'pedronis at python.org'),
-    ('Michel Pelletier' , 'michel at users.sourceforge.net'),
-    ('Tim Peters' , 'tim at zope.com'),
-    ('Jason Petrone' , 'jp at demonseed.net'),
-    ('Paul Prescod' , 'paul at prescod.net'),
-    ('Terry Reedy' , 'tjreedy at udel.edu'),
-    ('Sean Reifschneider' , 'jafo-pep at tummy.com'),
-    ('Christian R. Reis' , 'kiko at async.com.br'),
-    ('Jonathan Riehl' , 'jriehl at spaceship.com'),
-    ('André Roberge' , 'andre.roberge at gmail.com'),
-    ('Guido van Rossum' , 'guido at python.org'),
-    ('Just van Rossum' , 'just at letterror.com'),
-    ('Vinay Sajip' , 'vinay_sajip at red-dove.com'),
-    ('Neil Schemenauer' , 'nas at arctrix.com'),
-    ('Peter Schneider-Kamp' , 'nowonder at nowonder.de'),
-    ('Jiwon Seo' , 'seojiwon at gmail.com'),
-    ('Kevin D. Smith' , 'Kevin.Smith at theMorgue.org'),
-    ('Greg Stein' , 'gstein at lyra.org'),
-    ('Daniel Stutzbach' , 'daniel.stutzbach at gmail.com'),
-    ('Roman Suzi' , 'rnd at onego.ru'),
-    ('Talin' , 'talin at acm.org'),
-    ('Steven Taschuk' , 'staschuk at telusplanet.net'),
-    ('Oren Tirosh' , 'oren at hishome.net'),
-    ('Mike Verdone' , 'mike.verdone at gmail.com'),
-    ('Gregory R. Warnes' , 'warnes at users.sourceforge.net'),
-    ('Barry Warsaw' , 'barry at python.org'),
-    ('Terence Way' , 'terry at wayforward.net'),
-    ('Cliff Wells' , 'LogiplexSoftware at earthlink.net'),
-    ('Greg Wilson' , 'gvwilson at ddj.com'),
-    ('Collin Winter' , 'collinw at gmail.com'),
-    ('Thomas Wouters' , 'thomas at python.org'),
-    ('Ka-Ping Yee' , 'ping at zesty.ca'),
-    ('Moshe Zadka' , 'moshez at zadka.site.co.il'),
-    ('Huaiyu Zhu' , 'hzhu at users.sourceforge.net'),
-)

Modified: sandbox/trunk/pep0/pep0/output.py
==============================================================================
--- sandbox/trunk/pep0/pep0/output.py	(original)
+++ sandbox/trunk/pep0/pep0/output.py	Wed Jun 20 18:55:09 2007
@@ -1,8 +1,8 @@
 """Code to handle the output of PEP 0."""
 from . import constants
-from .pep import PEP, last_name
+from .pep import PEP
 
-from operator import itemgetter
+from operator import attrgetter
 from sys import stdout
 
 
@@ -31,13 +31,13 @@
     for pep in peps:
         # Order of 'if' statement important.  Key Status values take precedence
         # over Type value, and vice-versa.
-        if pep.status == 'Draft':
+        if pep.type_ == 'Process':
+            meta.append(pep)
+        elif pep.status == 'Draft':
             open_.append(pep)
         elif pep.status in ('Rejected', 'Withdrawn', 'Deferred',
                 'Incomplete', 'Replaced'):
             dead.append(pep)
-        elif pep.type_ == 'Process':
-            meta.append(pep)
         elif pep.type_ == 'Informational':
             info.append(pep)
         elif pep.status in ('Accepted', 'Active'):
@@ -50,6 +50,48 @@
     return meta, info, accepted, open_, finished, dead
 
 
+def verify_email_addresses(peps):
+    authors_dict = {}
+    for pep in peps:
+        for author in pep.authors:
+            # If this is the first time we have come across an author, add him.
+            if author not in authors_dict:
+                authors_dict[author] = [author.email]
+            else:
+                found_emails = authors_dict[author]
+                # If no email exists for the author, use the new value.
+                if not found_emails[0]:
+                    authors_dict[author] = [author.email]
+                # If the new email is an empty string, move on.
+                elif not author.email:
+                    continue
+                # If the email has not been seen, add it to the list.
+                elif author.email not in found_emails:
+                    authors_dict[author].append(author.email)
+
+    valid_authors_dict = {}
+    too_many_emails = []
+    for author, emails in authors_dict.items():
+        if len(emails) > 1:
+            too_many_emails.append((author.full_name, emails))
+        else:
+            valid_authors_dict[author] = emails[0]
+    if too_many_emails:
+        err_output = []
+        for author, emails in too_many_emails:
+            err_output.append("    %s: %r" % (author, emails))
+        raise ValueError("some authors have more than one email address "
+                         "listed:\n" + '\n'.join(err_output))
+
+    return valid_authors_dict
+
+
+def sort_authors(authors_dict):
+    authors_list = authors_dict.keys()
+    authors_list.sort(key=attrgetter('sort_by'))
+    return authors_list
+
+
 def write_pep0(peps, output=stdout):
     print>>output, constants.header
     print>>output
@@ -115,27 +157,21 @@
     print>>output
     print>>output, "Owners"
     print>>output
-    max_name_len = len(max(constants.email_addresses, key=itemgetter(0)))
-    for name, nick in constants.nicknames.items():
-        name_nick_len = len(name) + len(nick) + 3  # Cover space and parens.
-        if name_nick_len > max_name_len:
-            max_name_len = name_nick_len
-    else:
-        max_name_len += 2  # Cover comma and space between last and first name.
+    authors_dict = verify_email_addresses(peps)
+    max_name = max(authors_dict.keys(),
+                            key=lambda x: len(x.first + x.last))
+    # Don't forget to cover the comma and space separating first and last.
+    max_name_len = len(max_name.last) + len(max_name.first) + 2
     print>>output, "    %s  %s" % ('name'.ljust(max_name_len), 'email address')
     print>>output, "    %s  %s" % ((len('name')*'-').ljust(max_name_len),
                                     len('email address')*'-')
-    for author, email in constants.email_addresses:
-        last_name_index = author.index(last_name(author))
-        first = author[:last_name_index].strip()
-        last = author[last_name_index:].strip()
-        if not first:
-            last_first = last
+    sorted_authors = sort_authors(authors_dict)
+    for author in sorted_authors:
+        if not author.first:
+            last_first = author.last
         else:
-            last_first = ', '.join([last, first])
-        if author in constants.nicknames:
-            last_first += " (%s)" % constants.nicknames[author]
-        print>>output, "    %s  %s" % (last_first.ljust(max_name_len), email)
+            last_first = ', '.join([author.last, author.first])
+        print>>output, "    %s  %s" % (last_first.ljust(max_name_len), author.email)
     print>>output
     print>>output
     print>>output, "References"

Modified: sandbox/trunk/pep0/pep0/pep.py
==============================================================================
--- sandbox/trunk/pep0/pep0/pep.py	(original)
+++ sandbox/trunk/pep0/pep0/pep.py	Wed Jun 20 18:55:09 2007
@@ -5,6 +5,84 @@
 import textwrap
 
 
+class Author(object):
+
+    """Represent PEP authors.
+
+    Attributes:
+
+        + full_name : str
+            The author's full name.
+
+        + full_last_first : str XXX
+            Output the author's name in Last, First, Suffix order.
+
+        + first : str
+            The author's first name.  A middle initial may be included.
+
+        + last : str
+            The author's last name.
+
+        + suffix : str XXX
+            A person's suffix (can be the empty string).
+
+        + sort_by : str
+            Modification of the author's last name that should be used for
+            sorting.
+
+        + email : str
+            The author's email address.
+    """
+
+    def __init__(self, author_and_email_tuple):
+        """Parse the name and email address of an author."""
+        name, email = author_and_email_tuple
+        self.full_name = name
+        self.email = email.lower()
+        last_name_fragment = self._last_name(name)
+        name_sep = name.index(last_name_fragment)
+        self.last = name[name_sep:]
+        self.first = name[:name_sep]
+
+
+    def __hash__(self):
+        return hash(self.full_name)
+
+
+    def __eq__(self, other):
+        return self.full_name == other.full_name
+
+
+    @property
+    def sort_by(self):
+        if self.last.startswith('von ') or self.last.startswith('van '):
+            return self.last.split(' ', 1)[1]
+        return self.last
+
+
+    def _last_name(self, full_name):
+        """Find the last name (or nickname) of a full name.
+
+        If no last name (e.g, 'Aahz') then return the full name.  If there is a
+        leading, lowercase portion to the last name (e.g., 'van' or 'von') then
+        include it.  If there is a suffix (e.g., 'Jr.') that is appended through a
+        comma, then drop the suffix.
+
+        """
+        no_suffix = full_name.partition(',')[0]
+        name_parts = no_suffix.split()
+        part_count = len(name_parts)
+        if part_count == 1 or part_count == 2:
+            return name_parts[-1]
+        else:
+            assert part_count > 2
+            if name_parts[-2].islower():
+                return ' '.join(name_parts[-2:])
+            else:
+                return name_parts[-1]
+
+
+
 class PEP(object):
 
     """Representation of PEPs.
@@ -24,8 +102,8 @@
         + status : str
             The PEP's status.  Value must be found in PEP.status_values.
 
-        + authors : Sequence(str)
-            A list of the authors' full names.
+        + authors : Sequence(Author)
+            A list of the authors.
     """
 
     # The various RFC 822 headers that are supported.
@@ -45,13 +123,11 @@
     status_values = ("Accepted", "Rejected", "Withdrawn", "Deferred", "Final",
                      "Active", "Draft", "Replaced")
 
-    # XXX Uncomment to valid author names (along with code in __init__).
-    #valid_authors = set(x[0] for x in constants.email_addresses)
-
-    
     def __init__(self, pep_file):
         """Init object from an open PEP file object."""
         # Parse the headers.
+        # XXX Use email.parser.HeaderParser().parse()
+        # XXX Use email.message.keys() to validate header order.
         metadata = {}
         header_name = None
         header_field_iter = iter(self.headers)
@@ -120,37 +196,37 @@
                                 "Active status (PEP %s)" % self.number)
         self.status = status
         # 'Author'.
-        authors = self._parse_author(metadata['Author'])
-        # XXX Uncomment (plus valid_authors) to validate author names.
-        #for author in authors:
-        #    if author not in self.valid_authors:
-        #        raise ValueError("%s is not a valid author name" % author)
-        if len(authors) < 1:
+        authors_and_emails = self._parse_author(metadata['Author'])
+        if len(authors_and_emails) < 1:
             raise ValueError("no authors found (PEP %s)" % self.number)
-        self.authors = authors
+        self.authors = [Author(x) for x in authors_and_emails]
 
 
     def _parse_author(self, data):
-        """Return a list of author names."""
+        """Return a list of author names and emails."""
         # XXX Consider using email.utils.parseaddr (doesn't work with names
         # lacking an email address.
-        angled = r'(?P<author>.+?) <.+?>'
-        paren = r'.+? \((?P<author>.+?)\)'
+        angled = r'(?P<author>.+?) <(?P<email>.+?)>'
+        paren = r'(?P<email>.+?) \((?P<author>.+?)\)'
         simple = r'(?P<author>[^,]+)'
         author_list = []
         for regex in (angled, paren, simple):
             # Watch out for commas separating multiple names.
             regex += '(,\s*)?'
             for match in re.finditer(regex, data):
-                author = match.group('author')
                 # Watch out for suffixes like 'Jr.' when they are comma-separated
                 # from the name and thus cause issues when *all* names are only
                 # separated by commas.
-                author = match.group('author')
+                match_dict = match.groupdict()
+                author = match_dict['author']
                 if not author.partition(' ')[1] and author.endswith('.'):
                     prev_author = author_list.pop()
                     author = ', '.join([prev_author, author])
-                author_list.append(author)
+                if 'email' not in match_dict:
+                    email = ''
+                else:
+                    email = match_dict['email']
+                author_list.append((author, email))
             else:
                 # If authors were found then stop searching as only expect one
                 # style of author citation.
@@ -176,8 +252,7 @@
     @property
     def author_abbr(self):
         """Return the author list as a comma-separated with only last names."""
-        return ', '.join(last_name(author, constants.nicknames)
-                            for author in self.authors)
+        return ', '.join(x.last for x in self.authors)
 
     @property
     def title_abbr(self):
@@ -194,28 +269,3 @@
                 'title': self.title_abbr, 'status': self.status_abbr,
                 'authors': self.author_abbr}
         return constants.column_format % pep_info
-
-
-def last_name(full_name, nicknames={}):
-    """Find the last name (or nickname) of a full name.
-
-    If no last name (e.g, 'Aahz') then return the full name.  If there is a
-    leading, lowercase portion to the last name (e.g., 'van' or 'von') then
-    include it.  If there is a suffix (e.g., 'Jr.') that is appended through a
-    comma, then drop the suffix.
-
-    """
-    nickname = nicknames.get(full_name)
-    if nickname:
-        return nickname
-    no_suffix = full_name.partition(',')[0]
-    name_parts = no_suffix.split()
-    part_count = len(name_parts)
-    if part_count == 1 or part_count == 2:
-        return name_parts[-1]
-    else:
-        assert part_count > 2
-        if name_parts[-2].islower():
-            return ' '.join(name_parts[-2:])
-        else:
-            return name_parts[-1]


More information about the Python-checkins mailing list