[Python-checkins] r56034 - sandbox/trunk/pep0/pep0/__init__.py sandbox/trunk/pep0/pep0/parse.py sandbox/trunk/pep0/pep0/pep.py

brett.cannon python-checkins at python.org
Tue Jun 19 06:22:34 CEST 2007


Author: brett.cannon
Date: Tue Jun 19 06:22:32 2007
New Revision: 56034

Removed:
   sandbox/trunk/pep0/pep0/parse.py
Modified:
   sandbox/trunk/pep0/pep0/__init__.py
   sandbox/trunk/pep0/pep0/pep.py
Log:
Move parsing of headers into the PEP class.  Also shift the constructor over to
taking an open file instead of a dictionary.  That allows for the removal of
pep0.parse.

All of this led to the verification that headers were in the proper order and
that no required headers were missing.


Modified: sandbox/trunk/pep0/pep0/__init__.py
==============================================================================
--- sandbox/trunk/pep0/pep0/__init__.py	(original)
+++ sandbox/trunk/pep0/pep0/__init__.py	Tue Jun 19 06:22:32 2007
@@ -13,22 +13,38 @@
     3. Output the PEP (both by category and numerical index).
 
 """
-from __future__ import absolute_import
+from __future__ import absolute_import, with_statement
 
 if __name__ == '__main__':
-    from pep0.parse import consume_directory, consume_pep
     from pep0.output import write_pep0
+    from pep0.pep import PEP
 
-    from os.path import isdir
+    from operator import attrgetter
+    import os.path
     from sys import argv, stdout
     
     if not argv[1:]:
         path = '.'
     else:
         path = argv[1]
-    if isdir(path):
-        peps = consume_directory(path)
+
+    peps = []
+    if os.path.isdir(path):
+        for file_path in os.listdir(path):
+            abs_file_path = os.path.join(path, file_path)
+            if not os.path.isfile(abs_file_path):
+                continue
+            if (not file_path.startswith('pep-') or
+                    not file_path.endswith('.txt')):
+                continue
+            with open(abs_file_path, 'r') as pep_file:
+                peps.append(PEP(pep_file))
+        else:
+            peps.sort(key=attrgetter('number'))
+    elif os.path.isfile(path):
+        with open(path, 'r') as pep_file:
+            peps.append(PEP(pep_file))
     else:
-        peps = [consume_pep(path)]
+        raise ValueError("argument must be a directory or file path")
 
     write_pep0(peps)

Deleted: /sandbox/trunk/pep0/pep0/parse.py
==============================================================================
--- /sandbox/trunk/pep0/pep0/parse.py	Tue Jun 19 06:22:32 2007
+++ (empty file)
@@ -1,86 +0,0 @@
-"""Parse the metadata from a PEP file.
-
-Parsing consists of several steps:
-
-    * Detecting and reading all lines of text relating to metadata.
-    * Concatenating multi-line metadata for a single field into a single line.
-    * Validate PEP number (needed for future error reporting).
-        + Must be an integer.
-        + Must match file name.
-
-"""
-from __future__ import with_statement
-from .pep import PEP
-
-from operator import attrgetter
-import os
-
-def consume_directory(directory):
-    """Pull out metadata for every PEP in the specified directory and return
-    them in a list sorted by PEP name.
-
-    The PEP file name must start with 'pep-' and end with '.txt' to be
-    considered.
-
-    """
-    peps = []
-    for file_name in os.listdir(directory):
-        if file_name.startswith('pep-') and file_name.endswith('.txt'):
-            peps.append(consume_pep(os.path.join(directory, file_name)))
-    peps.sort(key=attrgetter('number'))
-    return peps
-
-def consume_pep(path):
-    """Consume the specified file as a PEP to get its metadata."""
-    metadata = {}
-    field = None
-    with open(path, 'rU') as pep_file:
-        try:
-            for line in pep_file:
-                if line == '\n':
-                    # Found end of metadata.
-                    break
-                elif line[0].isspace():
-                    assert field is not None
-                    # Whitespace indent signifies multi-line field data.
-                    field, data = split_metadata(line, field)
-                else:
-                    field, data = split_metadata(line)
-                prev_data = metadata.get(field)
-                if prev_data:
-                    data = metadata[field] + data
-                metadata[field] = data
-        except Exception:
-            raise
-    # Make sure PEP field was found ...
-    if not 'PEP' in metadata:
-        raise ValueError("PEP at file %s lacks a PEP number" % path)
-    # ... it matches the file name in some way ...
-    if metadata['PEP'] not in path:
-        raise ValueError("PEP number in file %s does not match number "
-                         "specified in its file name" % path)
-    # ... and that the number is a valid integer.
-    try:
-        metadata['PEP'] = int(metadata['PEP'])
-    except ValueError:
-        raise ValueError("PEP number in file %s is not valid" % path)
-    return PEP(metadata)
-
-def split_metadata(line, continue_field=None):
-    """Parse the given line for PEP metadata, returning the field and data for
-    the line parsed.
-
-    If continue_field is specified then return that as the field parsed.
-
-    """
-    if continue_field:
-        field = continue_field
-        data = line
-    else:
-        try:
-            field, data = line.split(':', 1)
-        except ValueError:
-            raise ValueError("could not find field in %r" % line)
-    field = field.strip()
-    data = data.strip()
-    return field, data

Modified: sandbox/trunk/pep0/pep0/pep.py
==============================================================================
--- sandbox/trunk/pep0/pep0/pep.py	(original)
+++ sandbox/trunk/pep0/pep0/pep.py	Tue Jun 19 06:22:32 2007
@@ -28,7 +28,19 @@
             A list of the authors' full names.
     """
 
+    # The various RFC 822 headers that are supported.
+    # The second item in the nested tuples represents if the header is
+    # required or not.
+    headers = (('PEP', True), ('Title', True), ('Version', True),
+                ('Last-Modified', True), ('Author', True),
+                ('Discussions-To', False), ('Status', True), ('Type', True),
+                ('Content-Type', False), ('Requires', False),
+                ('Created', True), ('Python-Version', False),
+                ('Post-History', True), ('Replaces', False),
+                ('Replaced-By', False))
+    # Valid values for the Type header.
     type_values = ("Standards Track", "Informational", "Process")
+    # Valid values for the Status header.
     # Active PEPs can only be for Informational or Process PEPs.
     status_values = ("Accepted", "Rejected", "Withdrawn", "Deferred", "Final",
                      "Active", "Draft", "Replaced")
@@ -36,48 +48,68 @@
     # XXX Uncomment to validate author names (along with code in __init__).
     #valid_authors = set(x[0] for x in constants.email_addresses)
 
-    # XXX Take in an open file.
-    # XXX Parse header metadata (verify order and that all required fields
-    # exist).
-    def __init__(self, metadata_dict):
-        """Init object based on dict containing metadata from a file.
-        
-        Required keys from metadata_dict are:
-        
-            * PEP
-                Value must be an integer.
-
-            * Title
-                A string.
-
-            * Type
-                Value must match a value in self.type_values.
-
-            * Status
-                Value must match a value in self.status_values.
-
-            * Author
-                Value must have at least one author in the string as returned
-                by self.parse_author.
-
-        """
-        # Verify keys exist.
-        for required_key in ('PEP', 'Title', 'Type', 'Status', 'Author'):
-            if required_key not in metadata_dict:
-                raise KeyError("required key %r not in dict")
-        # 'PEP'.  PEP parsing should have already converted the number to an
-        # integer, so just being safe here.
-        self.number = int(metadata_dict['PEP'])
+    
+    def __init__(self, pep_file):
+        """Init object from an open PEP file object."""
+        # Parse the headers.
+        metadata = {}
+        header_name = None
+        header_field_iter = iter(self.headers)
+        try:
+            while True:
+                header_line = pep_file.readline()
+                if header_line == '\n':
+                    break
+                elif header_line.startswith(' '):
+                    existing_data = metadata[header_name]
+                    metadata[header_name] = existing_data + header_line.strip()
+                    continue
+                else:
+                    header_name, data = header_line.split(':', 1)
+                    header_name = header_name.strip()
+                    data = data.strip()
+                    expected_header, required = header_field_iter.next()
+                    try:
+                        while header_name != expected_header:
+                            if required:
+                                err_msg = ("The PEP at %s did not handle the "
+                                            "%s header before needing to "
+                                            "handle the %s header")
+                                raise ValueError(err_msg  % (pep_file.name,
+                                                    header_name,
+                                                    expected_header))
+                            else:
+                                expected_header, required = header_field_iter.next()
+                        metadata[header_name] = data
+                    except StopIteration:
+                        raise ValueError("the PEP at %s had the %s header unhandled "
+                                         "(something out of order?)" %
+                                            (pep_file.name, header_name))
+            else:
+                raise ValueError('no body to the PEP at %s' % pep_file.name)
+        except StopIteration:
+            raise ValueError("no body to the PEP at %s" % pep_file.name)
+        else:
+            try:
+                required = False
+                while not required:
+                    expected_header, required = header_field_iter.next()
+                raise ValueError("the PEP at %s is missing the %s header" %
+                                    (pep_file.name, expected_header))
+            except StopIteration:
+                pass
+        # 'PEP'.
+        self.number = int(metadata['PEP'])
         # 'Title'.
-        self.title = metadata_dict['Title']
+        self.title = metadata['Title']
         # 'Type'.
-        type_ = metadata_dict['Type']
+        type_ = metadata['Type']
         if type_ not in self.type_values:
             raise ValueError('%r is not a valid Type value (PEP %s)' %
                                 (type_, self.number))
         self.type_ = type_
         # 'Status'.
-        status = metadata_dict['Status']
+        status = metadata['Status']
         if status not in self.status_values:
             raise ValueError("%r is not a valid Status value (PEP %s)" %
                                 (status, self.number))
@@ -88,7 +120,7 @@
                                 "Active status (PEP %s)" % self.number)
         self.status = status
         # 'Author'.
-        authors = self._parse_author(metadata_dict['Author'])
+        authors = self._parse_author(metadata['Author'])
         # XXX Uncomment (plus valid_authors) to validate author names.
         #for author in authors:
         #    if author not in self.valid_authors:
@@ -97,6 +129,7 @@
             raise ValueError("no authors found (PEP %s)" % self.number)
         self.authors = authors
 
+
     def _parse_author(self, data):
         """Return a list of author names."""
         # XXX Consider using email.utils.parseaddr (doesn't work with names


More information about the Python-checkins mailing list