[Python-checkins] cpython: #665194: support roundtripping RFC2822 date stamps in the email.utils module

r.david.murray python-checkins at python.org
Wed Jul 20 17:41:43 CEST 2011


http://hg.python.org/cpython/rev/5f7b03dcd523
changeset:   71439:5f7b03dcd523
parent:      71437:bc71fff2b6c7
user:        R David Murray <rdmurray at bitdance.com>
date:        Wed Jul 20 11:41:21 2011 -0400
summary:
  #665194: support roundtripping RFC2822 date stamps in the email.utils module

files:
  Doc/library/email.util.rst        |  28 +++++++++++
  Lib/email/_parseaddr.py           |  19 +++++++-
  Lib/email/utils.py                |  46 ++++++++++++++++--
  Lib/test/test_email/test_utils.py |  45 ++++++++++++++++++
  Misc/NEWS                         |   3 +
  5 files changed, 133 insertions(+), 8 deletions(-)


diff --git a/Doc/library/email.util.rst b/Doc/library/email.util.rst
--- a/Doc/library/email.util.rst
+++ b/Doc/library/email.util.rst
@@ -81,6 +81,20 @@
    indexes 6, 7, and 8 of the result tuple are not usable.
 
 
+.. function:: parsedate_to_datetime(date)
+
+   The inverse of :func:`format_datetime`.  Performs the same function as
+   :func:`parsedate`, but on success returns a :class:`~datetime.datetime`.  If
+   the input date has a timezone of ``-0000``, the ``datetime`` will be a naive
+   ``datetime``, and if the date is conforming to the RFCs it will represent a
+   time in UTC but with no indication of the actual source timezone of the
+   message the date comes from.  If the input date has any other valid timezone
+   offset, the ``datetime`` will be an aware ``datetime`` with the
+   corresponding :class:`~datetime.timezone` :class:`~datetime.tzinfo`.
+
+   .. versionadded:: 3.3
+
+
 .. function:: mktime_tz(tuple)
 
    Turn a 10-tuple as returned by :func:`parsedate_tz` into a UTC timestamp.  It
@@ -112,6 +126,20 @@
    ``False``.  The default is ``False``.
 
 
+.. function:: format_datetime(dt, usegmt=False)
+
+   Like :func:`formatdate`, but the input is a :class:`~datetime.datetime`
+   instance.  If it is a naive ``datetime``, it is assumed to be "UTC with no
+   information about the source timezone", and the conventional ``-0000`` is
+   used for the timezone.  If it is an aware ``datetime``, then the numeric
+   timezone offset is used.  If it is an aware ``datetime`` with offset zero,
+   then *usegmt* may be set to ``True``, in which case the string ``GMT`` is
+   used instead of the numeric timezone offset.  This provides a way to
+   generate standards-conformant HTTP date headers.
+
+   .. versionadded:: 3.3
+
+
 .. function:: make_msgid(idstring=None, domain=None)
 
    Returns a string suitable for an :rfc:`2822`\ -compliant
diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py
--- a/Lib/email/_parseaddr.py
+++ b/Lib/email/_parseaddr.py
@@ -47,6 +47,21 @@
 
     Accounts for military timezones.
     """
+    res = _parsedate_tz(data)
+    if res[9] is None:
+        res[9] = 0
+    return tuple(res)
+
+def _parsedate_tz(data):
+    """Convert date to extended time tuple.
+
+    The last (additional) element is the time zone offset in seconds, except if
+    the timezone was specified as -0000.  In that case the last element is
+    None.  This indicates a UTC timestamp that explicitly disclaims knowledge
+    of the source timezone, as opposed to a +0000 timestamp that indicates the
+    source timezone really was UTC.
+
+    """
     data = data.split()
     # The FWS after the comma after the day-of-week is optional, so search and
     # adjust for this.
@@ -138,6 +153,8 @@
             tzoffset = int(tz)
         except ValueError:
             pass
+        if tzoffset==0 and tz.startswith('-'):
+            tzoffset = None
     # Convert a timezone offset into seconds ; -0500 -> -18000
     if tzoffset:
         if tzoffset < 0:
@@ -147,7 +164,7 @@
             tzsign = 1
         tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
     # Daylight Saving Time flag is set to -1, since DST is unknown.
-    return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset
+    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
 
 
 def parsedate(data):
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -11,12 +11,14 @@
     'encode_rfc2231',
     'formataddr',
     'formatdate',
+    'format_datetime',
     'getaddresses',
     'make_msgid',
     'mktime_tz',
     'parseaddr',
     'parsedate',
     'parsedate_tz',
+    'parsedate_to_datetime',
     'unquote',
     ]
 
@@ -26,6 +28,7 @@
 import base64
 import random
 import socket
+import datetime
 import urllib.parse
 import warnings
 from io import StringIO
@@ -37,6 +40,7 @@
 # We need wormarounds for bugs in these methods in older Pythons (see below)
 from email._parseaddr import parsedate as _parsedate
 from email._parseaddr import parsedate_tz as _parsedate_tz
+from email._parseaddr import _parsedate_tz as __parsedate_tz
 
 from quopri import decodestring as _qdecode
 
@@ -110,6 +114,14 @@
   ''', re.VERBOSE | re.IGNORECASE)
 
 
+def _format_timetuple_and_zone(timetuple, zone):
+    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
+        ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]],
+        timetuple[2],
+        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1],
+        timetuple[0], timetuple[3], timetuple[4], timetuple[5],
+        zone)
 
 def formatdate(timeval=None, localtime=False, usegmt=False):
     """Returns a date string as specified by RFC 2822, e.g.:
@@ -154,14 +166,25 @@
             zone = 'GMT'
         else:
             zone = '-0000'
-    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
-        ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
-        now[2],
-        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
-         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
-        now[0], now[3], now[4], now[5],
-        zone)
+    return _format_timetuple_and_zone(now, zone)
 
+def format_datetime(dt, usegmt=False):
+    """Turn a datetime into a date string as specified in RFC 2822.
+
+    If usegmt is True, dt must be an aware datetime with an offset of zero.  In
+    this case 'GMT' will be rendered instead of the normal +0000 required by
+    RFC2822.  This is to support HTTP headers involving date stamps.
+    """
+    now = dt.timetuple()
+    if usegmt:
+        if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc:
+            raise ValueError("usegmt option requires a UTC datetime")
+        zone = 'GMT'
+    elif dt.tzinfo is None:
+        zone = '-0000'
+    else:
+        zone = dt.strftime("%z")
+    return _format_timetuple_and_zone(now, zone)
 
 
 def make_msgid(idstring=None, domain=None):
@@ -203,6 +226,15 @@
         return None
     return _parsedate_tz(data)
 
+def parsedate_to_datetime(data):
+    if not data:
+        return None
+    *dtuple, tz = __parsedate_tz(data)
+    if tz is None:
+        return datetime.datetime(*dtuple[:6])
+    return datetime.datetime(*dtuple[:6],
+            tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
+
 
 def parseaddr(addr):
     addrs = _AddressList(addr).addresslist
diff --git a/Lib/test/test_email/test_utils.py b/Lib/test/test_email/test_utils.py
new file mode 100644
--- /dev/null
+++ b/Lib/test/test_email/test_utils.py
@@ -0,0 +1,45 @@
+import datetime
+from email import utils
+import unittest
+
+class DateTimeTests(unittest.TestCase):
+
+    datestring = 'Sun, 23 Sep 2001 20:10:55'
+    dateargs = (2001, 9, 23, 20, 10, 55)
+    offsetstring = ' -0700'
+    utcoffset = datetime.timedelta(hours=-7)
+    tz = datetime.timezone(utcoffset)
+    naive_dt = datetime.datetime(*dateargs)
+    aware_dt = datetime.datetime(*dateargs, tzinfo=tz)
+
+    def test_naive_datetime(self):
+        self.assertEqual(utils.format_datetime(self.naive_dt),
+                         self.datestring + ' -0000')
+
+    def test_aware_datetime(self):
+        self.assertEqual(utils.format_datetime(self.aware_dt),
+                         self.datestring + self.offsetstring)
+
+    def test_usegmt(self):
+        utc_dt = datetime.datetime(*self.dateargs,
+                                   tzinfo=datetime.timezone.utc)
+        self.assertEqual(utils.format_datetime(utc_dt, usegmt=True),
+                         self.datestring + ' GMT')
+
+    def test_usegmt_with_naive_datetime_raises(self):
+        with self.assertRaises(ValueError):
+            utils.format_datetime(self.naive_dt, usegmt=True)
+
+    def test_usegmt_with_non_utc_datetime_raises(self):
+        with self.assertRaises(ValueError):
+            utils.format_datetime(self.aware_dt, usegmt=True)
+
+    def test_parsedate_to_datetime(self):
+        self.assertEqual(
+            utils.parsedate_to_datetime(self.datestring + self.offsetstring),
+            self.aware_dt)
+
+    def test_parsedate_to_datetime_naive(self):
+        self.assertEqual(
+            utils.parsedate_to_datetime(self.datestring + ' -0000'),
+            self.naive_dt)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -234,6 +234,9 @@
 Library
 -------
 
+- Issue #665194: email.utils now has format_datetime and parsedate_to_datetime
+  functions, allowing for round tripping of RFC2822 format dates.
+
 - Issue #12571: Add a plat-linux3 directory mirroring the plat-linux2
   directory, so that "import DLFCN" and other similar imports work on
   Linux 3.0.

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list