[ckan-changes] [okfn/ckan] 11e0b5: [2314] Fix broken parse_rfc_2822 helper function.

GitHub noreply at github.com
Wed May 2 14:16:19 UTC 2012

  Branch: refs/heads/release-v1.7
  Home:   https://github.com/okfn/ckan
  Commit: 11e0b50f5bb258da58dea18b391fcbbed8a93a89
  Author: Ian Murray <ian.murray at okfn.org>
  Date:   2012-05-02 (Wed, 02 May 2012)

  Changed paths:
    M ckan/lib/helpers.py
    M ckan/tests/lib/test_helpers.py

  Log Message:
  [2314] Fix broken parse_rfc_2822 helper function.

diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py
index 541c367..92c40a6 100644
--- a/ckan/lib/helpers.py
+++ b/ckan/lib/helpers.py
@@ -596,26 +596,69 @@ def date_str_to_datetime(date_str):
     # a strptime. Also avoids problem with Python 2.5 not having %f.
     return datetime.datetime(*map(int, re.split('[^\d]', date_str)))
-def parse_rfc_2822_date(date_str, tz_aware=True):
+def parse_rfc_2822_date(date_str, assume_utc=True):
     Parse a date string of the form specified in RFC 2822, and return a datetime.
-    RFC 2822 is the date format used in HTTP headers.
-    If the date string contains a timezone indication, and tz_aware is True,
-    then the associated tzinfo is attached to the returned datetime object.
-    Returns None if the string cannot be parse as a valid datetime.
+    RFC 2822 is the date format used in HTTP headers.  It should contain timezone
+    information, but that cannot be relied upon.
+    If date_str doesn't contain timezone information, then the 'assume_utc' flag
+    determines whether we assume this string is local (with respect to the
+    server running this code), or UTC.  In practice, what this means is that if
+    assume_utc is True, then the returned datetime is 'aware', with an associated
+    tzinfo of offset zero.  Otherwise, the returned datetime is 'naive'.
+    If timezone information is available in date_str, then the returned datetime
+    is 'aware', i.e. it has an associated tzinfo object.
+    Returns None if the string cannot be parsed as a valid datetime.
     time_tuple = email.utils.parsedate_tz(date_str)
+    # Not parsable
     if not time_tuple:
         return None
-    if not tz_aware:
-        time_tuple = time_tuple[:-1] + (None,)
+    # No timezone information available in the string
+    if time_tuple[-1] is None and not assume_utc:
+        return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))
+    else:
+        offset = 0 if time_tuple[-1] is None else time_tuple[-1]
+        tz_info = _RFC2282TzInfo(offset)
+    return datetime.datetime(*time_tuple[:6], microsecond=0, tzinfo=tz_info)
+class _RFC2282TzInfo(datetime.tzinfo):
+    """
+    A datetime.tzinfo implementation used by parse_rfc_2822_date() function.
+    In order to return timezone information, a concrete implementation of
+    datetime.tzinfo is required.  This class represents tzinfo that knows
+    about its offset from UTC, has no knowledge of daylight savings time, and
+    no knowledge of the timezone name.
+    """
+    def __init__(self, offset):
+        """
+        offset from UTC in seconds.
+        """
+        self.offset = datetime.timedelta(seconds=offset)
+    def utcoffset(self, dt):
+        return self.offset
+    def dst(self, dt):
+        """
+        Dates parsed from an RFC 2822 string conflate timezone and dst, and so
+        it's not possible to determine whether we're in DST or not, hence
+        returning None.
+        """
+        return None
+    def tzname(self, dt):
+        return None
-    return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))
 def time_ago_in_words_from_str(date_str, granularity='month'):
     if date_str:
diff --git a/ckan/tests/lib/test_helpers.py b/ckan/tests/lib/test_helpers.py
index b7eeacb..5233d85 100644
--- a/ckan/tests/lib/test_helpers.py
+++ b/ckan/tests/lib/test_helpers.py
@@ -92,15 +92,25 @@ def test_gravatar_encodes_url_correctly(self):
         for e in expected:
             assert e in res, (e,res)
-    def test_parse_rfc_2822_simple_case(self):
+    def test_parse_rfc_2822_no_timezone_specified(self):
         Parse "Tue, 15 Nov 1994 12:45:26" successfully.
-        No zone info.
+        Assuming it's UTC.
         dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26')
-        assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+        assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')
+    def test_parse_rfc_2822_no_timezone_specified_assuming_local(self):
+        """
+        Parse "Tue, 15 Nov 1994 12:45:26" successfully.
+        Assuming it's local.
+        """
+        dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26', assume_utc=False)
+        assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+        assert_equal(dt.tzinfo, None)
     def test_parse_rfc_2822_gmt_case(self):
         Parse "Tue, 15 Nov 1994 12:45:26 GMT" successfully.
@@ -108,20 +118,12 @@ def test_parse_rfc_2822_gmt_case(self):
         GMT obs-zone specified
         dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 GMT')
-        assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+        assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')
     def test_parse_rfc_2822_with_offset(self):
         Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
         dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700')
-        assert_equal(dt.isoformat(), '1994-11-15T05:45:26')
-    def test_parse_rfc_2822_ignoring_offset(self):
-        """
-        Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
-        """
-        dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700', tz_aware=False)
-        assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+        assert_equal(dt.isoformat(), '1994-11-15T12:45:26+07:00')


