[ckan-changes] [okfn/ckan] 37101a: [2314] Fix broken parse_rfc_2822 helper function.

GitHub noreply at github.com
Wed May 2 14:12:24 UTC 2012


  Branch: refs/heads/master
  Home:   https://github.com/okfn/ckan
  Commit: 37101a3d45350fae4dd219c16e320857b14634ee
      https://github.com/okfn/ckan/commit/37101a3d45350fae4dd219c16e320857b14634ee
  Author: Ian Murray <ian.murray at okfn.org>
  Date:   2012-05-02 (Wed, 02 May 2012)

  Changed paths:
    M ckan/lib/helpers.py
    M ckan/tests/lib/test_helpers.py

  Log Message:
  -----------
  [2314] Fix broken parse_rfc_2822 helper function.


diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py
index b84636c..1e2b5bb 100644
--- a/ckan/lib/helpers.py
+++ b/ckan/lib/helpers.py
@@ -605,26 +605,69 @@ def date_str_to_datetime(date_str):
     # a strptime. Also avoids problem with Python 2.5 not having %f.
     return datetime.datetime(*map(int, re.split('[^\d]', date_str)))
 
-def parse_rfc_2822_date(date_str, tz_aware=True):
+def parse_rfc_2822_date(date_str, assume_utc=True):
     """
     Parse a date string of the form specified in RFC 2822, and return a datetime.
 
-    RFC 2822 is the date format used in HTTP headers.
-
-    If the date string contains a timezone indication, and tz_aware is True,
-    then the associated tzinfo is attached to the returned datetime object.
-
-    Returns None if the string cannot be parse as a valid datetime.
+    RFC 2822 is the date format used in HTTP headers.  It should contain timezone
+    information, but that cannot be relied upon.
+    
+    If date_str doesn't contain timezone information, then the 'assume_utc' flag
+    determines whether we assume this string is local (with respect to the
+    server running this code), or UTC.  In practice, what this means is that if
+    assume_utc is True, then the returned datetime is 'aware', with an associated
+    tzinfo of offset zero.  Otherwise, the returned datetime is 'naive'.
+
+    If timezone information is available in date_str, then the returned datetime
+    is 'aware', i.e. it has an associated tzinfo object.
+    
+    Returns None if the string cannot be parsed as a valid datetime.
     """
     time_tuple = email.utils.parsedate_tz(date_str)
 
+    # Not parsable
     if not time_tuple:
         return None
 
-    if not tz_aware:
-        time_tuple = time_tuple[:-1] + (None,)
+    # No timezone information available in the string
+    if time_tuple[-1] is None and not assume_utc:
+        return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))
+    else:
+        offset = 0 if time_tuple[-1] is None else time_tuple[-1]
+        tz_info = _RFC2282TzInfo(offset)
+    return datetime.datetime(*time_tuple[:6], microsecond=0, tzinfo=tz_info)
+
+class _RFC2282TzInfo(datetime.tzinfo):
+    """
+    A datetime.tzinfo implementation used by parse_rfc_2822_date() function.
+
+    In order to return timezone information, a concrete implementation of
+    datetime.tzinfo is required.  This class represents tzinfo that knows
+    about its offset from UTC, has no knowledge of daylight saving time, and
+    has no knowledge of the timezone name.
+
+    """
+
+    def __init__(self, offset):
+        """
+        offset from UTC in seconds.
+        """
+        self.offset = datetime.timedelta(seconds=offset)
+
+    def utcoffset(self, dt):
+        return self.offset
+
+    def dst(self, dt):
+        """
+        Dates parsed from an RFC 2822 string conflate timezone and dst, and so
+        it's not possible to determine whether we're in DST or not, hence
+        returning None.
+        """
+        return None
+
+    def tzname(self, dt):
+        return None
 
-    return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))
 
 def time_ago_in_words_from_str(date_str, granularity='month'):
     if date_str:
diff --git a/ckan/tests/lib/test_helpers.py b/ckan/tests/lib/test_helpers.py
index b7eeacb..5233d85 100644
--- a/ckan/tests/lib/test_helpers.py
+++ b/ckan/tests/lib/test_helpers.py
@@ -92,15 +92,25 @@ def test_gravatar_encodes_url_correctly(self):
         for e in expected:
             assert e in res, (e,res)
 
-    def test_parse_rfc_2822_simple_case(self):
+    def test_parse_rfc_2822_no_timezone_specified(self):
         """
         Parse "Tue, 15 Nov 1994 12:45:26" successfully.
 
-        No zone info.
+        Assuming it's UTC.
         """
         dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26')
-        assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+        assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')
     
+    def test_parse_rfc_2822_no_timezone_specified_assuming_local(self):
+        """
+        Parse "Tue, 15 Nov 1994 12:45:26" successfully.
+
+        Assuming it's local.
+        """
+        dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26', assume_utc=False)
+        assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+        assert_equal(dt.tzinfo, None)
+
     def test_parse_rfc_2822_gmt_case(self):
         """
         Parse "Tue, 15 Nov 1994 12:45:26 GMT" successfully.
@@ -108,20 +118,12 @@ def test_parse_rfc_2822_gmt_case(self):
         GMT obs-zone specified
         """
         dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 GMT')
-        assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+        assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')
 
     def test_parse_rfc_2822_with_offset(self):
         """
         Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
         """
         dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700')
-        assert_equal(dt.isoformat(), '1994-11-15T05:45:26')
-
-    def test_parse_rfc_2822_ignoring_offset(self):
-        """
-        Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
-        """
-        dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700', tz_aware=False)
-        assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
-
+        assert_equal(dt.isoformat(), '1994-11-15T12:45:26+07:00')
 


================================================================
  Commit: 12a42c665e9fc12678d910f6aa477901e29ecd5f
      https://github.com/okfn/ckan/commit/12a42c665e9fc12678d910f6aa477901e29ecd5f
  Author: Ian Murray <ian.murray at okfn.org>
  Date:   2012-05-02 (Wed, 02 May 2012)

  Changed paths:
    M ckan/lib/helpers.py
    M ckan/tests/lib/test_helpers.py

  Log Message:
  -----------
  Merge branch 'bug-2314-parse_rfc_2822-tests-failing-in-some-timezones'


diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py
index f246646..b21d5a5 100644
--- a/ckan/lib/helpers.py
+++ b/ckan/lib/helpers.py
@@ -605,26 +605,69 @@ def date_str_to_datetime(date_str):
     # a strptime. Also avoids problem with Python 2.5 not having %f.
     return datetime.datetime(*map(int, re.split('[^\d]', date_str)))
 
-def parse_rfc_2822_date(date_str, tz_aware=True):
+def parse_rfc_2822_date(date_str, assume_utc=True):
     """
     Parse a date string of the form specified in RFC 2822, and return a datetime.
 
-    RFC 2822 is the date format used in HTTP headers.
-
-    If the date string contains a timezone indication, and tz_aware is True,
-    then the associated tzinfo is attached to the returned datetime object.
-
-    Returns None if the string cannot be parse as a valid datetime.
+    RFC 2822 is the date format used in HTTP headers.  It should contain timezone
+    information, but that cannot be relied upon.
+    
+    If date_str doesn't contain timezone information, then the 'assume_utc' flag
+    determines whether we assume this string is local (with respect to the
+    server running this code), or UTC.  In practice, what this means is that if
+    assume_utc is True, then the returned datetime is 'aware', with an associated
+    tzinfo of offset zero.  Otherwise, the returned datetime is 'naive'.
+
+    If timezone information is available in date_str, then the returned datetime
+    is 'aware', i.e. it has an associated tzinfo object.
+    
+    Returns None if the string cannot be parsed as a valid datetime.
     """
     time_tuple = email.utils.parsedate_tz(date_str)
 
+    # Not parsable
     if not time_tuple:
         return None
 
-    if not tz_aware:
-        time_tuple = time_tuple[:-1] + (None,)
+    # No timezone information available in the string
+    if time_tuple[-1] is None and not assume_utc:
+        return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))
+    else:
+        offset = 0 if time_tuple[-1] is None else time_tuple[-1]
+        tz_info = _RFC2282TzInfo(offset)
+    return datetime.datetime(*time_tuple[:6], microsecond=0, tzinfo=tz_info)
+
+class _RFC2282TzInfo(datetime.tzinfo):
+    """
+    A datetime.tzinfo implementation used by parse_rfc_2822_date() function.
+
+    In order to return timezone information, a concrete implementation of
+    datetime.tzinfo is required.  This class represents tzinfo that knows
+    about its offset from UTC, has no knowledge of daylight saving time, and
+    has no knowledge of the timezone name.
+
+    """
+
+    def __init__(self, offset):
+        """
+        offset from UTC in seconds.
+        """
+        self.offset = datetime.timedelta(seconds=offset)
+
+    def utcoffset(self, dt):
+        return self.offset
+
+    def dst(self, dt):
+        """
+        Dates parsed from an RFC 2822 string conflate timezone and dst, and so
+        it's not possible to determine whether we're in DST or not, hence
+        returning None.
+        """
+        return None
+
+    def tzname(self, dt):
+        return None
 
-    return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))
 
 def time_ago_in_words_from_str(date_str, granularity='month'):
     if date_str:
diff --git a/ckan/tests/lib/test_helpers.py b/ckan/tests/lib/test_helpers.py
index b7eeacb..5233d85 100644
--- a/ckan/tests/lib/test_helpers.py
+++ b/ckan/tests/lib/test_helpers.py
@@ -92,15 +92,25 @@ def test_gravatar_encodes_url_correctly(self):
         for e in expected:
             assert e in res, (e,res)
 
-    def test_parse_rfc_2822_simple_case(self):
+    def test_parse_rfc_2822_no_timezone_specified(self):
         """
         Parse "Tue, 15 Nov 1994 12:45:26" successfully.
 
-        No zone info.
+        Assuming it's UTC.
         """
         dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26')
-        assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+        assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')
     
+    def test_parse_rfc_2822_no_timezone_specified_assuming_local(self):
+        """
+        Parse "Tue, 15 Nov 1994 12:45:26" successfully.
+
+        Assuming it's local.
+        """
+        dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26', assume_utc=False)
+        assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+        assert_equal(dt.tzinfo, None)
+
     def test_parse_rfc_2822_gmt_case(self):
         """
         Parse "Tue, 15 Nov 1994 12:45:26 GMT" successfully.
@@ -108,20 +118,12 @@ def test_parse_rfc_2822_gmt_case(self):
         GMT obs-zone specified
         """
         dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 GMT')
-        assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+        assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')
 
     def test_parse_rfc_2822_with_offset(self):
         """
         Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
         """
         dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700')
-        assert_equal(dt.isoformat(), '1994-11-15T05:45:26')
-
-    def test_parse_rfc_2822_ignoring_offset(self):
-        """
-        Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
-        """
-        dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700', tz_aware=False)
-        assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
-
+        assert_equal(dt.isoformat(), '1994-11-15T12:45:26+07:00')
 


================================================================
Compare: https://github.com/okfn/ckan/compare/bb4da15...12a42c6


More information about the ckan-changes mailing list