[ckan-changes] [okfn/ckan] 37101a: [2314] Fix broken parse_rfc_2822 helper function.
GitHub
noreply at github.com
Wed May 2 14:12:24 UTC 2012
Branch: refs/heads/master
Home: https://github.com/okfn/ckan
Commit: 37101a3d45350fae4dd219c16e320857b14634ee
https://github.com/okfn/ckan/commit/37101a3d45350fae4dd219c16e320857b14634ee
Author: Ian Murray <ian.murray at okfn.org>
Date: 2012-05-02 (Wed, 02 May 2012)
Changed paths:
M ckan/lib/helpers.py
M ckan/tests/lib/test_helpers.py
Log Message:
-----------
[2314] Fix broken parse_rfc_2822 helper function.
diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py
index b84636c..1e2b5bb 100644
--- a/ckan/lib/helpers.py
+++ b/ckan/lib/helpers.py
@@ -605,26 +605,69 @@ def date_str_to_datetime(date_str):
# a strptime. Also avoids problem with Python 2.5 not having %f.
return datetime.datetime(*map(int, re.split('[^\d]', date_str)))
-def parse_rfc_2822_date(date_str, tz_aware=True):
+def parse_rfc_2822_date(date_str, assume_utc=True):
"""
Parse a date string of the form specified in RFC 2822, and return a datetime.
- RFC 2822 is the date format used in HTTP headers.
-
- If the date string contains a timezone indication, and tz_aware is True,
- then the associated tzinfo is attached to the returned datetime object.
-
- Returns None if the string cannot be parse as a valid datetime.
+ RFC 2822 is the date format used in HTTP headers. It should contain timezone
+ information, but that cannot be relied upon.
+
+ If date_str doesn't contain timezone information, then the 'assume_utc' flag
+ determines whether we assume this string is local (with respect to the
+ server running this code), or UTC. In practice, what this means is that if
+ assume_utc is True, then the returned datetime is 'aware', with an associated
+ tzinfo of offset zero. Otherwise, the returned datetime is 'naive'.
+
+ If timezone information is available in date_str, then the returned datetime
+ is 'aware', i.e. it has an associated tzinfo object.
+
+ Returns None if the string cannot be parsed as a valid datetime.
"""
time_tuple = email.utils.parsedate_tz(date_str)
+ # Not parsable
if not time_tuple:
return None
- if not tz_aware:
- time_tuple = time_tuple[:-1] + (None,)
+ # No timezone information available in the string
+ if time_tuple[-1] is None and not assume_utc:
+ return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))
+ else:
+ offset = 0 if time_tuple[-1] is None else time_tuple[-1]
+ tz_info = _RFC2282TzInfo(offset)
+ return datetime.datetime(*time_tuple[:6], microsecond=0, tzinfo=tz_info)
+
+class _RFC2282TzInfo(datetime.tzinfo):
+ """
+ A datetime.tzinfo implementation used by parse_rfc_2822_date() function.
+
+ In order to return timezone information, a concrete implementation of
+ datetime.tzinfo is required. This class represents tzinfo that knows
+ about its offset from UTC, has no knowledge of daylight savings time, and
+ no knowledge of the timezone name.
+
+ """
+
+ def __init__(self, offset):
+ """
+ offset from UTC in seconds.
+ """
+ self.offset = datetime.timedelta(seconds=offset)
+
+ def utcoffset(self, dt):
+ return self.offset
+
+ def dst(self, dt):
+ """
+ Dates parsed from an RFC 2822 string conflate timezone and dst, and so
+ it's not possible to determine whether we're in DST or not, hence
+ returning None.
+ """
+ return None
+
+ def tzname(self, dt):
+ return None
- return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))
def time_ago_in_words_from_str(date_str, granularity='month'):
if date_str:
diff --git a/ckan/tests/lib/test_helpers.py b/ckan/tests/lib/test_helpers.py
index b7eeacb..5233d85 100644
--- a/ckan/tests/lib/test_helpers.py
+++ b/ckan/tests/lib/test_helpers.py
@@ -92,15 +92,25 @@ def test_gravatar_encodes_url_correctly(self):
for e in expected:
assert e in res, (e,res)
- def test_parse_rfc_2822_simple_case(self):
+ def test_parse_rfc_2822_no_timezone_specified(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26" successfully.
- No zone info.
+ Assuming it's UTC.
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26')
- assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+ assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')
+ def test_parse_rfc_2822_no_timezone_specified_assuming_local(self):
+ """
+ Parse "Tue, 15 Nov 1994 12:45:26" successfully.
+
+ Assuming it's local.
+ """
+ dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26', assume_utc=False)
+ assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+ assert_equal(dt.tzinfo, None)
+
def test_parse_rfc_2822_gmt_case(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26 GMT" successfully.
@@ -108,20 +118,12 @@ def test_parse_rfc_2822_gmt_case(self):
GMT obs-zone specified
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 GMT')
- assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+ assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')
def test_parse_rfc_2822_with_offset(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700')
- assert_equal(dt.isoformat(), '1994-11-15T05:45:26')
-
- def test_parse_rfc_2822_ignoring_offset(self):
- """
- Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
- """
- dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700', tz_aware=False)
- assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
-
+ assert_equal(dt.isoformat(), '1994-11-15T12:45:26+07:00')
================================================================
Commit: 12a42c665e9fc12678d910f6aa477901e29ecd5f
https://github.com/okfn/ckan/commit/12a42c665e9fc12678d910f6aa477901e29ecd5f
Author: Ian Murray <ian.murray at okfn.org>
Date: 2012-05-02 (Wed, 02 May 2012)
Changed paths:
M ckan/lib/helpers.py
M ckan/tests/lib/test_helpers.py
Log Message:
-----------
Merge branch 'bug-2314-parse_rfc_2822-tests-failing-in-some-timezones'
diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py
index f246646..b21d5a5 100644
--- a/ckan/lib/helpers.py
+++ b/ckan/lib/helpers.py
@@ -605,26 +605,69 @@ def date_str_to_datetime(date_str):
# a strptime. Also avoids problem with Python 2.5 not having %f.
return datetime.datetime(*map(int, re.split('[^\d]', date_str)))
-def parse_rfc_2822_date(date_str, tz_aware=True):
+def parse_rfc_2822_date(date_str, assume_utc=True):
"""
Parse a date string of the form specified in RFC 2822, and return a datetime.
- RFC 2822 is the date format used in HTTP headers.
-
- If the date string contains a timezone indication, and tz_aware is True,
- then the associated tzinfo is attached to the returned datetime object.
-
- Returns None if the string cannot be parse as a valid datetime.
+ RFC 2822 is the date format used in HTTP headers. It should contain timezone
+ information, but that cannot be relied upon.
+
+ If date_str doesn't contain timezone information, then the 'assume_utc' flag
+ determines whether we assume this string is local (with respect to the
+ server running this code), or UTC. In practice, what this means is that if
+ assume_utc is True, then the returned datetime is 'aware', with an associated
+ tzinfo of offset zero. Otherwise, the returned datetime is 'naive'.
+
+ If timezone information is available in date_str, then the returned datetime
+ is 'aware', i.e. it has an associated tzinfo object.
+
+ Returns None if the string cannot be parsed as a valid datetime.
"""
time_tuple = email.utils.parsedate_tz(date_str)
+ # Not parsable
if not time_tuple:
return None
- if not tz_aware:
- time_tuple = time_tuple[:-1] + (None,)
+ # No timezone information available in the string
+ if time_tuple[-1] is None and not assume_utc:
+ return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))
+ else:
+ offset = 0 if time_tuple[-1] is None else time_tuple[-1]
+ tz_info = _RFC2282TzInfo(offset)
+ return datetime.datetime(*time_tuple[:6], microsecond=0, tzinfo=tz_info)
+
+class _RFC2282TzInfo(datetime.tzinfo):
+ """
+ A datetime.tzinfo implementation used by parse_rfc_2822_date() function.
+
+ In order to return timezone information, a concrete implementation of
+ datetime.tzinfo is required. This class represents tzinfo that knows
+ about its offset from UTC, has no knowledge of daylight savings time, and
+ no knowledge of the timezone name.
+
+ """
+
+ def __init__(self, offset):
+ """
+ offset from UTC in seconds.
+ """
+ self.offset = datetime.timedelta(seconds=offset)
+
+ def utcoffset(self, dt):
+ return self.offset
+
+ def dst(self, dt):
+ """
+ Dates parsed from an RFC 2822 string conflate timezone and dst, and so
+ it's not possible to determine whether we're in DST or not, hence
+ returning None.
+ """
+ return None
+
+ def tzname(self, dt):
+ return None
- return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))
def time_ago_in_words_from_str(date_str, granularity='month'):
if date_str:
diff --git a/ckan/tests/lib/test_helpers.py b/ckan/tests/lib/test_helpers.py
index b7eeacb..5233d85 100644
--- a/ckan/tests/lib/test_helpers.py
+++ b/ckan/tests/lib/test_helpers.py
@@ -92,15 +92,25 @@ def test_gravatar_encodes_url_correctly(self):
for e in expected:
assert e in res, (e,res)
- def test_parse_rfc_2822_simple_case(self):
+ def test_parse_rfc_2822_no_timezone_specified(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26" successfully.
- No zone info.
+ Assuming it's UTC.
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26')
- assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+ assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')
+ def test_parse_rfc_2822_no_timezone_specified_assuming_local(self):
+ """
+ Parse "Tue, 15 Nov 1994 12:45:26" successfully.
+
+ Assuming it's local.
+ """
+ dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26', assume_utc=False)
+ assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+ assert_equal(dt.tzinfo, None)
+
def test_parse_rfc_2822_gmt_case(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26 GMT" successfully.
@@ -108,20 +118,12 @@ def test_parse_rfc_2822_gmt_case(self):
GMT obs-zone specified
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 GMT')
- assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
+ assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')
def test_parse_rfc_2822_with_offset(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700')
- assert_equal(dt.isoformat(), '1994-11-15T05:45:26')
-
- def test_parse_rfc_2822_ignoring_offset(self):
- """
- Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
- """
- dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700', tz_aware=False)
- assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
-
+ assert_equal(dt.isoformat(), '1994-11-15T12:45:26+07:00')
================================================================
Compare: https://github.com/okfn/ckan/compare/bb4da15...12a42c6
More information about the ckan-changes
mailing list