[ckan-changes] commit/datautil-date: dread: [rename] datautildate to avoid conflicts with datautil.
Bitbucket
commits-noreply at bitbucket.org
Tue Nov 29 13:00:38 UTC 2011
1 new commit in datautil-date:
https://bitbucket.org/okfn/datautil-date/changeset/4e8b4c556738/
changeset: 4e8b4c556738
user: dread
date: 2011-11-29 13:54:12
summary: [rename] datautildate to avoid conflicts with datautil.
affected #: 10 files
diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee .hgignore
--- /dev/null
+++ b/.hgignore
@@ -0,0 +1,1 @@
+.*egg-info.*
\ No newline at end of file
diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautil/__init__.py
--- a/datautil/__init__.py
+++ /dev/null
@@ -1,1 +0,0 @@
-__version__ = '0.4'
diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautil/date.py
--- a/datautil/date.py
+++ /dev/null
@@ -1,282 +0,0 @@
-'''Date parsing and normalization utilities based on FlexiDate.
-
-To parser dates use parse, e.g.::
-
- parse('1890') -> FlexiDate(year=u'1890')
- parse('1890?') -> FlexiDate(year=u'1890', qualifier='Uncertainty: 1985?')
-
-Once you have a FlexiDate you can get access to attributes (strings of course
-...)::
-
- fd = parse('Jan 1890')
- fd.year # u'1890'
- fd.month # u'01'
-
-And convert to other forms:
-
- fd.as_float() # 1890
- fd.as_datetime() # datetime(1890,01,01)
-
-Background
-==========
-
-FlexiDate is focused on supporting:
-
- 1. Dates outside of Python (or DB) supported period (esp. dates < 0 AD)
- 2. Imprecise dates (c.1860, 18??, fl. 1534, etc)
- 3. Normalization of dates to machine processable versions
- 4. Sortable in the database (in correct date order)
-
-For more information see:
-
-http://www.rufuspollock.org/2009/06/18/flexible-dates-in-python/
-'''
-import re
-import datetime
-
-class FlexiDate(object):
- """Store dates as strings and present them in a slightly extended version
- of ISO8601.
-
- Modifications:
- * Allow a trailing qualifiers e.g. fl.
- * Allow replacement of unknown values by ? e.g. if sometime in 1800s
- can do 18??
-
- Restriction on ISO8601:
- * Truncation (e.g. of centuries) is *not* permitted.
- * No week and day representation e.g. 1999-W01
- """
- # pass
- def __init__(self, year=None, month=None, day=None, qualifier=''):
- # force = month or day or qualifier
- force = False
- self.year = self._cvt(year, rjust=4, force=force)
- self.month = self._cvt(month)
- self.day = self._cvt(day)
- self.qualifier = qualifier
-
- def _cvt(self, val, rjust=2, force=False):
- if val:
- tmp = unicode(val).strip()
- if tmp.startswith('-'):
- tmp = '-' + tmp[1:].rjust(rjust, '0')
- else:
- tmp = tmp.rjust(rjust, '0')
- return tmp
- elif force:
- # use '!' rather than '?' as '!' < '1' while '?' > '1'
- return rjust * '!'
- else:
- return ''
-
- def __str__(self):
- out = self.isoformat()
- if self.qualifier:
- # leading space is important as ensures when no year sort in right
- # order as ' ' < '1'
- out += u' [%s]' % self.qualifier
- return out
-
- def __repr__(self):
- return u'%s %s' % (self.__class__, self.__str__())
-
- def isoformat(self, strict=False):
- '''Return date in isoformat (same as __str__ but without qualifier).
-
- WARNING: does not replace '?' in dates unless strict=True.
- '''
- out = self.year
- # what do we do when no year ...
- for val in [ self.month, self.day ]:
- if not val:
- break
- out += u'-' + val
- if strict:
- out = out.replace('?', '0')
- return out
-
- our_re_pat = '''
- (?P<year> -?[\d?]+)
- (?:
- \s* - (?P<month> [\d?]{1,2})
- (?: \s* - (?P<day> [\d?]{1,2}) )?
- )?
- \s*
- (?: \[ (?P<qualifier>[^]]*) \])?
- '''
- our_re = re.compile(our_re_pat, re.VERBOSE)
- @classmethod
- def from_str(self, instr):
- '''Undo affect of __str__'''
- if not instr:
- return FlexiDate()
-
- out = self.our_re.match(instr)
- if out is None: # no match TODO: raise Exception?
- return None
- else:
- return FlexiDate(
- out.group('year'),
- out.group('month'),
- out.group('day'),
- qualifier=out.group('qualifier')
- )
-
- def as_float(self):
- '''Get as a float (year being the integer part).
-
- Replace '?' in year with 9 so as to be conservative (e.g. 19?? becomes
- 1999) and elsewhere (month, day) with 0
-
- @return: float.
- '''
- if not self.year: return None
- out = float(self.year.replace('?', '9'))
- if self.month:
- # TODO: we are assuming months are of equal length
- out += float(self.month.replace('?', '0')) / 12.0
- if self.day:
- out += float(self.day.replace('?', '0')) / 365.0
- return out
-
- def as_datetime(self):
- '''Get as python datetime.datetime.
-
- Require year to be a valid datetime year. Default month and day to 1 if
- do not exist.
-
- @return: datetime.datetime object.
- '''
- year = int(self.year)
- month = int(self.month) if self.month else 1
- day = int(self.day) if self.day else 1
- return datetime.datetime(year, month, day)
-
-
-def parse(date, dayfirst=True):
- '''Parse a `date` into a `FlexiDate`.
-
- @param date: the date to parse - may be a string, datetime.date,
- datetime.datetime or FlexiDate.
-
- TODO: support for quarters e.g. Q4 1980 or 1954 Q3
- TODO: support latin stuff like M.DCC.LIII
- TODO: convert '-' to '?' when used that way
- e.g. had this date [181-]
- '''
- if not date:
- return None
- if isinstance(date, FlexiDate):
- return date
- if isinstance(date, int):
- return FlexiDate(year=date)
- elif isinstance(date, datetime.date):
- parser = PythonDateParser()
- return parser.parse(date)
- else: # assuming its a string
- parser = DateutilDateParser()
- out = parser.parse(date, **{'dayfirst': dayfirst})
- if out is not None:
- return out
- # msg = 'Unable to parse %s' % date
- # raise ValueError(date)
- val = 'UNPARSED: %s' % date
- val = val.encode('ascii', 'ignore')
- return FlexiDate(qualifier=val)
-
-
-class DateParserBase(object):
- def parse(self, date):
- raise NotImplementedError
-
- def norm(self, date):
- return str(self.parse(date))
-
-class PythonDateParser(object):
- def parse(self, date):
- return FlexiDate(date.year, date.month, date.day)
-
-try:
- import dateutil.parser
- dateutil_parser = dateutil.parser.parser()
-except:
- dateutil_parser = None
-
-class DateutilDateParser(DateParserBase):
- _numeric = re.compile("^[0-9]+$")
- def parse(self, date, **kwargs):
- '''
- :param **kwargs: any kwargs accepted by dateutil.parse function.
- '''
- qualifiers = []
- if dateutil_parser is None:
- return None
- date = orig_date = date.strip()
-
- # various normalizations
- # TODO: call .lower() first
- date = date.replace('B.C.', 'BC')
- date = date.replace('A.D.', 'AD')
-
- # deal with pre 0AD dates
- if date.startswith('-') or 'BC' in date or 'B.C.' in date:
- pre0AD = True
- else:
- pre0AD = False
- # BC seems to mess up parser
- date = date.replace('BC', '')
-
- # deal with circa: 'c.1950' or 'c1950'
- circa_match = re.match('([^a-zA-Z]*)c\.?\s*(\d+.*)', date)
- if circa_match:
- # remove circa bit
- qualifiers.append("Note 'circa'")
- date = ''.join(circa_match.groups())
-
- # deal with p1980 (what does this mean? it can appear in
- # field 008 of MARC records
- p_match = re.match("^p(\d+)", date)
- if p_match:
- date = date[1:]
-
- # Deal with uncertainty: '1985?'
- uncertainty_match = re.match('([0-9xX]{4})\?', date)
- if uncertainty_match:
- # remove the ?
- date = date[:-1]
- qualifiers.append('Uncertainty')
-
- # Parse the numbers intelligently
- # do not use std parser function as creates lots of default data
- res = dateutil_parser._parse(date, **kwargs)
-
- if res is None:
- # Couldn't parse it
- return None
- #Note: Years of less than 3 digits not interpreted by
- # dateutil correctly
- # e.g. 87 -> 1987
- # 4 -> day 4 (no year)
- # Both cases are handled in this routine
- if res.year is None and res.day:
- year = res.day
- # If the whole date is simply two digits then dateutil_parser makes
- # it '86' -> '1986'. So strip off the '19'. (If the date specified
- # day/month then a two digit year is more likely to be this century
- # and so allow the '19' prefix to it.)
- elif self._numeric.match(date) and (len(date) == 2 or date.startswith('00')):
- year = res.year % 100
- else:
- year = res.year
-
- # finally add back in BC stuff
- if pre0AD:
- year = -year
-
- if not qualifiers:
- qualifier = ''
- else:
- qualifier = ', '.join(qualifiers) + (' : %s' % orig_date)
- return FlexiDate(year, res.month, res.day, qualifier=qualifier)
-
diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautil/tests/__init__.py
--- a/datautil/tests/__init__.py
+++ /dev/null
@@ -1,1 +0,0 @@
-__version__ = '0.4'
diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautil/tests/test_date.py
--- a/datautil/tests/test_date.py
+++ /dev/null
@@ -1,207 +0,0 @@
-from datautil.date import *
-
-import datetime
-
-class TestPythonStringOrdering(object):
- # It is impossible to find a string format such that +ve and -ve numbers
- # sort correctly as strings:
- # if (in string ordering) X < Y => -X < -Y (False!)
- def test_ordering(self):
- assert '0' < '1'
- assert '-10' < '10'
- assert '-' < '@'
- assert '-' < '0'
- assert '-100' < '-X10'
- assert '10' < '1000'
- assert '02000' < '10000'
- assert ' 2000' < '10000'
-
- def test_bad_ordering(self):
- assert ' ' < '0'
- assert ' ' < '-'
- assert not '-' < '+'
- assert '-100' > '-10'
- assert not '-100' < '-010'
- assert not '-100' < '- 10'
- assert not '-100' < ' -10'
- assert '10000' < '2000'
- assert not '-10' < ' 1'
-
-
-class TestFlexiDate(object):
- def test_init(self):
- fd = FlexiDate()
- assert fd.year == '', fd
- assert fd.month == '', fd
-
- fd = FlexiDate(2000, 1,1)
- assert fd.month == '01', fd
- assert fd.day== '01', fd
-
- def test_str(self):
- fd = FlexiDate(2000, 1, 23)
- assert str(fd) == '2000-01-23', '"%s"' % fd
- fd = FlexiDate(-2000, 1, 23)
- assert str(fd) == '-2000-01-23'
- fd = FlexiDate(2000)
- assert str(fd) == '2000'
- fd = FlexiDate(1760, qualifier='fl.')
- assert str(fd) == '1760 [fl.]', fd
-
- fd = FlexiDate(qualifier='anything')
- assert str(fd) == ' [anything]'
-
-
- def test_from_str(self):
- def dotest(fd):
- out = FlexiDate.from_str(str(fd))
- assert str(out) == str(fd)
-
- fd = FlexiDate(2000, 1, 23)
- dotest(fd)
- fd = FlexiDate(1760, qualifier='fl.')
- dotest(fd)
- fd = FlexiDate(-1760, 1, 3, qualifier='fl.')
- dotest(fd)
-
- def test_as_float(self):
- fd = FlexiDate(2000)
- assert fd.as_float() == float(2000), fd.as_float()
- fd = FlexiDate(1760, 1, 2)
- exp = 1760 + 1/12.0 + 2/365.0
- assert fd.as_float() == exp, fd.as_float()
- fd = FlexiDate(-1000)
- assert fd.as_float() == float(-1000)
-
- def test_as_datetime(self):
- fd = FlexiDate(2000)
- out = fd.as_datetime()
- assert out == datetime.datetime(2000, 1, 1), out
- fd = FlexiDate(1760, 1, 2)
- out = fd.as_datetime()
- assert out == datetime.datetime(1760,1,2), out
-
-
-class TestDateParsers(object):
- def test_using_datetime(self):
- parser = PythonDateParser()
-
- d1 = datetime.date(2000, 1, 23)
- fd = parser.parse(d1)
- assert fd.year == '2000'
-
- d1 = datetime.datetime(2000, 1, 23)
- fd = parser.parse(d1)
- # assert str(fd) == '2000-01-23T00:00:00', fd
- assert str(fd) == '2000-01-23', fd
-
- def test_using_dateutil(self):
- parser = DateutilDateParser()
-
- in1 = '2001-02'
- fd = parser.parse(in1)
- assert str(fd) == in1, fd
-
- in1 = 'March 1762'
- fd = parser.parse(in1)
- assert str(fd) == '1762-03'
-
- in1 = 'March 1762'
- fd = parser.parse(in1)
- assert str(fd) == '1762-03'
-
- in1 = '1768 AD'
- fd = parser.parse(in1)
- assert str(fd) == '1768', fd
-
- in1 = '1768 A.D.'
- fd = parser.parse(in1)
- assert str(fd) == '1768', fd
-
- in1 = '-1850'
- fd = parser.parse(in1)
- assert str(fd) == '-1850', fd
-
- in1 = '1762 BC'
- fd = parser.parse(in1)
- assert str(fd) == '-1762', fd
-
- in1 = '4 BC'
- fd = parser.parse(in1)
- assert str(fd) == '-0004', fd
-
- in1 = '4 B.C.'
- fd = parser.parse(in1)
- assert str(fd) == '-0004', fd
-
- in1 = 'Wed, 06 Jan 2010 09:30:00 GMT'
- fd = parser.parse(in1)
- assert str(fd) == '2010-01-06', fd
-
- in1 = 'Tue, 07 Dec 2010 10:00:00 GMT'
- fd = parser.parse(in1)
- assert str(fd) == '2010-12-07', fd
-
- def test_parse(self):
- d1 = datetime.datetime(2000, 1, 23)
- fd = parse(d1)
- assert fd.year == '2000'
-
- fd = parse('March 1762')
- assert str(fd) == '1762-03'
-
- fd = parse(1966)
- assert str(fd) == '1966'
-
- fd = parse('22/07/2010')
- assert fd.month == '07', fd.month
-
- def test_parse_ambiguous_day_month(self):
- fd = parse('05/07/2010')
- assert fd.month == '07', fd.month
- assert fd.day == '05', fd.month
-
- def test_parse_with_none(self):
- d1 = parse(None)
- assert d1 is None
-
- def test_parse_wildcards(self):
- fd = parse('198?')
- assert fd.year == '', fd.year # expect this to not parse
- # TODO but we should have a float if possible
-# assert fd.as_float() == u'1980', fd.as_float()
-
- def test_parse_with_qualifiers(self):
- fd = parse('1985?')
- assert fd.year == u'1985', fd
- assert fd.qualifier == u'Uncertainty : 1985?', fd.qualifier
-
- fd = parse('c.1780')
- assert fd.year == u'1780', fd
- assert fd.qualifier == u"Note 'circa' : c.1780", fd
-
- fd = parse('c. 1780')
- assert fd.year == u'1780', fd
- assert fd.qualifier.startswith(u"Note 'circa'"), fd
-
- def test_ambiguous(self):
- # TODO: have to be careful here ...
- fd = parse('1068/1069')
-
- def test_small_years(self):
- in1 = '23'
- fd = parse(in1)
- assert str(fd) == '0023', fd
- assert fd.as_float() == 23, fd.as_float()
-
- def test_small_years_with_zeros(self):
- in1 = '0023'
- fd = parse(in1)
- assert str(fd) == '0023', fd
- assert fd.as_float() == 23, fd.as_float()
-
- def test_years_with_alpha_prefix(self):
- in1 = "p1980"
- fd = parse(in1)
- assert str(fd) == "1980", fd
-
diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautildate/__init__.py
--- /dev/null
+++ b/datautildate/__init__.py
@@ -0,0 +1,1 @@
+__version__ = '0.4'
diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautildate/date.py
--- /dev/null
+++ b/datautildate/date.py
@@ -0,0 +1,282 @@
+'''Date parsing and normalization utilities based on FlexiDate.
+
+To parser dates use parse, e.g.::
+
+ parse('1890') -> FlexiDate(year=u'1890')
+ parse('1890?') -> FlexiDate(year=u'1890', qualifier='Uncertainty: 1985?')
+
+Once you have a FlexiDate you can get access to attributes (strings of course
+...)::
+
+ fd = parse('Jan 1890')
+ fd.year # u'1890'
+ fd.month # u'01'
+
+And convert to other forms:
+
+ fd.as_float() # 1890
+ fd.as_datetime() # datetime(1890,01,01)
+
+Background
+==========
+
+FlexiDate is focused on supporting:
+
+ 1. Dates outside of Python (or DB) supported period (esp. dates < 0 AD)
+ 2. Imprecise dates (c.1860, 18??, fl. 1534, etc)
+ 3. Normalization of dates to machine processable versions
+ 4. Sortable in the database (in correct date order)
+
+For more information see:
+
+http://www.rufuspollock.org/2009/06/18/flexible-dates-in-python/
+'''
+import re
+import datetime
+
+class FlexiDate(object):
+ """Store dates as strings and present them in a slightly extended version
+ of ISO8601.
+
+ Modifications:
+ * Allow a trailing qualifiers e.g. fl.
+ * Allow replacement of unknown values by ? e.g. if sometime in 1800s
+ can do 18??
+
+ Restriction on ISO8601:
+ * Truncation (e.g. of centuries) is *not* permitted.
+ * No week and day representation e.g. 1999-W01
+ """
+ # pass
+ def __init__(self, year=None, month=None, day=None, qualifier=''):
+ # force = month or day or qualifier
+ force = False
+ self.year = self._cvt(year, rjust=4, force=force)
+ self.month = self._cvt(month)
+ self.day = self._cvt(day)
+ self.qualifier = qualifier
+
+ def _cvt(self, val, rjust=2, force=False):
+ if val:
+ tmp = unicode(val).strip()
+ if tmp.startswith('-'):
+ tmp = '-' + tmp[1:].rjust(rjust, '0')
+ else:
+ tmp = tmp.rjust(rjust, '0')
+ return tmp
+ elif force:
+ # use '!' rather than '?' as '!' < '1' while '?' > '1'
+ return rjust * '!'
+ else:
+ return ''
+
+ def __str__(self):
+ out = self.isoformat()
+ if self.qualifier:
+ # leading space is important as ensures when no year sort in right
+ # order as ' ' < '1'
+ out += u' [%s]' % self.qualifier
+ return out
+
+ def __repr__(self):
+ return u'%s %s' % (self.__class__, self.__str__())
+
+ def isoformat(self, strict=False):
+ '''Return date in isoformat (same as __str__ but without qualifier).
+
+ WARNING: does not replace '?' in dates unless strict=True.
+ '''
+ out = self.year
+ # what do we do when no year ...
+ for val in [ self.month, self.day ]:
+ if not val:
+ break
+ out += u'-' + val
+ if strict:
+ out = out.replace('?', '0')
+ return out
+
+ our_re_pat = '''
+ (?P<year> -?[\d?]+)
+ (?:
+ \s* - (?P<month> [\d?]{1,2})
+ (?: \s* - (?P<day> [\d?]{1,2}) )?
+ )?
+ \s*
+ (?: \[ (?P<qualifier>[^]]*) \])?
+ '''
+ our_re = re.compile(our_re_pat, re.VERBOSE)
+ @classmethod
+ def from_str(self, instr):
+ '''Undo affect of __str__'''
+ if not instr:
+ return FlexiDate()
+
+ out = self.our_re.match(instr)
+ if out is None: # no match TODO: raise Exception?
+ return None
+ else:
+ return FlexiDate(
+ out.group('year'),
+ out.group('month'),
+ out.group('day'),
+ qualifier=out.group('qualifier')
+ )
+
+ def as_float(self):
+ '''Get as a float (year being the integer part).
+
+ Replace '?' in year with 9 so as to be conservative (e.g. 19?? becomes
+ 1999) and elsewhere (month, day) with 0
+
+ @return: float.
+ '''
+ if not self.year: return None
+ out = float(self.year.replace('?', '9'))
+ if self.month:
+ # TODO: we are assuming months are of equal length
+ out += float(self.month.replace('?', '0')) / 12.0
+ if self.day:
+ out += float(self.day.replace('?', '0')) / 365.0
+ return out
+
+ def as_datetime(self):
+ '''Get as python datetime.datetime.
+
+ Require year to be a valid datetime year. Default month and day to 1 if
+ do not exist.
+
+ @return: datetime.datetime object.
+ '''
+ year = int(self.year)
+ month = int(self.month) if self.month else 1
+ day = int(self.day) if self.day else 1
+ return datetime.datetime(year, month, day)
+
+
+def parse(date, dayfirst=True):
+ '''Parse a `date` into a `FlexiDate`.
+
+ @param date: the date to parse - may be a string, datetime.date,
+ datetime.datetime or FlexiDate.
+
+ TODO: support for quarters e.g. Q4 1980 or 1954 Q3
+ TODO: support latin stuff like M.DCC.LIII
+ TODO: convert '-' to '?' when used that way
+ e.g. had this date [181-]
+ '''
+ if not date:
+ return None
+ if isinstance(date, FlexiDate):
+ return date
+ if isinstance(date, int):
+ return FlexiDate(year=date)
+ elif isinstance(date, datetime.date):
+ parser = PythonDateParser()
+ return parser.parse(date)
+ else: # assuming its a string
+ parser = DateutilDateParser()
+ out = parser.parse(date, **{'dayfirst': dayfirst})
+ if out is not None:
+ return out
+ # msg = 'Unable to parse %s' % date
+ # raise ValueError(date)
+ val = 'UNPARSED: %s' % date
+ val = val.encode('ascii', 'ignore')
+ return FlexiDate(qualifier=val)
+
+
+class DateParserBase(object):
+ def parse(self, date):
+ raise NotImplementedError
+
+ def norm(self, date):
+ return str(self.parse(date))
+
+class PythonDateParser(object):
+ def parse(self, date):
+ return FlexiDate(date.year, date.month, date.day)
+
+try:
+ import dateutil.parser
+ dateutil_parser = dateutil.parser.parser()
+except:
+ dateutil_parser = None
+
+class DateutilDateParser(DateParserBase):
+ _numeric = re.compile("^[0-9]+$")
+ def parse(self, date, **kwargs):
+ '''
+ :param **kwargs: any kwargs accepted by dateutil.parse function.
+ '''
+ qualifiers = []
+ if dateutil_parser is None:
+ return None
+ date = orig_date = date.strip()
+
+ # various normalizations
+ # TODO: call .lower() first
+ date = date.replace('B.C.', 'BC')
+ date = date.replace('A.D.', 'AD')
+
+ # deal with pre 0AD dates
+ if date.startswith('-') or 'BC' in date or 'B.C.' in date:
+ pre0AD = True
+ else:
+ pre0AD = False
+ # BC seems to mess up parser
+ date = date.replace('BC', '')
+
+ # deal with circa: 'c.1950' or 'c1950'
+ circa_match = re.match('([^a-zA-Z]*)c\.?\s*(\d+.*)', date)
+ if circa_match:
+ # remove circa bit
+ qualifiers.append("Note 'circa'")
+ date = ''.join(circa_match.groups())
+
+ # deal with p1980 (what does this mean? it can appear in
+ # field 008 of MARC records
+ p_match = re.match("^p(\d+)", date)
+ if p_match:
+ date = date[1:]
+
+ # Deal with uncertainty: '1985?'
+ uncertainty_match = re.match('([0-9xX]{4})\?', date)
+ if uncertainty_match:
+ # remove the ?
+ date = date[:-1]
+ qualifiers.append('Uncertainty')
+
+ # Parse the numbers intelligently
+ # do not use std parser function as creates lots of default data
+ res = dateutil_parser._parse(date, **kwargs)
+
+ if res is None:
+ # Couldn't parse it
+ return None
+ #Note: Years of less than 3 digits not interpreted by
+ # dateutil correctly
+ # e.g. 87 -> 1987
+ # 4 -> day 4 (no year)
+ # Both cases are handled in this routine
+ if res.year is None and res.day:
+ year = res.day
+ # If the whole date is simply two digits then dateutil_parser makes
+ # it '86' -> '1986'. So strip off the '19'. (If the date specified
+ # day/month then a two digit year is more likely to be this century
+ # and so allow the '19' prefix to it.)
+ elif self._numeric.match(date) and (len(date) == 2 or date.startswith('00')):
+ year = res.year % 100
+ else:
+ year = res.year
+
+ # finally add back in BC stuff
+ if pre0AD:
+ year = -year
+
+ if not qualifiers:
+ qualifier = ''
+ else:
+ qualifier = ', '.join(qualifiers) + (' : %s' % orig_date)
+ return FlexiDate(year, res.month, res.day, qualifier=qualifier)
+
diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautildate/tests/__init__.py
--- /dev/null
+++ b/datautildate/tests/__init__.py
@@ -0,0 +1,1 @@
+__version__ = '0.4'
diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautildate/tests/test_date.py
--- /dev/null
+++ b/datautildate/tests/test_date.py
@@ -0,0 +1,207 @@
+from datautildate.date import *
+
+import datetime
+
+class TestPythonStringOrdering(object):
+ # It is impossible to find a string format such that +ve and -ve numbers
+ # sort correctly as strings:
+ # if (in string ordering) X < Y => -X < -Y (False!)
+ def test_ordering(self):
+ assert '0' < '1'
+ assert '-10' < '10'
+ assert '-' < '@'
+ assert '-' < '0'
+ assert '-100' < '-X10'
+ assert '10' < '1000'
+ assert '02000' < '10000'
+ assert ' 2000' < '10000'
+
+ def test_bad_ordering(self):
+ assert ' ' < '0'
+ assert ' ' < '-'
+ assert not '-' < '+'
+ assert '-100' > '-10'
+ assert not '-100' < '-010'
+ assert not '-100' < '- 10'
+ assert not '-100' < ' -10'
+ assert '10000' < '2000'
+ assert not '-10' < ' 1'
+
+
+class TestFlexiDate(object):
+ def test_init(self):
+ fd = FlexiDate()
+ assert fd.year == '', fd
+ assert fd.month == '', fd
+
+ fd = FlexiDate(2000, 1,1)
+ assert fd.month == '01', fd
+ assert fd.day== '01', fd
+
+ def test_str(self):
+ fd = FlexiDate(2000, 1, 23)
+ assert str(fd) == '2000-01-23', '"%s"' % fd
+ fd = FlexiDate(-2000, 1, 23)
+ assert str(fd) == '-2000-01-23'
+ fd = FlexiDate(2000)
+ assert str(fd) == '2000'
+ fd = FlexiDate(1760, qualifier='fl.')
+ assert str(fd) == '1760 [fl.]', fd
+
+ fd = FlexiDate(qualifier='anything')
+ assert str(fd) == ' [anything]'
+
+
+ def test_from_str(self):
+ def dotest(fd):
+ out = FlexiDate.from_str(str(fd))
+ assert str(out) == str(fd)
+
+ fd = FlexiDate(2000, 1, 23)
+ dotest(fd)
+ fd = FlexiDate(1760, qualifier='fl.')
+ dotest(fd)
+ fd = FlexiDate(-1760, 1, 3, qualifier='fl.')
+ dotest(fd)
+
+ def test_as_float(self):
+ fd = FlexiDate(2000)
+ assert fd.as_float() == float(2000), fd.as_float()
+ fd = FlexiDate(1760, 1, 2)
+ exp = 1760 + 1/12.0 + 2/365.0
+ assert fd.as_float() == exp, fd.as_float()
+ fd = FlexiDate(-1000)
+ assert fd.as_float() == float(-1000)
+
+ def test_as_datetime(self):
+ fd = FlexiDate(2000)
+ out = fd.as_datetime()
+ assert out == datetime.datetime(2000, 1, 1), out
+ fd = FlexiDate(1760, 1, 2)
+ out = fd.as_datetime()
+ assert out == datetime.datetime(1760,1,2), out
+
+
+class TestDateParsers(object):
+ def test_using_datetime(self):
+ parser = PythonDateParser()
+
+ d1 = datetime.date(2000, 1, 23)
+ fd = parser.parse(d1)
+ assert fd.year == '2000'
+
+ d1 = datetime.datetime(2000, 1, 23)
+ fd = parser.parse(d1)
+ # assert str(fd) == '2000-01-23T00:00:00', fd
+ assert str(fd) == '2000-01-23', fd
+
+ def test_using_dateutil(self):
+ parser = DateutilDateParser()
+
+ in1 = '2001-02'
+ fd = parser.parse(in1)
+ assert str(fd) == in1, fd
+
+ in1 = 'March 1762'
+ fd = parser.parse(in1)
+ assert str(fd) == '1762-03'
+
+ in1 = 'March 1762'
+ fd = parser.parse(in1)
+ assert str(fd) == '1762-03'
+
+ in1 = '1768 AD'
+ fd = parser.parse(in1)
+ assert str(fd) == '1768', fd
+
+ in1 = '1768 A.D.'
+ fd = parser.parse(in1)
+ assert str(fd) == '1768', fd
+
+ in1 = '-1850'
+ fd = parser.parse(in1)
+ assert str(fd) == '-1850', fd
+
+ in1 = '1762 BC'
+ fd = parser.parse(in1)
+ assert str(fd) == '-1762', fd
+
+ in1 = '4 BC'
+ fd = parser.parse(in1)
+ assert str(fd) == '-0004', fd
+
+ in1 = '4 B.C.'
+ fd = parser.parse(in1)
+ assert str(fd) == '-0004', fd
+
+ in1 = 'Wed, 06 Jan 2010 09:30:00 GMT'
+ fd = parser.parse(in1)
+ assert str(fd) == '2010-01-06', fd
+
+ in1 = 'Tue, 07 Dec 2010 10:00:00 GMT'
+ fd = parser.parse(in1)
+ assert str(fd) == '2010-12-07', fd
+
+ def test_parse(self):
+ d1 = datetime.datetime(2000, 1, 23)
+ fd = parse(d1)
+ assert fd.year == '2000'
+
+ fd = parse('March 1762')
+ assert str(fd) == '1762-03'
+
+ fd = parse(1966)
+ assert str(fd) == '1966'
+
+ fd = parse('22/07/2010')
+ assert fd.month == '07', fd.month
+
+ def test_parse_ambiguous_day_month(self):
+ fd = parse('05/07/2010')
+ assert fd.month == '07', fd.month
+ assert fd.day == '05', fd.month
+
+ def test_parse_with_none(self):
+ d1 = parse(None)
+ assert d1 is None
+
+ def test_parse_wildcards(self):
+ fd = parse('198?')
+ assert fd.year == '', fd.year # expect this to not parse
+ # TODO but we should have a float if possible
+# assert fd.as_float() == u'1980', fd.as_float()
+
+ def test_parse_with_qualifiers(self):
+ fd = parse('1985?')
+ assert fd.year == u'1985', fd
+ assert fd.qualifier == u'Uncertainty : 1985?', fd.qualifier
+
+ fd = parse('c.1780')
+ assert fd.year == u'1780', fd
+ assert fd.qualifier == u"Note 'circa' : c.1780", fd
+
+ fd = parse('c. 1780')
+ assert fd.year == u'1780', fd
+ assert fd.qualifier.startswith(u"Note 'circa'"), fd
+
+ def test_ambiguous(self):
+ # TODO: have to be careful here ...
+ fd = parse('1068/1069')
+
+ def test_small_years(self):
+ in1 = '23'
+ fd = parse(in1)
+ assert str(fd) == '0023', fd
+ assert fd.as_float() == 23, fd.as_float()
+
+ def test_small_years_with_zeros(self):
+ in1 = '0023'
+ fd = parse(in1)
+ assert str(fd) == '0023', fd
+ assert fd.as_float() == 23, fd.as_float()
+
+ def test_years_with_alpha_prefix(self):
+ in1 = "p1980"
+ fd = parse(in1)
+ assert str(fd) == "1980", fd
+
diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee setup.py
--- a/setup.py
+++ b/setup.py
@@ -2,10 +2,10 @@
import sys
sys.path.insert(0, '.')
-from datautil import __version__, __doc__ as __long_description__
+from datautildate import __version__, __doc__ as __long_description__
setup(
- name='datautil-date',
+ name='datautildate',
version=__version__,
license='MIT',
description='Date Utilities for Data Work',
Repository URL: https://bitbucket.org/okfn/datautil-date/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
More information about the ckan-changes
mailing list