[ckan-changes] commit/datautil-date: dread: [rename] datautildate to avoid conflicts with datautil.

Bitbucket commits-noreply at bitbucket.org
Tue Nov 29 13:00:38 UTC 2011


1 new commit in datautil-date:


https://bitbucket.org/okfn/datautil-date/changeset/4e8b4c556738/
changeset:   4e8b4c556738
user:        dread
date:        2011-11-29 13:54:12
summary:     [rename] datautildate to avoid conflicts with datautil.
affected #:  10 files

diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee .hgignore
--- /dev/null
+++ b/.hgignore
@@ -0,0 +1,1 @@
+.*egg-info.*
\ No newline at end of file


diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautil/__init__.py
--- a/datautil/__init__.py
+++ /dev/null
@@ -1,1 +0,0 @@
-__version__ = '0.4'


diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautil/date.py
--- a/datautil/date.py
+++ /dev/null
@@ -1,282 +0,0 @@
-'''Date parsing and normalization utilities based on FlexiDate.
-
-To parser dates use parse, e.g.::
-
-    parse('1890') -> FlexiDate(year=u'1890')
-    parse('1890?') -> FlexiDate(year=u'1890', qualifier='Uncertainty: 1985?')
-
-Once you have a FlexiDate you can get access to attributes (strings of course
-...)::
-
-    fd = parse('Jan 1890')
-    fd.year # u'1890'
-    fd.month # u'01'
-
-And convert to other forms:
-
-    fd.as_float() # 1890
-    fd.as_datetime() # datetime(1890,01,01)
-
-Background
-==========
-
-FlexiDate is focused on supporting:
-
-  1. Dates outside of Python (or DB) supported period (esp. dates < 0 AD)
-  2. Imprecise dates (c.1860, 18??, fl. 1534, etc)
-  3. Normalization of dates to machine processable versions
-  4. Sortable in the database (in correct date order)
-
-For more information see:
-
-http://www.rufuspollock.org/2009/06/18/flexible-dates-in-python/
-'''
-import re
-import datetime
-
-class FlexiDate(object):
-    """Store dates as strings and present them in a slightly extended version
-    of ISO8601.
-
-    Modifications:
-        * Allow a trailing qualifiers e.g. fl.
-        * Allow replacement of unknown values by ? e.g. if sometime in 1800s
-          can do 18??
-    
-    Restriction on ISO8601:
-        * Truncation (e.g. of centuries) is *not* permitted.
-        * No week and day representation e.g. 1999-W01
-    """
-    # pass
-    def __init__(self, year=None, month=None, day=None, qualifier=''):
-        # force = month or day or qualifier
-        force = False
-        self.year = self._cvt(year, rjust=4, force=force)
-        self.month = self._cvt(month)
-        self.day = self._cvt(day)
-        self.qualifier = qualifier
-         
-    def _cvt(self, val, rjust=2, force=False):
-        if val:
-            tmp = unicode(val).strip()
-            if tmp.startswith('-'):
-                tmp = '-' + tmp[1:].rjust(rjust, '0')
-            else:
-                tmp = tmp.rjust(rjust, '0')
-            return tmp
-        elif force:
-            # use '!' rather than '?' as '!' < '1' while '?' > '1'
-            return rjust * '!'
-        else:
-            return ''
-
-    def __str__(self):
-        out = self.isoformat()
-        if self.qualifier:
-            # leading space is important as ensures when no year sort in right
-            # order as ' ' < '1'
-            out += u' [%s]' % self.qualifier
-        return out
-
-    def __repr__(self):
-        return u'%s %s' % (self.__class__, self.__str__())
-
-    def isoformat(self, strict=False):
-        '''Return date in isoformat (same as __str__ but without qualifier).
-        
-        WARNING: does not replace '?' in dates unless strict=True.
-        '''
-        out = self.year
-        # what do we do when no year ...
-        for val in [ self.month, self.day ]:
-            if not val:
-                break
-            out += u'-' + val
-        if strict:
-            out = out.replace('?', '0')
-        return out
-
-    our_re_pat = '''
-        (?P<year> -?[\d?]+)
-        (?:
-                \s* - (?P<month> [\d?]{1,2})
-            (?: \s* - (?P<day> [\d?]{1,2}) )?
-        )?
-        \s*
-        (?: \[ (?P<qualifier>[^]]*) \])?
-        '''
-    our_re = re.compile(our_re_pat, re.VERBOSE)
-    @classmethod
-    def from_str(self, instr):
-        '''Undo affect of __str__'''
-        if not instr:
-            return FlexiDate()
-
-        out = self.our_re.match(instr)
-        if out is None: # no match TODO: raise Exception?
-            return None
-        else:
-            return FlexiDate(
-                    out.group('year'),
-                    out.group('month'),
-                    out.group('day'),
-                    qualifier=out.group('qualifier')
-                    )
-    
-    def as_float(self):
-        '''Get as a float (year being the integer part).
-
-        Replace '?' in year with 9 so as to be conservative (e.g. 19?? becomes
-        1999) and elsewhere (month, day) with 0
-
-        @return: float.
-        '''
-        if not self.year: return None
-        out = float(self.year.replace('?', '9'))
-        if self.month:
-            # TODO: we are assuming months are of equal length
-            out += float(self.month.replace('?', '0')) / 12.0
-            if self.day:
-                out += float(self.day.replace('?', '0')) / 365.0
-        return out
-
-    def as_datetime(self):
-        '''Get as python datetime.datetime.
-
-        Require year to be a valid datetime year. Default month and day to 1 if
-        do not exist.
-
-        @return: datetime.datetime object.
-        '''
-        year = int(self.year)
-        month = int(self.month) if self.month else 1
-        day = int(self.day) if self.day else 1
-        return datetime.datetime(year, month, day)
-
-
-def parse(date, dayfirst=True):
-    '''Parse a `date` into a `FlexiDate`.
-
-    @param date: the date to parse - may be a string, datetime.date,
-    datetime.datetime or FlexiDate.
-
-    TODO: support for quarters e.g. Q4 1980 or 1954 Q3
-    TODO: support latin stuff like M.DCC.LIII  
-    TODO: convert '-' to '?' when used that way
-        e.g. had this date [181-]
-    '''
-    if not date:
-        return None
-    if isinstance(date, FlexiDate):
-        return date
-    if isinstance(date, int):
-        return FlexiDate(year=date)
-    elif isinstance(date, datetime.date):
-        parser = PythonDateParser()
-        return parser.parse(date)
-    else: # assuming its a string
-        parser = DateutilDateParser()
-        out = parser.parse(date, **{'dayfirst': dayfirst})
-        if out is not None:
-            return out
-        # msg = 'Unable to parse %s' % date
-        # raise ValueError(date)
-        val = 'UNPARSED: %s' % date
-        val = val.encode('ascii', 'ignore')
-        return FlexiDate(qualifier=val)
-
-
-class DateParserBase(object):
-    def parse(self, date):
-        raise NotImplementedError
-
-    def norm(self, date):
-        return str(self.parse(date))
-
-class PythonDateParser(object):
-    def parse(self, date):
-        return FlexiDate(date.year, date.month, date.day)
-
-try:
-    import dateutil.parser
-    dateutil_parser = dateutil.parser.parser()
-except:
-    dateutil_parser = None
-
-class DateutilDateParser(DateParserBase):
-    _numeric = re.compile("^[0-9]+$")
-    def parse(self, date, **kwargs):
-        '''
-        :param **kwargs: any kwargs accepted by dateutil.parse function.
-        '''
-        qualifiers = []
-        if dateutil_parser is None:
-            return None
-        date = orig_date = date.strip()
-
-        # various normalizations
-        # TODO: call .lower() first
-        date = date.replace('B.C.', 'BC')
-        date = date.replace('A.D.', 'AD')
-
-        # deal with pre 0AD dates
-        if date.startswith('-') or 'BC' in date or 'B.C.' in date:
-            pre0AD = True
-        else:
-            pre0AD = False
-        # BC seems to mess up parser
-        date = date.replace('BC', '')
-
-        # deal with circa: 'c.1950' or 'c1950'
-        circa_match = re.match('([^a-zA-Z]*)c\.?\s*(\d+.*)', date)
-        if circa_match:
-            # remove circa bit
-            qualifiers.append("Note 'circa'")
-            date = ''.join(circa_match.groups())
-
-        # deal with p1980 (what does this mean? it can appear in
-        # field 008 of MARC records
-        p_match = re.match("^p(\d+)", date)
-        if p_match:
-            date = date[1:]
-
-        # Deal with uncertainty: '1985?'
-        uncertainty_match = re.match('([0-9xX]{4})\?', date)
-        if uncertainty_match:
-            # remove the ?
-            date = date[:-1]
-            qualifiers.append('Uncertainty')
-
-        # Parse the numbers intelligently
-        # do not use std parser function as creates lots of default data
-        res = dateutil_parser._parse(date, **kwargs)
-
-        if res is None:
-            # Couldn't parse it
-            return None
-        #Note: Years of less than 3 digits not interpreted by
-        #      dateutil correctly
-        #      e.g. 87 -> 1987
-        #           4  -> day 4 (no year)
-        # Both cases are handled in this routine
-        if res.year is None and res.day:
-            year = res.day
-        # If the whole date is simply two digits then dateutil_parser makes
-        # it '86' -> '1986'. So strip off the '19'. (If the date specified
-        # day/month then a two digit year is more likely to be this century
-        # and so allow the '19' prefix to it.)
-        elif self._numeric.match(date) and (len(date) == 2 or date.startswith('00')):
-            year = res.year % 100
-        else:
-            year = res.year
-
-        # finally add back in BC stuff
-        if pre0AD:
-            year = -year
-            
-        if not qualifiers:
-            qualifier = ''
-        else:
-            qualifier = ', '.join(qualifiers) + (' : %s' % orig_date)
-        return FlexiDate(year, res.month, res.day, qualifier=qualifier)
-    


diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautil/tests/__init__.py
--- a/datautil/tests/__init__.py
+++ /dev/null
@@ -1,1 +0,0 @@
-__version__ = '0.4'


diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautil/tests/test_date.py
--- a/datautil/tests/test_date.py
+++ /dev/null
@@ -1,207 +0,0 @@
-from datautil.date import *
-
-import datetime
-
-class TestPythonStringOrdering(object):
-    # It is impossible to find a string format such that +ve and -ve numbers
-    # sort correctly as strings:
-    # if (in string ordering) X < Y => -X < -Y (False!)
-    def test_ordering(self):
-        assert '0' < '1'
-        assert '-10' < '10'
-        assert '-' < '@'
-        assert '-' < '0'
-        assert '-100' < '-X10'
-        assert '10' < '1000'
-        assert '02000' < '10000'
-        assert ' 2000' < '10000'
-
-    def test_bad_ordering(self):
-        assert ' ' < '0'
-        assert ' ' < '-'
-        assert not '-' < '+'
-        assert '-100' > '-10'
-        assert not '-100' < '-010'
-        assert not '-100' < '- 10'
-        assert not '-100' < ' -10'
-        assert '10000' < '2000'
-        assert not '-10' < ' 1'
-        
-
-class TestFlexiDate(object):
-    def test_init(self):
-        fd = FlexiDate()
-        assert fd.year == '', fd
-        assert fd.month == '', fd
-
-        fd = FlexiDate(2000, 1,1)
-        assert fd.month == '01', fd
-        assert fd.day== '01', fd
-
-    def test_str(self):
-        fd = FlexiDate(2000, 1, 23)
-        assert str(fd) == '2000-01-23', '"%s"' % fd
-        fd = FlexiDate(-2000, 1, 23)
-        assert str(fd) == '-2000-01-23'
-        fd = FlexiDate(2000)
-        assert str(fd) == '2000'
-        fd = FlexiDate(1760, qualifier='fl.')
-        assert str(fd) == '1760 [fl.]', fd
-
-        fd = FlexiDate(qualifier='anything')
-        assert str(fd) == ' [anything]'
-
-
-    def test_from_str(self):
-        def dotest(fd):
-            out = FlexiDate.from_str(str(fd))
-            assert str(out) == str(fd)
-
-        fd = FlexiDate(2000, 1, 23)
-        dotest(fd)
-        fd = FlexiDate(1760, qualifier='fl.')
-        dotest(fd)
-        fd = FlexiDate(-1760, 1, 3, qualifier='fl.')
-        dotest(fd)
-    
-    def test_as_float(self):
-        fd = FlexiDate(2000)
-        assert fd.as_float() == float(2000), fd.as_float()
-        fd = FlexiDate(1760, 1, 2)
-        exp = 1760 + 1/12.0 + 2/365.0
-        assert fd.as_float() == exp, fd.as_float()
-        fd = FlexiDate(-1000)
-        assert fd.as_float() == float(-1000)
-
-    def test_as_datetime(self):
-        fd = FlexiDate(2000)
-        out = fd.as_datetime()
-        assert out == datetime.datetime(2000, 1, 1), out
-        fd = FlexiDate(1760, 1, 2)
-        out = fd.as_datetime()
-        assert out == datetime.datetime(1760,1,2), out
-
-
-class TestDateParsers(object):
-    def test_using_datetime(self):
-        parser = PythonDateParser()
-
-        d1 = datetime.date(2000, 1, 23)
-        fd = parser.parse(d1)
-        assert fd.year == '2000'
-
-        d1 = datetime.datetime(2000, 1, 23)
-        fd = parser.parse(d1)
-        # assert str(fd) == '2000-01-23T00:00:00', fd
-        assert str(fd) == '2000-01-23', fd
-
-    def test_using_dateutil(self):
-        parser = DateutilDateParser()
-
-        in1 = '2001-02'
-        fd = parser.parse(in1)
-        assert str(fd) == in1, fd
-
-        in1 = 'March 1762'
-        fd = parser.parse(in1)
-        assert str(fd) == '1762-03'
-
-        in1 = 'March 1762'
-        fd = parser.parse(in1)
-        assert str(fd) == '1762-03'
-
-        in1 = '1768 AD'
-        fd = parser.parse(in1)
-        assert str(fd) == '1768', fd
-
-        in1 = '1768 A.D.'
-        fd = parser.parse(in1)
-        assert str(fd) == '1768', fd
-
-        in1 = '-1850'
-        fd = parser.parse(in1)
-        assert str(fd) == '-1850', fd
-
-        in1 = '1762 BC'
-        fd = parser.parse(in1)
-        assert str(fd) == '-1762', fd
-
-        in1 = '4 BC'
-        fd = parser.parse(in1)
-        assert str(fd) == '-0004', fd
-
-        in1 = '4 B.C.'
-        fd = parser.parse(in1)
-        assert str(fd) == '-0004', fd
-
-        in1 = 'Wed, 06 Jan 2010 09:30:00 GMT'
-        fd = parser.parse(in1)
-        assert str(fd) == '2010-01-06', fd
-
-        in1 = 'Tue, 07 Dec 2010 10:00:00 GMT'
-        fd = parser.parse(in1)
-        assert str(fd) == '2010-12-07', fd
-
-    def test_parse(self):
-        d1 = datetime.datetime(2000, 1, 23)
-        fd = parse(d1)
-        assert fd.year == '2000'
-
-        fd = parse('March 1762')
-        assert str(fd) == '1762-03'
-
-        fd = parse(1966)
-        assert str(fd) == '1966'
-
-        fd = parse('22/07/2010')
-        assert fd.month == '07', fd.month
-
-    def test_parse_ambiguous_day_month(self):
-        fd = parse('05/07/2010')
-        assert fd.month == '07', fd.month
-        assert fd.day == '05', fd.month
-
-    def test_parse_with_none(self):
-        d1 = parse(None)
-        assert d1 is None
-    
-    def test_parse_wildcards(self):
-        fd = parse('198?')
-        assert fd.year == '', fd.year # expect this to not parse
-        # TODO but we should have a float if possible
-#        assert fd.as_float() == u'1980', fd.as_float()
-
-    def test_parse_with_qualifiers(self):
-        fd = parse('1985?')
-        assert fd.year == u'1985', fd
-        assert fd.qualifier == u'Uncertainty : 1985?', fd.qualifier
-
-        fd = parse('c.1780')
-        assert fd.year == u'1780', fd
-        assert fd.qualifier == u"Note 'circa' : c.1780", fd
-
-        fd = parse('c. 1780')
-        assert fd.year == u'1780', fd
-        assert fd.qualifier.startswith(u"Note 'circa'"), fd
-
-    def test_ambiguous(self):
-        # TODO: have to be careful here ...
-        fd = parse('1068/1069')
-
-    def test_small_years(self):
-        in1 = '23'
-        fd = parse(in1)
-        assert str(fd) == '0023', fd
-        assert fd.as_float() == 23, fd.as_float()
-
-    def test_small_years_with_zeros(self):
-        in1 = '0023'
-        fd = parse(in1)
-        assert str(fd) == '0023', fd
-        assert fd.as_float() == 23, fd.as_float()
-
-    def test_years_with_alpha_prefix(self):
-        in1 = "p1980"
-        fd = parse(in1)
-        assert str(fd) == "1980", fd
-        


diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautildate/__init__.py
--- /dev/null
+++ b/datautildate/__init__.py
@@ -0,0 +1,1 @@
+__version__ = '0.4'


diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautildate/date.py
--- /dev/null
+++ b/datautildate/date.py
@@ -0,0 +1,282 @@
+'''Date parsing and normalization utilities based on FlexiDate.
+
+To parser dates use parse, e.g.::
+
+    parse('1890') -> FlexiDate(year=u'1890')
+    parse('1890?') -> FlexiDate(year=u'1890', qualifier='Uncertainty: 1985?')
+
+Once you have a FlexiDate you can get access to attributes (strings of course
+...)::
+
+    fd = parse('Jan 1890')
+    fd.year # u'1890'
+    fd.month # u'01'
+
+And convert to other forms:
+
+    fd.as_float() # 1890
+    fd.as_datetime() # datetime(1890,01,01)
+
+Background
+==========
+
+FlexiDate is focused on supporting:
+
+  1. Dates outside of Python (or DB) supported period (esp. dates < 0 AD)
+  2. Imprecise dates (c.1860, 18??, fl. 1534, etc)
+  3. Normalization of dates to machine processable versions
+  4. Sortable in the database (in correct date order)
+
+For more information see:
+
+http://www.rufuspollock.org/2009/06/18/flexible-dates-in-python/
+'''
+import re
+import datetime
+
+class FlexiDate(object):
+    """Store dates as strings and present them in a slightly extended version
+    of ISO8601.
+
+    Modifications:
+        * Allow a trailing qualifiers e.g. fl.
+        * Allow replacement of unknown values by ? e.g. if sometime in 1800s
+          can do 18??
+    
+    Restriction on ISO8601:
+        * Truncation (e.g. of centuries) is *not* permitted.
+        * No week and day representation e.g. 1999-W01
+    """
+    # pass
+    def __init__(self, year=None, month=None, day=None, qualifier=''):
+        # force = month or day or qualifier
+        force = False
+        self.year = self._cvt(year, rjust=4, force=force)
+        self.month = self._cvt(month)
+        self.day = self._cvt(day)
+        self.qualifier = qualifier
+         
+    def _cvt(self, val, rjust=2, force=False):
+        if val:
+            tmp = unicode(val).strip()
+            if tmp.startswith('-'):
+                tmp = '-' + tmp[1:].rjust(rjust, '0')
+            else:
+                tmp = tmp.rjust(rjust, '0')
+            return tmp
+        elif force:
+            # use '!' rather than '?' as '!' < '1' while '?' > '1'
+            return rjust * '!'
+        else:
+            return ''
+
+    def __str__(self):
+        out = self.isoformat()
+        if self.qualifier:
+            # leading space is important as ensures when no year sort in right
+            # order as ' ' < '1'
+            out += u' [%s]' % self.qualifier
+        return out
+
+    def __repr__(self):
+        return u'%s %s' % (self.__class__, self.__str__())
+
+    def isoformat(self, strict=False):
+        '''Return date in isoformat (same as __str__ but without qualifier).
+        
+        WARNING: does not replace '?' in dates unless strict=True.
+        '''
+        out = self.year
+        # what do we do when no year ...
+        for val in [ self.month, self.day ]:
+            if not val:
+                break
+            out += u'-' + val
+        if strict:
+            out = out.replace('?', '0')
+        return out
+
+    our_re_pat = '''
+        (?P<year> -?[\d?]+)
+        (?:
+                \s* - (?P<month> [\d?]{1,2})
+            (?: \s* - (?P<day> [\d?]{1,2}) )?
+        )?
+        \s*
+        (?: \[ (?P<qualifier>[^]]*) \])?
+        '''
+    our_re = re.compile(our_re_pat, re.VERBOSE)
+    @classmethod
+    def from_str(self, instr):
+        '''Undo affect of __str__'''
+        if not instr:
+            return FlexiDate()
+
+        out = self.our_re.match(instr)
+        if out is None: # no match TODO: raise Exception?
+            return None
+        else:
+            return FlexiDate(
+                    out.group('year'),
+                    out.group('month'),
+                    out.group('day'),
+                    qualifier=out.group('qualifier')
+                    )
+    
+    def as_float(self):
+        '''Get as a float (year being the integer part).
+
+        Replace '?' in year with 9 so as to be conservative (e.g. 19?? becomes
+        1999) and elsewhere (month, day) with 0
+
+        @return: float.
+        '''
+        if not self.year: return None
+        out = float(self.year.replace('?', '9'))
+        if self.month:
+            # TODO: we are assuming months are of equal length
+            out += float(self.month.replace('?', '0')) / 12.0
+            if self.day:
+                out += float(self.day.replace('?', '0')) / 365.0
+        return out
+
+    def as_datetime(self):
+        '''Get as python datetime.datetime.
+
+        Require year to be a valid datetime year. Default month and day to 1 if
+        do not exist.
+
+        @return: datetime.datetime object.
+        '''
+        year = int(self.year)
+        month = int(self.month) if self.month else 1
+        day = int(self.day) if self.day else 1
+        return datetime.datetime(year, month, day)
+
+
+def parse(date, dayfirst=True):
+    '''Parse a `date` into a `FlexiDate`.
+
+    @param date: the date to parse - may be a string, datetime.date,
+    datetime.datetime or FlexiDate.
+
+    TODO: support for quarters e.g. Q4 1980 or 1954 Q3
+    TODO: support latin stuff like M.DCC.LIII  
+    TODO: convert '-' to '?' when used that way
+        e.g. had this date [181-]
+    '''
+    if not date:
+        return None
+    if isinstance(date, FlexiDate):
+        return date
+    if isinstance(date, int):
+        return FlexiDate(year=date)
+    elif isinstance(date, datetime.date):
+        parser = PythonDateParser()
+        return parser.parse(date)
+    else: # assuming its a string
+        parser = DateutilDateParser()
+        out = parser.parse(date, **{'dayfirst': dayfirst})
+        if out is not None:
+            return out
+        # msg = 'Unable to parse %s' % date
+        # raise ValueError(date)
+        val = 'UNPARSED: %s' % date
+        val = val.encode('ascii', 'ignore')
+        return FlexiDate(qualifier=val)
+
+
+class DateParserBase(object):
+    def parse(self, date):
+        raise NotImplementedError
+
+    def norm(self, date):
+        return str(self.parse(date))
+
+class PythonDateParser(object):
+    def parse(self, date):
+        return FlexiDate(date.year, date.month, date.day)
+
+try:
+    import dateutil.parser
+    dateutil_parser = dateutil.parser.parser()
+except:
+    dateutil_parser = None
+
+class DateutilDateParser(DateParserBase):
+    _numeric = re.compile("^[0-9]+$")
+    def parse(self, date, **kwargs):
+        '''
+        :param **kwargs: any kwargs accepted by dateutil.parse function.
+        '''
+        qualifiers = []
+        if dateutil_parser is None:
+            return None
+        date = orig_date = date.strip()
+
+        # various normalizations
+        # TODO: call .lower() first
+        date = date.replace('B.C.', 'BC')
+        date = date.replace('A.D.', 'AD')
+
+        # deal with pre 0AD dates
+        if date.startswith('-') or 'BC' in date or 'B.C.' in date:
+            pre0AD = True
+        else:
+            pre0AD = False
+        # BC seems to mess up parser
+        date = date.replace('BC', '')
+
+        # deal with circa: 'c.1950' or 'c1950'
+        circa_match = re.match('([^a-zA-Z]*)c\.?\s*(\d+.*)', date)
+        if circa_match:
+            # remove circa bit
+            qualifiers.append("Note 'circa'")
+            date = ''.join(circa_match.groups())
+
+        # deal with p1980 (what does this mean? it can appear in
+        # field 008 of MARC records
+        p_match = re.match("^p(\d+)", date)
+        if p_match:
+            date = date[1:]
+
+        # Deal with uncertainty: '1985?'
+        uncertainty_match = re.match('([0-9xX]{4})\?', date)
+        if uncertainty_match:
+            # remove the ?
+            date = date[:-1]
+            qualifiers.append('Uncertainty')
+
+        # Parse the numbers intelligently
+        # do not use std parser function as creates lots of default data
+        res = dateutil_parser._parse(date, **kwargs)
+
+        if res is None:
+            # Couldn't parse it
+            return None
+        #Note: Years of less than 3 digits not interpreted by
+        #      dateutil correctly
+        #      e.g. 87 -> 1987
+        #           4  -> day 4 (no year)
+        # Both cases are handled in this routine
+        if res.year is None and res.day:
+            year = res.day
+        # If the whole date is simply two digits then dateutil_parser makes
+        # it '86' -> '1986'. So strip off the '19'. (If the date specified
+        # day/month then a two digit year is more likely to be this century
+        # and so allow the '19' prefix to it.)
+        elif self._numeric.match(date) and (len(date) == 2 or date.startswith('00')):
+            year = res.year % 100
+        else:
+            year = res.year
+
+        # finally add back in BC stuff
+        if pre0AD:
+            year = -year
+            
+        if not qualifiers:
+            qualifier = ''
+        else:
+            qualifier = ', '.join(qualifiers) + (' : %s' % orig_date)
+        return FlexiDate(year, res.month, res.day, qualifier=qualifier)
+    


diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautildate/tests/__init__.py
--- /dev/null
+++ b/datautildate/tests/__init__.py
@@ -0,0 +1,1 @@
+__version__ = '0.4'


diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee datautildate/tests/test_date.py
--- /dev/null
+++ b/datautildate/tests/test_date.py
@@ -0,0 +1,207 @@
+from datautildate.date import *
+
+import datetime
+
+class TestPythonStringOrdering(object):
+    # It is impossible to find a string format such that +ve and -ve numbers
+    # sort correctly as strings:
+    # if (in string ordering) X < Y => -X < -Y (False!)
+    def test_ordering(self):
+        assert '0' < '1'
+        assert '-10' < '10'
+        assert '-' < '@'
+        assert '-' < '0'
+        assert '-100' < '-X10'
+        assert '10' < '1000'
+        assert '02000' < '10000'
+        assert ' 2000' < '10000'
+
+    def test_bad_ordering(self):
+        assert ' ' < '0'
+        assert ' ' < '-'
+        assert not '-' < '+'
+        assert '-100' > '-10'
+        assert not '-100' < '-010'
+        assert not '-100' < '- 10'
+        assert not '-100' < ' -10'
+        assert '10000' < '2000'
+        assert not '-10' < ' 1'
+        
+
+class TestFlexiDate(object):
+    def test_init(self):
+        fd = FlexiDate()
+        assert fd.year == '', fd
+        assert fd.month == '', fd
+
+        fd = FlexiDate(2000, 1,1)
+        assert fd.month == '01', fd
+        assert fd.day== '01', fd
+
+    def test_str(self):
+        fd = FlexiDate(2000, 1, 23)
+        assert str(fd) == '2000-01-23', '"%s"' % fd
+        fd = FlexiDate(-2000, 1, 23)
+        assert str(fd) == '-2000-01-23'
+        fd = FlexiDate(2000)
+        assert str(fd) == '2000'
+        fd = FlexiDate(1760, qualifier='fl.')
+        assert str(fd) == '1760 [fl.]', fd
+
+        fd = FlexiDate(qualifier='anything')
+        assert str(fd) == ' [anything]'
+
+
+    def test_from_str(self):
+        def dotest(fd):
+            out = FlexiDate.from_str(str(fd))
+            assert str(out) == str(fd)
+
+        fd = FlexiDate(2000, 1, 23)
+        dotest(fd)
+        fd = FlexiDate(1760, qualifier='fl.')
+        dotest(fd)
+        fd = FlexiDate(-1760, 1, 3, qualifier='fl.')
+        dotest(fd)
+    
+    def test_as_float(self):
+        fd = FlexiDate(2000)
+        assert fd.as_float() == float(2000), fd.as_float()
+        fd = FlexiDate(1760, 1, 2)
+        exp = 1760 + 1/12.0 + 2/365.0
+        assert fd.as_float() == exp, fd.as_float()
+        fd = FlexiDate(-1000)
+        assert fd.as_float() == float(-1000)
+
+    def test_as_datetime(self):
+        fd = FlexiDate(2000)
+        out = fd.as_datetime()
+        assert out == datetime.datetime(2000, 1, 1), out
+        fd = FlexiDate(1760, 1, 2)
+        out = fd.as_datetime()
+        assert out == datetime.datetime(1760,1,2), out
+
+
+class TestDateParsers(object):
+    def test_using_datetime(self):
+        parser = PythonDateParser()
+
+        d1 = datetime.date(2000, 1, 23)
+        fd = parser.parse(d1)
+        assert fd.year == '2000'
+
+        d1 = datetime.datetime(2000, 1, 23)
+        fd = parser.parse(d1)
+        # assert str(fd) == '2000-01-23T00:00:00', fd
+        assert str(fd) == '2000-01-23', fd
+
+    def test_using_dateutil(self):
+        parser = DateutilDateParser()
+
+        in1 = '2001-02'
+        fd = parser.parse(in1)
+        assert str(fd) == in1, fd
+
+        in1 = 'March 1762'
+        fd = parser.parse(in1)
+        assert str(fd) == '1762-03'
+
+        in1 = 'March 1762'
+        fd = parser.parse(in1)
+        assert str(fd) == '1762-03'
+
+        in1 = '1768 AD'
+        fd = parser.parse(in1)
+        assert str(fd) == '1768', fd
+
+        in1 = '1768 A.D.'
+        fd = parser.parse(in1)
+        assert str(fd) == '1768', fd
+
+        in1 = '-1850'
+        fd = parser.parse(in1)
+        assert str(fd) == '-1850', fd
+
+        in1 = '1762 BC'
+        fd = parser.parse(in1)
+        assert str(fd) == '-1762', fd
+
+        in1 = '4 BC'
+        fd = parser.parse(in1)
+        assert str(fd) == '-0004', fd
+
+        in1 = '4 B.C.'
+        fd = parser.parse(in1)
+        assert str(fd) == '-0004', fd
+
+        in1 = 'Wed, 06 Jan 2010 09:30:00 GMT'
+        fd = parser.parse(in1)
+        assert str(fd) == '2010-01-06', fd
+
+        in1 = 'Tue, 07 Dec 2010 10:00:00 GMT'
+        fd = parser.parse(in1)
+        assert str(fd) == '2010-12-07', fd
+
+    def test_parse(self):
+        d1 = datetime.datetime(2000, 1, 23)
+        fd = parse(d1)
+        assert fd.year == '2000'
+
+        fd = parse('March 1762')
+        assert str(fd) == '1762-03'
+
+        fd = parse(1966)
+        assert str(fd) == '1966'
+
+        fd = parse('22/07/2010')
+        assert fd.month == '07', fd.month
+
+    def test_parse_ambiguous_day_month(self):
+        fd = parse('05/07/2010')
+        assert fd.month == '07', fd.month
+        assert fd.day == '05', fd.month
+
+    def test_parse_with_none(self):
+        d1 = parse(None)
+        assert d1 is None
+    
+    def test_parse_wildcards(self):
+        fd = parse('198?')
+        assert fd.year == '', fd.year # expect this to not parse
+        # TODO but we should have a float if possible
+#        assert fd.as_float() == u'1980', fd.as_float()
+
+    def test_parse_with_qualifiers(self):
+        fd = parse('1985?')
+        assert fd.year == u'1985', fd
+        assert fd.qualifier == u'Uncertainty : 1985?', fd.qualifier
+
+        fd = parse('c.1780')
+        assert fd.year == u'1780', fd
+        assert fd.qualifier == u"Note 'circa' : c.1780", fd
+
+        fd = parse('c. 1780')
+        assert fd.year == u'1780', fd
+        assert fd.qualifier.startswith(u"Note 'circa'"), fd
+
+    def test_ambiguous(self):
+        # TODO: have to be careful here ...
+        fd = parse('1068/1069')
+
+    def test_small_years(self):
+        in1 = '23'
+        fd = parse(in1)
+        assert str(fd) == '0023', fd
+        assert fd.as_float() == 23, fd.as_float()
+
+    def test_small_years_with_zeros(self):
+        in1 = '0023'
+        fd = parse(in1)
+        assert str(fd) == '0023', fd
+        assert fd.as_float() == 23, fd.as_float()
+
+    def test_years_with_alpha_prefix(self):
+        in1 = "p1980"
+        fd = parse(in1)
+        assert str(fd) == "1980", fd
+        


diff -r 2dc27829de2081ae342d497e14e52eda46e502ee -r 4e8b4c55673821b3519b0daf570bacc135ff19ee setup.py
--- a/setup.py
+++ b/setup.py
@@ -2,10 +2,10 @@
 
 import sys
 sys.path.insert(0, '.')
-from datautil import __version__, __doc__ as __long_description__
+from datautildate import __version__, __doc__ as __long_description__
 
 setup(
-    name='datautil-date',
+    name='datautildate',
     version=__version__,
     license='MIT',
     description='Date Utilities for Data Work',

Repository URL: https://bitbucket.org/okfn/datautil-date/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the commit notification service enabled, with
this email address set as the recipient.




More information about the ckan-changes mailing list