[ckan-changes] commit/ckanext-inspire: 2 new changesets

Bitbucket commits-noreply at bitbucket.org
Tue May 10 15:07:27 UTC 2011


2 new changesets in ckanext-inspire:

http://bitbucket.org/okfn/ckanext-inspire/changeset/bf36327a7e16/
changeset:   r24:bf36327a7e16
user:        amercader
date:        2011-05-10 13:31:13
summary:     Store the reference date when importing the object, and use this date to check if it needs to be updated
affected #:  2 files (603 bytes)

--- a/ckanext/inspire/harvesters.py	Tue May 10 10:53:33 2011 +0100
+++ b/ckanext/inspire/harvesters.py	Tue May 10 12:31:13 2011 +0100
@@ -10,7 +10,7 @@
 '''
 from lxml import etree
 import urllib2
-
+from datetime import datetime
 
 import logging
 log = logging.getLogger(__name__)
@@ -148,18 +148,39 @@
         '''Create or update a Package based on some content that has
         come from a URL.
         '''
-        # Look for previously harvested document matching Gemini GUID
         package = None
         gemini_document = GeminiDocument(content)
         gemini_values = gemini_document.read_values()
         gemini_guid = gemini_values['guid']
 
+        # Save the metadata reference date in the Harvest Object
+        try:
+            reference_date = datetime.strptime(gemini_values['metadata-date'],'%Y-%m-%d')
+        except ValueError:
+            try:
+                reference_date = datetime.strptime(gemini_values['metadata-date'],'%Y-%m-%dT%H:%M:%S')
+            except:
+                raise Exception('Could not extract reference date for GUID %s (%s)' \
+                        % (gemini_guid,gemini_values['metadata-date']))
+
+        self.obj.reference_date = reference_date
+        self.obj.save()
+
+        # Look for previously harvested document matching Gemini GUID
         harvested_objects = Session.query(HarvestObject) \
                             .filter(HarvestObject.guid==gemini_guid) \
                             .filter(HarvestObject.package!=None) \
-                            .order_by(HarvestObject.created.desc()).all()
+                            .order_by(HarvestObject.reference_date.desc()).all()
 
-        last_harvested_object = harvested_objects[0] if len(harvested_objects) > 0 else None
+        if len(harvested_objects):
+            #SA returns nulls first.
+            last_harvested_object = harvested_objects[0]
+            for ho in harvested_objects:
+                if ho.reference_date:
+                    last_harvested_object = ho
+                    break
+        else:
+            last_harvested_object = None
 
         if last_harvested_object:
             # We've previously harvested this (i.e. it's an update)
@@ -179,23 +200,19 @@
                             gemini_guid,
                         ))
 
-            last_gemini_document = GeminiDocument(last_harvested_object.content)
-            last_gemini_values = last_gemini_document.read_values()
-
             # Use reference date instead of content to determine if the package
             # needs to be updated
-            if last_gemini_values['date-updated'] == gemini_values['date-updated'] and \
-               last_gemini_values['date-released'] == gemini_values['date-released'] and \
-               last_gemini_values['date-created'] == gemini_values['date-created']:
-
+            if last_harvested_object.reference_date < self.obj.reference_date \
+                or last_harvested_object.reference_date is None:
+                log.info('Package for %s needs to be created or updated' % gemini_guid)
+                package = last_harvested_object.package
+            else:
                 if last_harvested_object.content != self.obj.content:
                     raise Exception('The contents of document with GUID %s changed, but the reference date has not been updated' % gemini_guid)
                 else:
                     # The content hasn't changed, no need to update the package
                     log.info('Document with GUID %s unchanged, skipping...' % (gemini_guid))
                 return None
-            log.info('Package for %s needs to be created or updated' % gemini_guid)
-            package = last_harvested_object.package
         else:
             log.info('No package with GEMINI guid %s found, let''s create one' % gemini_guid)
 


--- a/ckanext/inspire/model/__init__.py	Tue May 10 10:53:33 2011 +0100
+++ b/ckanext/inspire/model/__init__.py	Tue May 10 12:31:13 2011 +0100
@@ -299,8 +299,8 @@
         GeminiElement(
             name="metadata-date",
             search_paths=[
+                "gmd:dateStamp/gco:DateTime/text()",
                 "gmd:dateStamp/gco:Date/text()",
-                "gmd:dateStamp/gco:DateTime/text()",
             ],
             multiplicity="1",
         ),


http://bitbucket.org/okfn/ckanext-inspire/changeset/2ce9616576f7/
changeset:   r25:2ce9616576f7
user:        amercader
date:        2011-05-10 14:23:14
summary:     Update API calls to use reference_date
affected #:  1 file (471 bytes)

--- a/ckanext/inspire/controllers/api.py	Tue May 10 12:31:13 2011 +0100
+++ b/ckanext/inspire/controllers/api.py	Tue May 10 13:23:14 2011 +0100
@@ -15,12 +15,21 @@
 class ApiController(BaseApiController):
 
     def _get_harvest_object(self,guid):
-        return Session.query(HarvestObject) \
+        obj = Session.query(HarvestObject) \
                         .filter(HarvestObject.guid==guid) \
                         .filter(HarvestObject.package!=None) \
-                        .order_by(HarvestObject.created.desc()) \
+                        .filter(HarvestObject.reference_date!=None) \
+                        .order_by(HarvestObject.reference_date.desc()) \
                         .limit(1).first()
-       
+        if not obj:
+            #Just in case reference_dates have not been yet set up
+            obj = Session.query(HarvestObject) \
+                            .filter(HarvestObject.guid==guid) \
+                            .filter(HarvestObject.package!=None) \
+                            .order_by(HarvestObject.created.desc()) \
+                            .limit(1).first()
+        return obj
+
     def display_xml(self, guid):
         doc = self._get_harvest_object(guid)

Repository URL: https://bitbucket.org/okfn/ckanext-inspire/

--

This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled and addressed
to the recipient of this email.




More information about the ckan-changes mailing list