[ckan-changes] commit/ckanext-inspire: 2 new changesets
Bitbucket
commits-noreply at bitbucket.org
Tue May 10 15:07:27 UTC 2011
2 new changesets in ckanext-inspire:
http://bitbucket.org/okfn/ckanext-inspire/changeset/bf36327a7e16/
changeset: r24:bf36327a7e16
user: amercader
date: 2011-05-10 13:31:13
summary: Store the reference date when importing the object, and use this date to check if it needs to be updated
affected #: 2 files (603 bytes)
--- a/ckanext/inspire/harvesters.py Tue May 10 10:53:33 2011 +0100
+++ b/ckanext/inspire/harvesters.py Tue May 10 12:31:13 2011 +0100
@@ -10,7 +10,7 @@
'''
from lxml import etree
import urllib2
-
+from datetime import datetime
import logging
log = logging.getLogger(__name__)
@@ -148,18 +148,39 @@
'''Create or update a Package based on some content that has
come from a URL.
'''
- # Look for previously harvested document matching Gemini GUID
package = None
gemini_document = GeminiDocument(content)
gemini_values = gemini_document.read_values()
gemini_guid = gemini_values['guid']
+ # Save the metadata reference date in the Harvest Object
+ try:
+ reference_date = datetime.strptime(gemini_values['metadata-date'],'%Y-%m-%d')
+ except ValueError:
+ try:
+ reference_date = datetime.strptime(gemini_values['metadata-date'],'%Y-%m-%dT%H:%M:%S')
+ except:
+ raise Exception('Could not extract reference date for GUID %s (%s)' \
+ % (gemini_guid,gemini_values['metadata-date']))
+
+ self.obj.reference_date = reference_date
+ self.obj.save()
+
+ # Look for previously harvested document matching Gemini GUID
harvested_objects = Session.query(HarvestObject) \
.filter(HarvestObject.guid==gemini_guid) \
.filter(HarvestObject.package!=None) \
- .order_by(HarvestObject.created.desc()).all()
+ .order_by(HarvestObject.reference_date.desc()).all()
- last_harvested_object = harvested_objects[0] if len(harvested_objects) > 0 else None
+ if len(harvested_objects):
+ #SA returns nulls first.
+ last_harvested_object = harvested_objects[0]
+ for ho in harvested_objects:
+ if ho.reference_date:
+ last_harvested_object = ho
+ break
+ else:
+ last_harvested_object = None
if last_harvested_object:
# We've previously harvested this (i.e. it's an update)
@@ -179,23 +200,19 @@
gemini_guid,
))
- last_gemini_document = GeminiDocument(last_harvested_object.content)
- last_gemini_values = last_gemini_document.read_values()
-
# Use reference date instead of content to determine if the package
# needs to be updated
- if last_gemini_values['date-updated'] == gemini_values['date-updated'] and \
- last_gemini_values['date-released'] == gemini_values['date-released'] and \
- last_gemini_values['date-created'] == gemini_values['date-created']:
-
+ if last_harvested_object.reference_date < self.obj.reference_date \
+ or last_harvested_object.reference_date is None:
+ log.info('Package for %s needs to be created or updated' % gemini_guid)
+ package = last_harvested_object.package
+ else:
if last_harvested_object.content != self.obj.content:
raise Exception('The contents of document with GUID %s changed, but the reference date has not been updated' % gemini_guid)
else:
# The content hasn't changed, no need to update the package
log.info('Document with GUID %s unchanged, skipping...' % (gemini_guid))
return None
- log.info('Package for %s needs to be created or updated' % gemini_guid)
- package = last_harvested_object.package
else:
log.info('No package with GEMINI guid %s found, let''s create one' % gemini_guid)
--- a/ckanext/inspire/model/__init__.py Tue May 10 10:53:33 2011 +0100
+++ b/ckanext/inspire/model/__init__.py Tue May 10 12:31:13 2011 +0100
@@ -299,8 +299,8 @@
GeminiElement(
name="metadata-date",
search_paths=[
+ "gmd:dateStamp/gco:DateTime/text()",
"gmd:dateStamp/gco:Date/text()",
- "gmd:dateStamp/gco:DateTime/text()",
],
multiplicity="1",
),
http://bitbucket.org/okfn/ckanext-inspire/changeset/2ce9616576f7/
changeset: r25:2ce9616576f7
user: amercader
date: 2011-05-10 14:23:14
summary: Update API calls to use reference_date
affected #: 1 file (471 bytes)
--- a/ckanext/inspire/controllers/api.py Tue May 10 12:31:13 2011 +0100
+++ b/ckanext/inspire/controllers/api.py Tue May 10 13:23:14 2011 +0100
@@ -15,12 +15,21 @@
class ApiController(BaseApiController):
def _get_harvest_object(self,guid):
- return Session.query(HarvestObject) \
+ obj = Session.query(HarvestObject) \
.filter(HarvestObject.guid==guid) \
.filter(HarvestObject.package!=None) \
- .order_by(HarvestObject.created.desc()) \
+ .filter(HarvestObject.reference_date!=None) \
+ .order_by(HarvestObject.reference_date.desc()) \
.limit(1).first()
-
+ if not obj:
+ #Just in case reference_dates have not yet been set up
+ obj = Session.query(HarvestObject) \
+ .filter(HarvestObject.guid==guid) \
+ .filter(HarvestObject.package!=None) \
+ .order_by(HarvestObject.created.desc()) \
+ .limit(1).first()
+ return obj
+
def display_xml(self, guid):
doc = self._get_harvest_object(guid)
Repository URL: https://bitbucket.org/okfn/ckanext-inspire/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
More information about the ckan-changes mailing list