[ckan-changes] commit/ckanext-inspire: 2 new changesets
Bitbucket
commits-noreply at bitbucket.org
Thu Sep 29 14:52:01 UTC 2011
2 new changesets in ckanext-inspire:
http://bitbucket.org/okfn/ckanext-inspire/changeset/17d41759c0d2/
changeset: 17d41759c0d2
branch: spatial-refactoring
user: amercader
date: 2011-09-29 16:48:30
summary: Update the harvester to use the logic functions and store the spatial extra
The harvester no longer explicitly stores the extents; instead, a 'spatial'
extra is created and ckanext-spatial takes care of the rest. Logic functions
are now used to create and update packages.
affected #: 1 file (-1 bytes)
--- a/ckanext/inspire/harvesters.py Thu Sep 08 09:54:05 2011 +0100
+++ b/ckanext/inspire/harvesters.py Thu Sep 29 15:48:30 2011 +0100
@@ -12,17 +12,25 @@
import urllib2
from urlparse import urlparse
from datetime import datetime
+from string import Template
+from numbers import Number
import logging
log = logging.getLogger(__name__)
from pylons import config
from sqlalchemy.exc import InvalidRequestError
+
+from ckan import model
from ckan.model import Session, repo, \
Package, Resource, PackageExtra, \
- setup_default_user_roles
+ setup_default_user_roles, make_uuid
from ckan.lib.munge import munge_title_to_name
from ckan.plugins.core import SingletonPlugin, implements
+from ckan.lib.helpers import json
+from ckan.logic import get_action, ValidationError
+from ckan.logic.schema import default_package_schema, default_tags_schema
+from ckan.lib.navl.validators import not_empty
from ckanext.harvest.interfaces import IHarvester
from ckanext.harvest.model import HarvestObject, HarvestGatherError, \
@@ -33,13 +41,6 @@
from owslib import wms
try:
- from ckanext.spatial.lib import save_extent
- save_extents = True
-except ImportError:
- log.error('No spatial support installed -- install ckanext-spatial if you want to support spatial queries')
- save_extents = False
-
-try:
from ckanext.csw.services import CswService
from ckanext.csw.validation import Validator
from owslib.csw import namespaces
@@ -55,6 +56,10 @@
force_import = False
+ extent_template = Template('''
+ {"type":"Polygon","coordinates":[[[$minx, $miny],[$minx, $maxy], [$maxx, $maxy], [$maxx, $miny], [$minx, $miny]]]}
+ ''')
+
def _is_wms(self,url):
try:
s = wms.WebMapService(url)
@@ -241,7 +246,7 @@
license_url_extracted = self._extract_first_license_url(extras['licence'])
if license_url_extracted:
extras['licence_url'] = license_url_extracted
-
+
extras['access_constraints'] = gemini_values.get('limitations-on-public-access','')
if gemini_values.has_key('temporal-extent-begin'):
#gemini_values['temporal-extent-begin'].sort()
@@ -259,37 +264,50 @@
else:
parties[responsible_party['organisation-name']] = [responsible_party['role']]
parties_extra = []
- for party_name in parties:
+ for party_name in parties:
parties_extra.append('%s (%s)' % (party_name, ', '.join(parties[party_name])))
extras['responsible-party'] = '; '.join(parties_extra)
+ # Construct a GeoJSON extent so ckanext-spatial can register the extent geometry
+ extent_string = self.extent_template.substitute(
+ minx = extras['bbox-east-long'],
+ miny = extras['bbox-south-lat'],
+ maxx = extras['bbox-west-long'],
+ maxy = extras['bbox-north-lat']
+ )
+
+ extras['spatial'] = extent_string.strip()
+
tags = []
for tag in gemini_values['tags']:
tag = tag[:50] if len(tag) > 50 else tag
- tags.append(tag)
+ tags.append({'name':tag})
- package_data = {
+ package_dict = {
'title': gemini_values['title'],
'notes': gemini_values['abstract'],
- 'extras': extras,
'tags': tags,
}
+
if package is None or package.title != gemini_values['title']:
name = self.gen_new_name(gemini_values['title'])
if not name:
name = self.gen_new_name(str(gemini_guid))
if not name:
raise Exception('Could not generate a unique name from the title or the GUID. Please choose a more unique title.')
- package_data['name'] = name
+ package_dict['name'] = name
+ else:
+ package_dict['name'] = package.name
+
resource_locator = gemini_values.get('resource-locator', []) and gemini_values['resource-locator'][0].get('url') or ''
if resource_locator:
if extras['resource-type'] == 'service':
_format = 'WMS' if self._is_wms(resource_locator) else 'Unverified'
- else:
+ else:
_format = 'Unverified'
- package_data['resources'] = [
+ package_dict['resources'] = [
{
'url': resource_locator,
'description': 'Resource locator',
@@ -297,26 +315,28 @@
},
]
+ extras_as_dict = []
+ for key,value in extras.iteritems():
+ if isinstance(value,(basestring,Number)):
+ extras_as_dict.append({'key':key,'value':value})
+ else:
+ extras_as_dict.append({'key':key,'value':json.dumps(value)})
+
+ package_dict['extras'] = extras_as_dict
+
if package == None:
# Create new package from data.
- package = self._create_package_from_data(package_data)
+ package = self._create_package_from_data(package_dict)
log.info('Created new package ID %s with GEMINI guid %s', package.id, gemini_guid)
+
+ # Set reference to package in the HarvestObject
+ self.obj.package = package
+ self.obj.save()
+
else:
- package = self._create_package_from_data(package_data, package = package)
+ package = self._create_package_from_data(package_dict, package = package)
log.info('Updated existing package ID %s with existing GEMINI guid %s', package.id, gemini_guid)
- # Set reference to package in the HarvestObject
- self.obj.package = package
- self.obj.save()
-
- # Save spatial extent
- if package.extras.get('bbox-east-long') and save_extents:
- try:
- save_extent(package)
- except:
- log.error('There was an error saving the package extent. Have you set up the package_extent table in the DB?')
- raise
-
assert gemini_guid == package.harvest_objects[0].guid
return package
@@ -344,77 +364,61 @@
return licence
return None
- def _create_package_from_data(self, package_data, package = None):
+ def _create_package_from_data(self, package_dict, package = None):
'''
- {'extras': {'INSPIRE': 'True',
- 'bbox-east-long': '-3.12442',
- 'bbox-north-lat': '54.218407',
- 'bbox-south-lat': '54.039634',
- 'bbox-west-long': '-3.32485',
- 'constraint': 'conditions unknown; (e) intellectual property rights;',
- 'dataset-reference-date': [{'type': 'creation',
- 'value': '2008-10-10'},
- {'type': 'revision',
- 'value': '2009-10-08'}],
- 'guid': '00a743bf-cca4-4c19-a8e5-e64f7edbcadd',
- 'metadata-date': '2009-10-16',
- 'metadata-language': 'eng',
- 'published_by': 0,
- 'resource-type': 'dataset',
- 'spatial-reference-system': '<gmd:MD_ReferenceSystem xmlns:gmd="http://www.isotc211.org/2005/gmd" xmlns:gco="http://www.isotc211.org/2005/gco" xmlns:gml="http://www.opengis.net/gml/3.2" xmlns:xlink="http://www.w3.org/1999/xlink"><gmd:referenceSystemIdentifier><gmd:RS_Identifier><gmd:code><gco:CharacterString>urn:ogc:def:crs:EPSG::27700</gco:CharacterString></gmd:code></gmd:RS_Identifier></gmd:referenceSystemIdentifier></gmd:MD_ReferenceSystem>',
- 'temporal_coverage-from': '1977-03-10T11:45:30',
- 'temporal_coverage-to': '2005-01-15T09:10:00'},
- 'name': 'council-owned-litter-bins',
+ {'name': 'council-owned-litter-bins',
'notes': 'Location of Council owned litter bins within Borough.',
'resources': [{'description': 'Resource locator',
'format': 'Unverified',
'url': 'http://www.barrowbc.gov.uk'}],
- 'tags': ['Utility and governmental services'],
- 'title': 'Council Owned Litter Bins'}
+ 'tags': [{'name':'Utility and governmental services'}],
+ 'title': 'Council Owned Litter Bins',
+ 'extras': [{'key':'INSPIRE','value':'True'},
+ {'key':'bbox-east-long','value': '-3.12442'},
+ {'key':'bbox-north-lat','value': '54.218407'},
+ {'key':'bbox-south-lat','value': '54.039634'},
+ {'key':'bbox-west-long','value': '-3.32485'},
+ # etc.
+ ]
+ }
'''
+ # The default package schema does not like Upper case tags
+ tag_schema = default_tags_schema()
+ package_schema = default_package_schema()
+
+ tag_schema['name'] = [not_empty,unicode]
+ package_schema['tags'] = tag_schema
+
+ # TODO: user
+ context = {'model':model,
+ 'session':Session,
+ 'user':'harvest',
+ 'schema':package_schema,
+ 'extras_as_string':True}
+
if not package:
- package = Package()
+ # We need to explicitly provide a package ID, otherwise ckanext-spatial
+ # won't be able to link the extent to the package.
+ package_dict['id'] = make_uuid()
+ package_schema['id'] = [unicode]
- rev = repo.new_revision()
+ action_function = get_action('package_create')
+ else:
+ action_function = get_action('package_update')
+ package_dict['id'] = package.id
- relationship_attr = ['extras', 'resources', 'tags']
+ try:
+ package_dict = action_function(context, package_dict)
+ except ValidationError,e:
+ import pdb; pdb.set_trace()
+ raise Exception('Validation Error: %s' % str(e.error_summary))
+ except Exception, e:
+ import pdb; pdb.set_trace()
+ raise e
- package_properties = {}
- for key, value in package_data.iteritems():
- if key not in relationship_attr:
- setattr(package, key, value)
-
- tags = package_data.get('tags', [])
-
- for tag in tags:
- package.add_tag_by_name(tag, autoflush=False)
-
- for resource_dict in package_data.get('resources', []):
- resource = Resource(**resource_dict)
- package.resources[:] = []
- package.resources.append(resource)
-
- # Make sure old extras are removed if updating
- if len(package.extras):
- for key in package.extras.keys():
- del package.extras[key]
-
- for key, value in package_data.get('extras', {}).iteritems():
- extra = PackageExtra(key=key, value=value)
- package._extras[key] = extra
-
- Session.add(package)
- Session.flush()
-
- setup_default_user_roles(package, [])
-
- rev.message = 'Harvester: Created package %s' % package.id
-
- Session.add(rev)
- Session.commit()
-
- return package
+ # Return the actual package object
+ return context['package']
def get_gemini_string_and_guid(self,content,url=None):
try:
@@ -528,7 +532,7 @@
except Exception, e:
self._save_object_error('Error getting the CSW record with GUID %s' % identifier,harvest_object)
return False
-
+
if record is None:
self._save_object_error('Empty record for GUID %s' % identifier,harvest_object)
return False
@@ -577,7 +581,7 @@
try:
# We need to extract the guid to pass it to the next stage
gemini_string, gemini_guid = self.get_gemini_string_and_guid(content,url)
-
+
if gemini_guid:
# Create a new HarvestObject for this identifier
# Generally the content will be set in the fetch stage, but as we already
http://bitbucket.org/okfn/ckanext-inspire/changeset/ac9aa6d33482/
changeset: ac9aa6d33482
branch: spatial-refactoring
user: amercader
date: 2011-09-29 16:49:50
summary: [merge] from default
affected #: 2 files (-1 bytes)
--- a/ckanext/inspire/harvesters.py Thu Sep 29 15:48:30 2011 +0100
+++ b/ckanext/inspire/harvesters.py Thu Sep 29 15:49:50 2011 +0100
@@ -238,6 +238,9 @@
'metadata-language', # Language
'metadata-date', # Released
'coupled-resource',
+ 'contact-email',
+ 'frequency-of-update',
+ 'spatial-data-service-type',
]:
extras[name] = gemini_values[name]
--- a/ckanext/inspire/model/__init__.py Thu Sep 29 15:48:30 2011 +0100
+++ b/ckanext/inspire/model/__init__.py Thu Sep 29 15:49:50 2011 +0100
@@ -414,7 +414,7 @@
GeminiElement(
name="spatial-data-service-type",
search_paths=[
- "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:serviceType/gco:LocalName",
+ "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:serviceType/gco:LocalName/text()",
],
multiplicity="0..1",
),
Repository URL: https://bitbucket.org/okfn/ckanext-inspire/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
More information about the ckan-changes
mailing list