[ckan-changes] commit/ckanext-harvest: 5 new changesets
Bitbucket
commits-noreply at bitbucket.org
Fri May 13 17:40:59 UTC 2011
5 new changesets in ckanext-harvest:
http://bitbucket.org/okfn/ckanext-harvest/changeset/6120b0083a2e/
changeset: r92:6120b0083a2e
branch: new-forms
user: amercader
date: 2011-05-13 15:17:58
summary: [forms] Major refactoring of the harvest forms. Forms no longer use the DGU form
API; they are now handled the same way as the new forms in CKAN core (logic, schema,
validators, ...). The UI is also more consistent with CKAN's own.
affected #: 15 files (13.6 KB)
--- a/ckanext/harvest/controllers/view.py Wed May 11 17:07:05 2011 +0100
+++ b/ckanext/harvest/controllers/view.py Fri May 13 14:17:58 2011 +0100
@@ -1,21 +1,21 @@
-import urllib2
-
from pylons.i18n import _
import ckan.lib.helpers as h, json
from ckan.lib.base import BaseController, c, g, request, \
response, session, render, config, abort, redirect
-from ckan.model import Package
-
-from ckanext.harvest.lib import *
+from ckan.lib.navl.dictization_functions import DataError
+from ckan.logic import NotFound, ValidationError
+from ckanext.harvest.logic.schema import harvest_source_form_schema
+from ckanext.harvest.lib import create_harvest_source, edit_harvest_source, \
+ get_harvest_source, get_harvest_sources, \
+ create_harvest_job, get_registered_harvesters_types
+
+import logging
+log = logging.getLogger(__name__)
class ViewController(BaseController):
- api_url = config.get('ckan.api_url', 'http://localhost:5000').rstrip('/')+'/api/2/rest'
- form_api_url = config.get('ckan.api_url', 'http://localhost:5000').rstrip('/')+'/api/2/form'
- api_key = config.get('ckan.harvest.api_key')
-
def __before__(self, action, **env):
super(ViewController, self).__before__(action, **env)
# All calls to this controller must be with a sysadmin key
@@ -24,138 +24,123 @@
status = 401
abort(status, response_msg)
- def _do_request(self,url,data = None):
-
- http_request = urllib2.Request(
- url = url,
- headers = {'Authorization' : self.api_key}
- )
-
- if data:
- http_request.add_data(data)
-
- try:
- return urllib2.urlopen(http_request)
- except urllib2.HTTPError as e:
- raise
-
def index(self):
# Request all harvest sources
c.sources = get_harvest_sources()
- return render('ckanext/harvest/index.html')
+ return render('index.html')
- def create(self):
+ def new(self,data = None,errors = None, error_summary = None):
- # This is the DGU form API, so we don't use self.api_url
- form_url = self.form_api_url + '/harvestsource/create'
- if request.method == 'GET':
+ if ('save' in request.params) and not data:
+ return self._save_new()
+
+ data = data or {}
+ errors = errors or {}
+ error_summary = error_summary or {}
+ #TODO: Use new description interface to build the types select and descriptions
+ vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'types': get_registered_harvesters_types()}
+
+ c.form = render('source/new_source_form.html', extra_vars=vars)
+ return render('source/new.html')
+
+ def _save_new(self):
+ try:
+ data_dict = dict(request.params)
+ self._check_data_dict(data_dict)
+
+ source = create_harvest_source(data_dict)
+
+ # Create a harvest job for the new source
+ create_harvest_job(source['id'])
+
+ h.flash_success(_('New harvest source added successfully.'
+ 'A new harvest job for the source has also been created.'))
+ redirect(h.url_for('harvest'))
+ except DataError,e:
+ abort(400, 'Integrity Error')
+ except ValidationError,e:
+ errors = e.error_dict
+ error_summary = e.error_summary if 'error_summary' in e else None
+ return self.new(data_dict, errors, error_summary)
+
+ def edit(self, id, data = None,errors = None, error_summary = None):
+
+ if ('save' in request.params) and not data:
+ return self._save_edit(id)
+
+ if not data:
try:
- # Request the fields
- c.form = self._do_request(form_url).read()
- c.mode = 'create'
- except urllib2.HTTPError as e:
- msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg)
- h.flash_error(msg)
- return render('ckanext/harvest/create.html')
- if request.method == 'POST':
- # Build an object like the one expected by the DGU form API
- data = {
- 'form_data':
- {'HarvestSource--url': request.POST['HarvestSource--url'],
- 'HarvestSource--description': request.POST['HarvestSource--description'],
- 'HarvestSource--type': request.POST['HarvestSource--type'],
- },
- 'user_id':'',
- 'publisher_id':''
- }
- data = json.dumps(data)
- try:
- rq = self._do_request(form_url,data)
+ old_data = get_harvest_source(id)
+ except NotFound:
+ abort(404, _('Harvest Source not found'))
- h.flash_success('Harvesting source added successfully')
- redirect(h.url_for('harvest'))
+ data = data or old_data
+ errors = errors or {}
+ error_summary = error_summary or {}
+ #TODO: Use new description interface to build the types select and descriptions
+ vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'types': get_registered_harvesters_types()}
+
+ c.form = render('source/new_source_form.html', extra_vars=vars)
+ return render('source/edit.html')
- except urllib2.HTTPError as e:
- msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg)
- # The form API returns just a 500, so we are not exactly sure of what
- # happened, but most probably it was a duplicate entry
- if e.getcode() == 500:
- msg = msg + ' Does the source already exist?'
- elif e.getcode() == 400:
- err_msg = e.read()
- if '<form' in c.form:
- c.form = err_msg
- c.mode = 'create'
- return render('ckanext/harvest/create.html')
- else:
- msg = err_msg
+ def _save_edit(self,id):
+ try:
+ data_dict = dict(request.params)
+ self._check_data_dict(data_dict)
- h.flash_error(msg)
- redirect(h.url_for('harvest'))
+ source = edit_harvest_source(id,data_dict)
- def show(self,id):
+ h.flash_success(_('Harvest source edited successfully.'))
+ redirect(h.url_for('harvest'))
+ except DataError,e:
+ abort(400, _('Integrity Error'))
+ except NotFound, e:
+ abort(404, _('Harvest Source not found'))
+ except ValidationError,e:
+ errors = e.error_dict
+ error_summary = e.error_summary if 'error_summary' in e else None
+ return self.edit(id,data_dict, errors, error_summary)
+
+ def _check_data_dict(self, data_dict):
+ '''Check if the return data is correct'''
+ surplus_keys_schema = ['id','publisher_id','user_id','active','save']
+
+ schema_keys = harvest_source_form_schema().keys()
+ keys_in_schema = set(schema_keys) - set(surplus_keys_schema)
+
+ if keys_in_schema - set(data_dict.keys()):
+ log.info(_('Incorrect form fields posted'))
+ raise DataError(data_dict)
+
+ def read(self,id):
try:
c.source = get_harvest_source(id)
- return render('ckanext/harvest/show.html')
- except:
- abort(404,'Harvest source not found')
+ return render('source/read.html')
+ except NotFound:
+ abort(404,_('Harvest source not found'))
def delete(self,id):
try:
delete_harvest_source(id)
- h.flash_success('Harvesting source deleted successfully')
- except Exception as e:
- msg = 'An error occurred: [%s]' % e.message
- h.flash_error(msg)
- redirect(h.url_for('harvest'))
+ h.flash_success(_('Harvesting source deleted successfully'))
+ redirect(h.url_for('harvest'))
+ except NotFound:
+ abort(404,_('Harvest source not found'))
- def edit(self,id):
-
- form_url = self.form_api_url + '/harvestsource/edit/%s' % id
- if request.method == 'GET':
- # Request the fields
- c.form = self._do_request(form_url).read()
- c.mode = 'edit'
-
- return render('ckanext/harvest/create.html')
- if request.method == 'POST':
- # Build an object like the one expected by the DGU form API
- data = {
- 'form_data':
- {'HarvestSource-%s-url' % id: request.POST['HarvestSource-%s-url' % id] ,
- 'HarvestSource-%s-type' % id: request.POST['HarvestSource-%s-type' % id],
- 'HarvestSource-%s-description' % id: request.POST['HarvestSource-%s-description' % id]},
- 'user_id':'',
- 'publisher_id':''
- }
- data = json.dumps(data)
- try:
- r = self._do_request(form_url,data)
-
- h.flash_success('Harvesting source edited successfully')
-
- redirect(h.url_for('harvest'))
- except urllib2.HTTPError as e:
- if e.getcode() == 400:
- c.form = e.read()
- c.mode = 'edit'
- return render('ckanext/harvest/create.html')
- else:
- msg = 'An error occurred: [%s %s]' % (str(e.getcode()),e.msg)
- h.flash_error(msg)
- redirect(h.url_for('harvest'))
def create_harvesting_job(self,id):
try:
create_harvest_job(id)
- h.flash_success('Refresh requested, harvesting will take place within 15 minutes.')
+ h.flash_success(_('Refresh requested, harvesting will take place within 15 minutes.'))
+ redirect(h.url_for('harvest'))
+ except NotFound:
+ abort(404,_('Harvest source not found'))
except Exception as e:
msg = 'An error occurred: [%s]' % e.message
h.flash_error(msg)
+ redirect(h.url_for('harvest'))
- redirect(h.url_for('harvest'))
-
--- a/ckanext/harvest/lib/__init__.py Wed May 11 17:07:05 2011 +0100
+++ b/ckanext/harvest/lib/__init__.py Fri May 13 14:17:58 2011 +0100
@@ -1,14 +1,22 @@
import urlparse
+import re
+
from sqlalchemy import distinct,func
from ckan.model import Session, repo
from ckan.model import Package
+from ckan.lib.navl.dictization_functions import validate
+from ckan.logic import NotFound, ValidationError
+
+from ckanext.harvest.logic.schema import harvest_source_form_schema
+
from ckan.plugins import PluginImplementations
from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject, \
HarvestGatherError, HarvestObjectError
from ckanext.harvest.queue import get_gather_publisher
from ckanext.harvest.interfaces import IHarvester
-log = __import__("logging").getLogger(__name__)
+import logging
+log = logging.getLogger(__name__)
def _get_source_status(source):
@@ -183,49 +191,68 @@
return check_url
-def get_harvest_source(id,default=Exception,attr=None):
- source = HarvestSource.get(id,default=default,attr=attr)
- if source:
- return _source_as_dict(source)
- else:
- return default
+def _prettify(field_name):
+ field_name = re.sub('(?<!\w)[Uu]rl(?!\w)', 'URL', field_name.replace('_', ' ').capitalize())
+ return field_name.replace('_', ' ')
+
+def _error_summary(error_dict):
+
+ error_summary = {}
+ for key, error in error_dict.iteritems():
+ error_summary[_prettify(key)] = error[0]
+ return error_summary
+
+def get_harvest_source(id,attr=None):
+ source = HarvestSource.get(id,attr=attr)
+
+ if not source:
+ raise NotFound
+
+ return _source_as_dict(source)
def get_harvest_sources(**kwds):
sources = HarvestSource.filter(**kwds).all()
return [_source_as_dict(source) for source in sources]
-def create_harvest_source(source_dict):
- if not 'url' in source_dict or not source_dict['url'] or \
- not 'type' in source_dict or not source_dict['type']:
- raise Exception('Missing mandatory properties: url, type')
+def create_harvest_source(data_dict):
- # Check if source already exists
- existing_source = _url_exists(source_dict['url'])
- if existing_source:
- raise Exception('There already is an active Harvest Source for this URL: %s' % source_dict['url'])
+ schema = harvest_source_form_schema()
+ data, errors = validate(data_dict, schema)
+
+ if errors:
+ Session.rollback()
+ raise ValidationError(errors,_error_summary(errors))
source = HarvestSource()
- source.url = source_dict['url']
- source.type = source_dict['type']
+ source.url = data['url']
+ source.type = data['type']
+
opt = ['active','description','user_id','publisher_id']
for o in opt:
- if o in source_dict and source_dict[o] is not None:
- source.__setattr__(o,source_dict[o])
+ if o in data and data[o] is not None:
+ source.__setattr__(o,data[o])
source.save()
-
return _source_as_dict(source)
-def edit_harvest_source(source_id,source_dict):
- try:
- source = HarvestSource.get(source_id)
- except:
- raise Exception('Source %s does not exist' % source_id)
+def edit_harvest_source(source_id,data_dict):
+ schema = harvest_source_form_schema()
+
+ source = HarvestSource.get(source_id)
+
+ # Add source id to the dict, as some validators will need it
+ data_dict["id"] = source.id
+
+ data, errors = validate(data_dict, schema)
+ if errors:
+ Session.rollback()
+ raise ValidationError(errors,_error_summary(errors))
+
fields = ['url','type','active','description','user_id','publisher_id']
for f in fields:
- if f in source_dict and source_dict[f] is not None and source_dict[f] != '':
- source.__setattr__(f,source_dict[f])
+ if f in data_dict and data_dict[f] is not None and data_dict[f] != '':
+ source.__setattr__(f,data_dict[f])
source.save()
@@ -251,12 +278,12 @@
return True
-def get_harvest_job(id,default=Exception,attr=None):
- job = HarvestJob.get(id,default=default,attr=attr)
- if job:
- return _job_as_dict(job)
- else:
- return default
+def get_harvest_job(id,attr=None):
+ job = HarvestJob.get(id,attr=attr)
+ if not job:
+ raise NotFound
+
+ return _job_as_dict(job)
def get_harvest_jobs(**kwds):
jobs = HarvestJob.filter(**kwds).all()
@@ -304,13 +331,13 @@
publisher.close()
return sent_jobs
-def get_harvest_object(id,default=Exception,attr=None):
- obj = HarvestObject.get(id,default=default,attr=attr)
- if obj:
- return _object_as_dict(obj)
- else:
- return default
+def get_harvest_object(id,attr=None):
+ obj = HarvestObject.get(id,attr=attr)
+ if not obj:
+ raise NotFound
+ return _object_as_dict(obj)
+
def get_harvest_objects(**kwds):
objects = HarvestObject.filter(**kwds).all()
return [_object_as_dict(obj) for obj in objects]
@@ -351,3 +378,10 @@
last_obj_guid = obj.guid
return imported_objects
+
+def get_registered_harvesters_types():
+ # TODO: Use new description interface when implemented
+ available_types = []
+ for harvester in PluginImplementations(IHarvester):
+ available_types.append(harvester.get_type())
+ return available_types
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ckanext/harvest/logic/__init__.py Fri May 13 14:17:58 2011 +0100
@@ -0,0 +1,7 @@
+try:
+ import pkg_resources
+ pkg_resources.declare_namespace(__name__)
+except ImportError:
+ import pkgutil
+ __path__ = pkgutil.extend_path(__path__, __name__)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ckanext/harvest/logic/schema.py Fri May 13 14:17:58 2011 +0100
@@ -0,0 +1,33 @@
+from ckan.lib.navl.validators import (ignore_missing,
+ not_empty,
+ empty,
+ ignore,
+ not_missing
+ )
+
+from ckanext.harvest.logic.validators import harvest_source_id_exists, \
+ harvest_source_url_validator, \
+ harvest_source_type_exists
+
+def default_harvest_source_schema():
+
+ schema = {
+ 'id': [ignore_missing, unicode, harvest_source_id_exists],
+ 'url': [not_empty, unicode, harvest_source_url_validator],
+ 'type': [not_empty, unicode, harvest_source_type_exists],
+ 'description': [ignore_missing],
+ 'active': [ignore_missing],
+ 'user_id': [ignore_missing],
+ 'publisher_id': [ignore_missing],
+ #'config'
+ }
+
+ return schema
+
+
+def harvest_source_form_schema():
+
+ schema = default_harvest_source_schema()
+ schema['save'] = [ignore]
+
+ return schema
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ckanext/harvest/logic/validators.py Fri May 13 14:17:58 2011 +0100
@@ -0,0 +1,74 @@
+import urlparse
+
+from ckan.lib.navl.dictization_functions import Invalid, missing
+from ckan.model import Session
+from ckan.plugins import PluginImplementations
+
+from ckanext.harvest.model import HarvestSource
+from ckanext.harvest.interfaces import IHarvester
+
+
+#TODO: use context?
+
+def harvest_source_id_exists(value, context):
+
+ result = HarvestSource.get(value,None)
+
+ if not result:
+ raise Invalid('Harvest Source with id %r does not exist.' % str(value))
+ return value
+
+def _normalize_url(url):
+ o = urlparse.urlparse(url)
+
+ # Normalize port
+ if ':' in o.netloc:
+ parts = o.netloc.split(':')
+ if (o.scheme == 'http' and parts[1] == '80') or \
+ (o.scheme == 'https' and parts[1] == '443'):
+ netloc = parts[0]
+ else:
+ netloc = ':'.join(parts)
+ else:
+ netloc = o.netloc
+
+ # Remove trailing slash
+ path = o.path.rstrip('/')
+
+ check_url = urlparse.urlunparse((
+ o.scheme,
+ netloc,
+ path,
+ None,None,None))
+
+ return check_url
+
+def harvest_source_url_validator(key,data,errors,context):
+ new_url = _normalize_url(data[key])
+ source_id = data.get(('id',),'')
+ if source_id:
+ # When editing a source we need to avoid its own URL
+ existing_sources = Session.query(HarvestSource.url,HarvestSource.active) \
+ .filter(HarvestSource.id!=source_id).all()
+ else:
+ existing_sources = Session.query(HarvestSource.url,HarvestSource.active).all()
+
+ for url,active in existing_sources:
+ url = _normalize_url(url)
+ if url == new_url and active == True:
+ raise Invalid('There already is an active Harvest Source for this URL: %s' % data[key])
+
+ return data[key]
+
+def harvest_source_type_exists(value,context):
+ #TODO: use new description interface
+
+ # Get all the registered harvester types
+ available_types = []
+ for harvester in PluginImplementations(IHarvester):
+ available_types.append(harvester.get_type())
+
+ if not value in available_types:
+ raise Invalid('Unknown harvester type: %s. Have you registered a harvester for this type?' % value)
+
+ return value
--- a/ckanext/harvest/model/__init__.py Wed May 11 17:07:05 2011 +0100
+++ b/ckanext/harvest/model/__init__.py Fri May 13 14:17:58 2011 +0100
@@ -31,7 +31,7 @@
key_attr = 'id'
@classmethod
- def get(self, key, default=Exception, attr=None):
+ def get(self, key, default=None, attr=None):
'''Finds a single entity in the register.'''
if attr == None:
attr = self.key_attr
@@ -39,10 +39,8 @@
o = self.filter(**kwds).first()
if o:
return o
- if default != Exception:
+ else:
return default
- else:
- raise Exception('%s not found: %s' % (self.__name__, key))
@classmethod
def filter(self, **kwds):
--- a/ckanext/harvest/plugin.py Wed May 11 17:07:05 2011 +0100
+++ b/ckanext/harvest/plugin.py Fri May 13 14:17:58 2011 +0100
@@ -22,35 +22,17 @@
pass
def before_map(self, map):
- map.connect('harvest', '/harvest',
- controller='ckanext.harvest.controllers.view:ViewController',
- action='index')
-
- map.connect('harvest_create_form', '/harvest/create',
- controller='ckanext.harvest.controllers.view:ViewController',
- conditions=dict(method=['GET']),
- action='create')
- map.connect('harvest_create', '/harvest/create',
- controller='ckanext.harvest.controllers.view:ViewController',
- conditions=dict(method=['POST']),
- action='create')
+ controller = 'ckanext.harvest.controllers.view:ViewController'
+ map.connect('harvest', '/harvest',controller=controller,action='index')
- map.connect('harvest_show', '/harvest/:id',
- controller='ckanext.harvest.controllers.view:ViewController',
- action='show')
+ map.connect('/harvest/new', controller=controller, action='new')
+ map.connect('/harvest/edit/:id', controller=controller, action='edit')
+ map.connect('/harvest/delete/:id',controller=controller, action='delete')
+ map.connect('/harvest/:id', controller=controller, action='read')
- map.connect('harvest_edit', '/harvest/:id/edit',
- controller='ckanext.harvest.controllers.view:ViewController',
- action='edit')
-
- map.connect('harvest_delete', '/harvest/:id/delete',
- controller='ckanext.harvest.controllers.view:ViewController',
- action='delete')
-
- map.connect('harvesting_job_create', '/harvest/:id/refresh',
- controller='ckanext.harvest.controllers.view:ViewController',
- action='create_harvesting_job')
+ map.connect('harvesting_job_create', '/harvest/refresh/:id',controller=controller,
+ action='create_harvesting_job')
return map
--- a/ckanext/harvest/templates/ckanext/harvest/create.html Wed May 11 17:07:05 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-<?python
- if c.mode == 'create':
- title = 'Add harvesting source'
- else:
- title = 'Edit harvesting source'
-?>
-<html xmlns:py="http://genshi.edgewall.org/"
- xmlns:i18n="http://genshi.edgewall.org/i18n"
- xmlns:xi="http://www.w3.org/2001/XInclude"
- py:strip="">
-
- <py:def function="page_title">${title}</py:def>
-
- <py:def function="optional_head">
- <link type="text/css" rel="stylesheet" media="all" href="/ckanext/harvest/style.css" />
- </py:def>
-
-<div py:match="content">
- <div class="harvest-content">
- <h1>${title}</h1>
- <form action="${c.mode}" method="POST">
- ${Markup(c.form)}
- <input id="save" name="save" value="Save" type="submit" />
- </form>
- </div>
-</div>
-<xi:include href="../../layout.html" />
-</html>
--- a/ckanext/harvest/templates/ckanext/harvest/index.html Wed May 11 17:07:05 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-<html xmlns:py="http://genshi.edgewall.org/"
- xmlns:i18n="http://genshi.edgewall.org/i18n"
- xmlns:xi="http://www.w3.org/2001/XInclude"
- py:strip="">
-
- <py:def function="page_title">Harvesting Sources</py:def>
-
- <py:def function="optional_head">
- <link type="text/css" rel="stylesheet" media="all" href="/ckanext/harvest/style.css" />
- </py:def>
-
-<div py:match="content">
- <div class="harvest-content">
- <h1>Harvesting Sources</h1>
- <a id="new-harvest-source" href="harvest/create">Add a harvesting source</a>
- <py:choose>
- <py:when test="c.sources">
-
-
- <table id="harvest-sources">
- <tr>
- <th></th>
- <th></th>
- <th></th>
- <th>URL</th>
- <th>Type</th>
- <th>Active</th>
- <th>Statistics</th>
- <th>Next Harvest</th>
- <th>Created</th>
- </tr>
-
- <tr py:for="source in c.sources">
- <td>${h.link_to('view', 'harvest/' + source.id)}</td>
- <td>${h.link_to('edit', 'harvest/' + source.id + '/edit')}</td>
- <td>${h.link_to('refresh', 'harvest/' + source.id + '/refresh')}</td>
- <td>${source.url}</td>
- <td>${source.type}</td>
- <td>${source.active}</td>
- <py:choose>
- <py:when test="'msg' in source.status">
- <td>${source.status.msg}</td>
- <td>${source.status.msg}</td>
- </py:when>
- <py:otherwise>
- <td>${source.status.overall_statistics.added} pkgs ${source.status.overall_statistics.errors} errors</td>
- <td>${source.status.next_harvest}</td>
- </py:otherwise>
- </py:choose>
-
- <td>${source.created}</td>
- </tr>
- </table>
- </py:when>
- <py:otherwise>
- <div id="no-harvest-sources">No harvest sources defined yet.</div>
- </py:otherwise>
- </py:choose>
-
- </div>
-</div>
-<xi:include href="../../layout.html" />
-</html>
--- a/ckanext/harvest/templates/ckanext/harvest/show.html Wed May 11 17:07:05 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,93 +0,0 @@
-<html xmlns:py="http://genshi.edgewall.org/"
- xmlns:i18n="http://genshi.edgewall.org/i18n"
- xmlns:xi="http://www.w3.org/2001/XInclude"
- py:strip="">
-
- <py:def function="page_title">Harvest Source Details</py:def>
-
- <py:def function="optional_head">
- <link type="text/css" rel="stylesheet" media="all" href="/ckanext/harvest/style.css" />
- </py:def>
-
-<div py:match="content">
- <div class="harvest-content">
- <py:if test="c.source">
- <h1>Harvest Source Details</h1>
- <table id="harvest-source-details">
- <tr>
- <th>ID</th>
- <td>${c.source.id}</td>
- </tr>
- <tr>
- <th>URL</th>
- <td>${c.source.url}</td>
- </tr>
- <tr>
- <th>Type</th>
- <td>${c.source.type}</td>
- </tr>
- <tr>
- <th>Active</th>
- <td>${c.source.active}</td>
- </tr>
- <tr>
- <th>Description</th>
- <td>${c.source.description}</td>
- </tr>
- <tr>
- <th>User</th>
- <td>${c.source.user_id}</td>
- </tr>
- <tr>
- <th>Publisher</th>
- <td>${c.source.publisher_id}</td>
- </tr>
- <tr>
- <th>Created</th>
- <td>${c.source.created}</td>
- </tr>
- <tr>
- <th>Total jobs</th>
- <td>${len(c.source.jobs)}</td>
- </tr>
- <tr>
- <th>Status</th>
- <td>
- Last Harvest Errors: ${c.source.status.last_harvest_statistics.errors}<br/>
- <py:choose>
- <py:when test="len(c.source.status.last_harvest_errors)>0">
- <ul>
- <li py:for="error in c.source.status.last_harvest_errors">${error}</li>
- </ul>
- </py:when>
- </py:choose>
- Last Harvest Added: ${c.source.status.last_harvest_statistics.added}<br/>
- Last Harvest Updated: ${c.source.status.last_harvest_statistics.updated}<br/>
- Last Harvest: ${c.source.status.last_harvest_request} <br/>
- Next Harvest: ${c.source.status.next_harvest}
- </td>
- </tr>
- <tr>
- <th>Total Errors</th>
- <td>${c.source.status.overall_statistics.errors}</td>
- </tr>
- <tr>
- <th>Total Packages</th>
- <td>${c.source.status.overall_statistics.added}</td>
- </tr>
- <tr>
- <th>Packages</th>
- <td>
- <div>There could be a 10 minutes delay before these packages (or changes to them) appear on
- the site or on search results.</div>
- <div py:for="package in c.source.status.packages">
- <a href="/package/${package}">${package}</a>
- </div>
- </td>
- </tr>
- </table>
- </py:if>
- </div>
-</div>
-<xi:include href="../../layout.html" />
-</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ckanext/harvest/templates/index.html Fri May 13 14:17:58 2011 +0100
@@ -0,0 +1,63 @@
+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:i18n="http://genshi.edgewall.org/i18n"
+ xmlns:xi="http://www.w3.org/2001/XInclude"
+ py:strip="">
+
+ <py:def function="page_title">Harvesting Sources</py:def>
+
+ <py:def function="optional_head">
+ <link type="text/css" rel="stylesheet" media="all" href="/ckanext/harvest/style.css" />
+ </py:def>
+
+<div py:match="content">
+ <div class="harvest-content">
+ <h1>Harvesting Sources</h1>
+ <a id="new-harvest-source" href="harvest/new">Add a harvesting source</a>
+ <py:choose>
+ <py:when test="c.sources">
+
+
+ <table id="harvest-sources">
+ <tr>
+ <th></th>
+ <th></th>
+ <th></th>
+ <th>URL</th>
+ <th>Type</th>
+ <th>Active</th>
+ <th>Statistics</th>
+ <th>Next Harvest</th>
+ <th>Created</th>
+ </tr>
+
+ <tr py:for="source in c.sources">
+ <td>${h.link_to('view', 'harvest/%s' % source.id)}</td>
+ <td>${h.link_to('edit', 'harvest/edit/%s' % source.id)}</td>
+ <td>${h.link_to('refresh', 'harvest/refresh/%s' % source.id)}</td>
+ <td>${source.url}</td>
+ <td>${source.type}</td>
+ <td>${source.active}</td>
+ <py:choose>
+ <py:when test="'msg' in source.status">
+ <td>${source.status.msg}</td>
+ <td>${source.status.msg}</td>
+ </py:when>
+ <py:otherwise>
+ <td>${source.status.overall_statistics.added} pkgs ${source.status.overall_statistics.errors} errors</td>
+ <td>${source.status.next_harvest}</td>
+ </py:otherwise>
+ </py:choose>
+
+ <td>${source.created}</td>
+ </tr>
+ </table>
+ </py:when>
+ <py:otherwise>
+ <div id="no-harvest-sources">No harvest sources defined yet.</div>
+ </py:otherwise>
+ </py:choose>
+
+ </div>
+</div>
+<xi:include href="layout.html" />
+</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ckanext/harvest/templates/source/edit.html Fri May 13 14:17:58 2011 +0100
@@ -0,0 +1,23 @@
+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:i18n="http://genshi.edgewall.org/i18n"
+ xmlns:xi="http://www.w3.org/2001/XInclude"
+ py:strip="">
+
+ <py:def function="page_title">Edit - Harvest Source</py:def>
+
+ <py:def function="body_class">hide-sidebar</py:def>
+ <py:def function="optional_head">
+ <link rel="stylesheet" href="${g.site_url}/css/forms.css" type="text/css" media="screen, print" />
+ </py:def>
+
+ <div py:match="content">
+ <div class="harvest-content">
+ <h2>Edit harvest source </h2>
+
+
+ ${h.literal(c.form)}
+
+ </div>
+ </div>
+ <xi:include href="../layout.html" />
+ </html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ckanext/harvest/templates/source/new.html Fri May 13 14:17:58 2011 +0100
@@ -0,0 +1,22 @@
+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:i18n="http://genshi.edgewall.org/i18n"
+ xmlns:xi="http://www.w3.org/2001/XInclude"
+ py:strip="">
+
+ <py:def function="page_title">New - Harvest Source</py:def>
+
+ <py:def function="body_class">hide-sidebar</py:def>
+ <py:def function="optional_head">
+ <link rel="stylesheet" href="${g.site_url}/css/forms.css" type="text/css" media="screen, print" />
+ </py:def>
+
+ <div py:match="content">
+ <div class="harvest-content">
+ <h2>New harvest source </h2>
+
+ ${h.literal(c.form)}
+
+ </div>
+ </div>
+ <xi:include href="../layout.html" />
+ </html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ckanext/harvest/templates/source/new_source_form.html Fri May 13 14:17:58 2011 +0100
@@ -0,0 +1,47 @@
+<form id="source-new" class="ckan" method="post"
+ py:attrs="{'class':'has-errors'} if errors else {}"
+ xmlns:i18n="http://genshi.edgewall.org/i18n"
+ xmlns:py="http://genshi.edgewall.org/"
+ xmlns:xi="http://www.w3.org/2001/XInclude">
+
+ <div class="error-explanation" py:if="error_summary">
+<h2>Errors in form</h2>
+<p>The form contains invalid entries:</p>
+<ul>
+ <li py:for="key, error in error_summary.items()">${"%s: %s" % (key, error)}</li>
+</ul>
+</div>
+
+ <fieldset>
+ <legend>Details</legend>
+ <dl>
+ <dt><label class="field_req" for="url">URL for source of metadata *</label></dt>
+ <dd><input id="url" name="url" size="80" type="text" value="${data.get('url', '')}" /></dd>
+ <dd class="field_error" py:if="errors.get('url', '')">${errors.get('url', '')}</dd>
+ <dd class="instructions basic">This should include the <tt>http://</tt> part of the URL</dd>
+ <dt><label class="field_req" for="type">Source Type *</label></dt>
+ <dd>
+ <select id="type" name="type">
+ <py:for each="type in types">
+ <option value="${type}" py:attrs="{'selected': 'selected' if data.get('type', '') == type else None}" >${type}</option>
+ </py:for>
+ <option value="FAKW">FAKW</option>
+ </select>
+ </dd>
+ <dd class="field_error" py:if="errors.get('type', '')">${errors.get('type', '')}</dd>
+ <dd class="instructions basic">Which type of source does the URL above represent?
+ TODO: get these from the harvesters
+ <ul>
+ <li>A server's CSW interface</li>
+ <li>A Web Accessible Folder (WAF) displaying a list of GEMINI 2.1 documents</li>
+ <li>A single GEMINI 2.1 document</li>
+ </ul>
+ </dd>
+ <dt><label class="field_opt" for="description">Description</label></dt>
+ <dd><textarea id="description" name="description" cols="30" rows="2" style="height:75px">${data.get('description', '')}</textarea></dd>
+ <dd class="instructions basic">You can add your own notes here about what the URL above represents to remind you later.</dd>
+ </dl>
+ </fieldset>
+ <input id="save" name="save" value="Save" type="submit" />
+
+</form>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ckanext/harvest/templates/source/read.html Fri May 13 14:17:58 2011 +0100
@@ -0,0 +1,93 @@
+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:i18n="http://genshi.edgewall.org/i18n"
+ xmlns:xi="http://www.w3.org/2001/XInclude"
+ py:strip="">
+
+ <py:def function="page_title">Harvest Source Details</py:def>
+
+ <py:def function="optional_head">
+ <link type="text/css" rel="stylesheet" media="all" href="/ckanext/harvest/style.css" />
+ </py:def>
+
+<div py:match="content">
+ <div class="harvest-content">
+ <py:if test="c.source">
+ <h1>Harvest Source Details</h1>
+ <table id="harvest-source-details">
+ <tr>
+ <th>ID</th>
+ <td>${c.source.id}</td>
+ </tr>
+ <tr>
+ <th>URL</th>
+ <td>${c.source.url}</td>
+ </tr>
+ <tr>
+ <th>Type</th>
+ <td>${c.source.type}</td>
+ </tr>
+ <tr>
+ <th>Active</th>
+ <td>${c.source.active}</td>
+ </tr>
+ <tr>
+ <th>Description</th>
+ <td>${c.source.description}</td>
+ </tr>
+ <tr>
+ <th>User</th>
+ <td>${c.source.user_id}</td>
+ </tr>
+ <tr>
+ <th>Publisher</th>
+ <td>${c.source.publisher_id}</td>
+ </tr>
+ <tr>
+ <th>Created</th>
+ <td>${c.source.created}</td>
+ </tr>
+ <tr>
+ <th>Total jobs</th>
+ <td>${len(c.source.jobs)}</td>
+ </tr>
+ <tr>
+ <th>Status</th>
+ <td>
+ Last Harvest Errors: ${c.source.status.last_harvest_statistics.errors}<br/>
+ <py:choose>
+ <py:when test="len(c.source.status.last_harvest_errors)>0">
+ <ul>
+ <li py:for="error in c.source.status.last_harvest_errors">${error}</li>
+ </ul>
+ </py:when>
+ </py:choose>
+ Last Harvest Added: ${c.source.status.last_harvest_statistics.added}<br/>
+ Last Harvest Updated: ${c.source.status.last_harvest_statistics.updated}<br/>
+ Last Harvest: ${c.source.status.last_harvest_request} <br/>
+ Next Harvest: ${c.source.status.next_harvest}
+ </td>
+ </tr>
+ <tr>
+ <th>Total Errors</th>
+ <td>${c.source.status.overall_statistics.errors}</td>
+ </tr>
+ <tr>
+ <th>Total Packages</th>
+ <td>${c.source.status.overall_statistics.added}</td>
+ </tr>
+ <tr>
+ <th>Packages</th>
+ <td>
+ <div>There could be a 10 minutes delay before these packages (or changes to them) appear on
+ the site or on search results.</div>
+ <div py:for="package in c.source.status.packages">
+ <a href="/package/${package}">${package}</a>
+ </div>
+ </td>
+ </tr>
+ </table>
+ </py:if>
+ </div>
+</div>
+<xi:include href="../layout.html" />
+</html>
http://bitbucket.org/okfn/ckanext-harvest/changeset/1b93c59d82af/
changeset: r93:1b93c59d82af
branch: new-forms
user: amercader
date: 2011-05-13 17:00:36
summary: [forms] Adapt CLI commands to changes in lib
affected #: 2 files (149 bytes)
--- a/ckanext/harvest/commands/harvester.py Fri May 13 14:17:58 2011 +0100
+++ b/ckanext/harvest/commands/harvester.py Fri May 13 16:00:36 2011 +0100
@@ -14,19 +14,19 @@
harvester initdb
- Creates the necessary tables in the database
- harvester source {url} {type} [{active}] [{user-id}] [{publisher-id}]
+ harvester source {url} {type} [{active}] [{user-id}] [{publisher-id}]
- create new harvest source
harvester rmsource {id}
- remove (inactivate) a harvester source
- harvester sources [all]
+ harvester sources [all]
- lists harvest sources
If 'all' is defined, it also shows the Inactive sources
harvester job {source-id}
- create new harvest job
-
+
harvester jobs
- lists harvest jobs
@@ -66,7 +66,7 @@
sys.exit(1)
cmd = self.args[0]
if cmd == 'source':
- self.create_harvest_source()
+ self.create_harvest_source()
elif cmd == "rmsource":
self.remove_harvest_source()
elif cmd == 'sources':
@@ -96,7 +96,7 @@
def _load_config(self):
super(Harvester, self)._load_config()
-
+
def initdb(self):
from ckanext.harvest.model import setup as db_setup
db_setup()
@@ -128,23 +128,29 @@
publisher_id = unicode(self.args[5])
else:
publisher_id = u''
-
- source = create_harvest_source({
- 'url':url,
- 'type':type,
- 'active':active,
- 'user_id':user_id,
- 'publisher_id':publisher_id})
+ try:
+ source = create_harvest_source({
+ 'url':url,
+ 'type':type,
+ 'active':active,
+ 'user_id':user_id,
+ 'publisher_id':publisher_id})
- print 'Created new harvest source:'
- self.print_harvest_source(source)
+ print 'Created new harvest source:'
+ self.print_harvest_source(source)
- sources = get_harvest_sources()
- self.print_there_are('harvest source', sources)
-
- # Create a Harvest Job for the new Source
- create_harvest_job(source['id'])
- print 'A new Harvest Job for this source has also been created'
+ sources = get_harvest_sources()
+ self.print_there_are('harvest source', sources)
+
+ # Create a Harvest Job for the new Source
+ create_harvest_job(source['id'])
+ print 'A new Harvest Job for this source has also been created'
+
+ except ValidationError,e:
+ print 'An error occurred:'
+ print str(e.error_dict)
+ raise e
+
def remove_harvest_source(self):
if len(self.args) >= 2:
@@ -155,7 +161,7 @@
remove_harvest_source(source_id)
print 'Removed harvest source: %s' % source_id
-
+
def list_harvest_sources(self):
if len(self.args) >= 2 and self.args[1] == 'all':
sources = get_harvest_sources()
@@ -185,7 +191,7 @@
jobs = get_harvest_jobs()
self.print_harvest_jobs(jobs)
self.print_there_are(what='harvest job', sequence=jobs)
-
+
def run_harvester(self):
try:
jobs = run_harvest_jobs()
@@ -211,7 +217,7 @@
print 'Source id: %s' % source['id']
print ' url: %s' % source['url']
print ' type: %s' % source['type']
- print ' active: %s' % source['active']
+ print ' active: %s' % source['active']
print ' user: %s' % source['user_id']
print 'publisher: %s' % source['publisher_id']
print ' jobs: %s' % len(source['jobs'])
@@ -234,7 +240,7 @@
if (len(job['gather_errors']) > 0):
for error in job['gather_errors']:
print ' %s' % error['message']
-
+
print ''
def print_there_are(self, what, sequence, condition=''):
--- a/ckanext/harvest/lib/__init__.py Fri May 13 14:17:58 2011 +0100
+++ b/ckanext/harvest/lib/__init__.py Fri May 13 16:00:36 2011 +0100
@@ -4,7 +4,7 @@
from sqlalchemy import distinct,func
from ckan.model import Session, repo
from ckan.model import Package
-from ckan.lib.navl.dictization_functions import validate
+from ckan.lib.navl.dictization_functions import validate
from ckan.logic import NotFound, ValidationError
from ckanext.harvest.logic.schema import harvest_source_form_schema
@@ -48,8 +48,8 @@
if last_job:
#TODO: Should we encode the dates as strings?
out['last_harvest_request'] = str(last_job.gather_finished)
-
-
+
+
#Get HarvestObjects from last job whit links to packages
last_objects = [obj for obj in last_job.objects if obj.package is not None]
@@ -76,8 +76,8 @@
# We have the gathering errors in last_job.gather_errors, so let's also
# get also the object errors.
object_errors = Session.query(HarvestObjectError).join(HarvestObject) \
- .filter(HarvestObject.job==last_job).all()
-
+ .filter(HarvestObject.job==last_job).all()
+
out['last_harvest_statistics']['errors'] = len(last_job.gather_errors) \
+ len(object_errors)
for gather_error in last_job.gather_errors:
@@ -87,7 +87,7 @@
msg = 'GUID %s: %s' % (object_error.object.guid,object_error.message)
out['last_harvest_errors'].append(msg)
-
+
# Overall statistics
packages = Session.query(distinct(HarvestObject.package_id),Package.name) \
@@ -120,7 +120,7 @@
for job in source.jobs:
out['jobs'].append(job.as_dict())
-
+
out['status'] = _get_source_status(source)
@@ -179,7 +179,7 @@
netloc = ':'.join(parts)
else:
netloc = o.netloc
-
+
# Remove trailing slash
path = o.path.rstrip('/')
@@ -240,7 +240,9 @@
schema = harvest_source_form_schema()
source = HarvestSource.get(source_id)
-
+ if not source:
+ raise NotFound('Harvest source %s does not exist' % source_id)
+
# Add source id to the dict, as some validators will need it
data_dict["id"] = source.id
@@ -260,11 +262,11 @@
def remove_harvest_source(source_id):
- try:
- source = HarvestSource.get(source_id)
- except:
- raise Exception('Source %s does not exist' % source_id)
-
+
+ source = HarvestSource.get(source_id)
+ if not source:
+ raise NotFound('Harvest source %s does not exist' % source_id)
+
# Don't actually delete the record, just flag it as inactive
source.active = False
source.save()
@@ -291,11 +293,9 @@
def create_harvest_job(source_id):
# Check if source exists
- try:
- #We'll need the actual HarvestSource
- source = HarvestSource.get(source_id)
- except:
- raise Exception('Source %s does not exist' % source_id)
+ source = HarvestSource.get(source_id)
+ if not source:
+ raise NotFound('Harvest source %s does not exist' % source_id)
# Check if the source is active
if not source.active:
@@ -318,7 +318,7 @@
jobs = get_harvest_jobs(status=u'New')
if len(jobs) == 0:
raise Exception('There are no new harvesting jobs')
-
+
# Send each job to the gather queue
publisher = get_gather_publisher()
sent_jobs = []
@@ -337,17 +337,17 @@
raise NotFound
return _object_as_dict(obj)
-
+
def get_harvest_objects(**kwds):
objects = HarvestObject.filter(**kwds).all()
return [_object_as_dict(obj) for obj in objects]
def import_last_objects(source_id=None):
if source_id:
- try:
- source = HarvestSource.get(source_id)
- except:
- raise Exception('Source %s does not exist' % source_id)
+ source = HarvestSource.get(source_id)
+ if not source:
+ raise NotFound('Harvest source %s does not exist' % source_id)
+
last_objects = Session.query(HarvestObject) \
.join(HarvestJob) \
.filter(HarvestJob.source==source) \
http://bitbucket.org/okfn/ckanext-harvest/changeset/097bec6d9a86/
changeset: r94:097bec6d9a86
branch: new-forms
user: amercader
date: 2011-05-13 18:02:18
summary: Minor enhancements in the WUI
affected #: 10 files (3.8 KB)
--- a/ckanext/harvest/controllers/view.py Fri May 13 16:00:36 2011 +0100
+++ b/ckanext/harvest/controllers/view.py Fri May 13 17:02:18 2011 +0100
@@ -136,11 +136,10 @@
try:
create_harvest_job(id)
h.flash_success(_('Refresh requested, harvesting will take place within 15 minutes.'))
- redirect(h.url_for('harvest'))
except NotFound:
abort(404,_('Harvest source not found'))
- except Exception as e:
+ except Exception, e:
msg = 'An error occurred: [%s]' % e.message
h.flash_error(msg)
- redirect(h.url_for('harvest'))
+ redirect(h.url_for('harvest'))
--- a/ckanext/harvest/lib/__init__.py Fri May 13 16:00:36 2011 +0100
+++ b/ckanext/harvest/lib/__init__.py Fri May 13 17:02:18 2011 +0100
@@ -211,7 +211,9 @@
return _source_as_dict(source)
def get_harvest_sources(**kwds):
- sources = HarvestSource.filter(**kwds).all()
+ sources = HarvestSource.filter(**kwds) \
+ .order_by(HarvestSource.created.desc()) \
+ .all()
return [_source_as_dict(source) for source in sources]
def create_harvest_source(data_dict):
Binary file ckanext/harvest/public/ckanext/harvest/images/icons/source_delete.png has changed
Binary file ckanext/harvest/public/ckanext/harvest/images/icons/source_edit.png has changed
Binary file ckanext/harvest/public/ckanext/harvest/images/icons/source_new.png has changed
Binary file ckanext/harvest/public/ckanext/harvest/images/icons/source_refresh.png has changed
Binary file ckanext/harvest/public/ckanext/harvest/images/icons/source_view.png has changed
--- a/ckanext/harvest/public/ckanext/harvest/style.css Fri May 13 16:00:36 2011 +0100
+++ b/ckanext/harvest/public/ckanext/harvest/style.css Fri May 13 17:02:18 2011 +0100
@@ -1,4 +1,11 @@
/* Harvest styles */
#new-harvest-source {
+ background: transparent url("images/icons/source_new.png") no-repeat 0px 0px;
+ padding-left: 20px;
+ margin-bottom: 10px;
font-weight: bold;
}
+
+#harvest-sources th.action{
+ font-style: italic;
+}
--- a/ckanext/harvest/templates/index.html Fri May 13 16:00:36 2011 +0100
+++ b/ckanext/harvest/templates/index.html Fri May 13 17:02:18 2011 +0100
@@ -12,16 +12,16 @@
<div py:match="content"><div class="harvest-content"><h1>Harvesting Sources</h1>
- <a id="new-harvest-source" href="harvest/new">Add a harvesting source</a>
+ <div id="new-harvest-source"><a href="harvest/new">Add a harvesting source</a></div><py:choose><py:when test="c.sources"><table id="harvest-sources"><tr>
- <th></th>
- <th></th>
- <th></th>
+ <th class="action">View</th>
+ <th class="action">Edit</th>
+ <th class="action">Refresh</th><th>URL</th><th>Type</th><th>Active</th>
@@ -31,9 +31,9 @@
</tr><tr py:for="source in c.sources">
- <td>${h.link_to('view', 'harvest/%s' % source.id)}</td>
- <td>${h.link_to('edit', 'harvest/edit/%s' % source.id)}</td>
- <td>${h.link_to('refresh', 'harvest/refresh/%s' % source.id)}</td>
+ <td><a href="harvest/${source.id}"><img src="ckanext/harvest/images/icons/source_view.png" alt="View" title="View" /></a></td>
+ <td><a href="harvest/edit/${source.id}"><img src="ckanext/harvest/images/icons/source_edit.png" alt="Edit" title="Edit" /></a></td>
+ <td><a href="harvest/refresh/${source.id}"><img src="ckanext/harvest/images/icons/source_refresh.png" alt="Refresh" title="Refresh" /></a></td><td>${source.url}</td><td>${source.type}</td><td>${source.active}</td>
--- a/ckanext/harvest/templates/source/new_source_form.html Fri May 13 16:00:36 2011 +0100
+++ b/ckanext/harvest/templates/source/new_source_form.html Fri May 13 17:02:18 2011 +0100
@@ -25,12 +25,11 @@
<py:for each="type in types"><option value="${type}" py:attrs="{'selected': 'selected' if data.get('type', '') == type else None}" >${type}</option></py:for>
- <option value="FAKW">FAKW</option></select></dd><dd class="field_error" py:if="errors.get('type', '')">${errors.get('type', '')}</dd><dd class="instructions basic">Which type of source does the URL above represent?
- TODO: get these from the harvesters
+ <!--TODO: get these from the harvesters--><ul><li>A server's CSW interface</li><li>A Web Accessible Folder (WAF) displaying a list of GEMINI 2.1 documents</li>
@@ -42,6 +41,6 @@
<dd class="instructions basic">You can add your own notes here about what the URL above represents to remind you later.</dd></dl></fieldset>
- <input id="save" name="save" value="Save" type="submit" />
+ <input id="save" name="save" value="Save" type="submit" /> or <a href="/harvest">Return to the harvest sources list</a></form>
http://bitbucket.org/okfn/ckanext-harvest/changeset/ba375547ecdd/
changeset: r95:ba375547ecdd
branch: new-forms
user: amercader
date: 2011-05-13 18:08:21
summary: Remove references to ckanext-dgu
affected #: 3 files (446 bytes)
--- a/README.rst Fri May 13 17:02:18 2011 +0100
+++ b/README.rst Fri May 13 17:08:21 2011 +0100
@@ -36,16 +36,6 @@
paster sysadmin add harvest
-The user's API key must be defined in the CKAN
-configuration file (.ini) in the [app:main] section::
-
- ckan.harvest.api_key = 4e1dac58-f642-4e54-bbc4-3ea262271fe2
-
-The API URL used can be also defined in the ini file (it defaults to
-http://localhost:5000/)::
-
- ckan.api_url = <api_url>
-
Tests
=====
@@ -100,6 +90,12 @@
paster harvester sources --config=../ckan/development.ini
+The CKAN harvester
+==================
+
+TODO
+
+
The harvesting interface
========================
--- a/pip-requirements.txt Fri May 13 17:02:18 2011 +0100
+++ b/pip-requirements.txt Fri May 13 17:08:21 2011 +0100
@@ -3,8 +3,3 @@
# to suit the packaging system.
carrot==0.10.1
-
-# These are other dependencies to bear in mind:
-
-# -e hg+https://bitbucket.org/okfn/ckanext-dgu@default#egg=ckanext-dgu
-# -e hg+https://bitbucket.org/okfn/ckanext-csw@default#egg=ckanext-csw
--- a/setup.py Fri May 13 17:02:18 2011 +0100
+++ b/setup.py Fri May 13 17:08:21 2011 +0100
@@ -6,7 +6,7 @@
setup(
name='ckanext-harvest',
version=version,
- description="CSW harvesting plugin for CKAN",
+ description="Harvesting interface plugin for CKAN",
long_description="""\
""",
classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
http://bitbucket.org/okfn/ckanext-harvest/changeset/7b960e47ee31/
changeset: r96:7b960e47ee31
branch: new-forms
user: amercader
date: 2011-05-13 19:39:36
summary: Add a new info method to the harvester interface so implementations can provide details. Use this to build the WUI form
affected #: 11 files (2.0 KB)
--- a/README.rst Fri May 13 17:08:21 2011 +0100
+++ b/README.rst Fri May 13 18:39:36 2011 +0100
@@ -9,7 +9,7 @@
============
The harvest extension uses Message Queuing to handle the different gather
-stages.
+stages.
You will need to install the RabbitMQ server::
@@ -23,12 +23,12 @@
Configuration
=============
-Run the following command (in the ckanext-harvest directory) to create
+Run the following command (in the ckanext-harvest directory) to create
the necessary tables in the database::
paster harvester initdb --config=../ckan/development.ini
-The extension needs a user with sysadmin privileges to perform the
+The extension needs a user with sysadmin privileges to perform the
harvesting jobs. You can create such a user running these two commands in
the ckan directory::
@@ -53,25 +53,25 @@
Command line interface
======================
-The following operations can be run from the command line using the
+The following operations can be run from the command line using the
``paster harvester`` command::
harvester initdb
- Creates the necessary tables in the database
- harvester source {url} {type} [{active}] [{user-id}] [{publisher-id}]
+ harvester source {url} {type} [{active}] [{user-id}] [{publisher-id}]
- create new harvest source
harvester rmsource {id}
- remove (inactivate) a harvester source
- harvester sources [all]
+ harvester sources [all]
- lists harvest sources
If 'all' is defined, it also shows the Inactive sources
harvester job {source-id}
- create new harvest job
-
+
harvester jobs
- lists harvest jobs
@@ -83,9 +83,9 @@
harvester fetch_consumer
- starts the consumer for the fetching queue
-
+
The commands should be run from the ckanext-harvest directory and expect
-a development.ini file to be present. Most of the time you will specify
+a development.ini file to be present. Most of the time you will specify
the config explicitly though::
paster harvester sources --config=../ckan/development.ini
@@ -103,18 +103,18 @@
operations. The harvesting process takes place on three stages:
1. The **gather** stage compiles all the resource identifiers that need to
- be fetched in the next stage (e.g. in a CSW server, it will perform a
+ be fetched in the next stage (e.g. in a CSW server, it will perform a
`GetRecords` operation).
2. The **fetch** stage gets the contents of the remote objects and stores
- them in the database (e.g. in a CSW server, it will perform n
+ them in the database (e.g. in a CSW server, it will perform n
`GetRecordById` operations).
3. The **import** stage performs any necessary actions on the fetched
resource (generally creating a CKAN package, but it can be anything the
extension needs).
-Plugins willing to implement the harvesting interface must provide the
+Plugins willing to implement the harvesting interface must provide the
following methods::
from ckan.plugins.core import SingletonPlugin, implements
@@ -126,17 +126,32 @@
'''
implements(IHarvester)
- def get_type(self):
+ def info(self):
'''
- Plugins must provide this method, which will return a string with the
- Harvester type implemented by the plugin (e.g ``CSW``,``INSPIRE``, etc).
- This will ensure that they only receive Harvest Jobs and Objects
- relevant to them.
+ Harvesting implementations must provide this method, which will return a
+ dictionary containing different descriptors of the harvester. The
+ returned dictionary should contain:
- returns: A string with the harvester type
+ * name: machine-readable name. This will be the value stored in the
+ database, and the one used by ckanext-harvest to call the appropriate
+ harvester.
+ * title: human-readable name. This will appear in the form's select box
+ in the WUI.
+ * description: a small description of what the harvester does. This will
+ appear on the form as a guidance to the user.
+
+ A complete example may be::
+
+ {
+ 'name': 'csw',
+ 'title': 'CSW Server',
+ 'description': "A server that implements OGC's Catalog Service "
+ "for the Web (CSW) standard"
+ }
+
+ returns: A dictionary with the harvester descriptors
'''
-
def gather_stage(self, harvest_job):
'''
The gather stage will recieve a HarvestJob object and will be
@@ -172,7 +187,7 @@
'''
The import stage will receive a HarvestObject object and will be
responsible for:
- - performing any necessary action with the fetched object (e.g
+ - performing any necessary action with the fetched object (e.g
create a CKAN package).
Note: if this stage creates or updates a package, a reference
to the package should be added to the HarvestObject.
@@ -196,7 +211,7 @@
The harvesting extension uses two different queues, one that handles the
gathering and another one that handles the fetching and importing. To start
-the consumers run the following command from the ckanext-harvest directory
+the consumers run the following command from the ckanext-harvest directory
(make sure you have your python environment activated)::
paster harvester gather_consumer --config=../ckan/development.ini
--- a/ckanext/harvest/controllers/view.py Fri May 13 17:08:21 2011 +0100
+++ b/ckanext/harvest/controllers/view.py Fri May 13 18:39:36 2011 +0100
@@ -9,7 +9,7 @@
from ckanext.harvest.logic.schema import harvest_source_form_schema
from ckanext.harvest.lib import create_harvest_source, edit_harvest_source, \
get_harvest_source, get_harvest_sources, \
- create_harvest_job, get_registered_harvesters_types
+ create_harvest_job, get_registered_harvesters_info
import logging
log = logging.getLogger(__name__)
@@ -39,7 +39,7 @@
errors = errors or {}
error_summary = error_summary or {}
#TODO: Use new description interface to build the types select and descriptions
- vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'types': get_registered_harvesters_types()}
+ vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()}
c.form = render('source/new_source_form.html', extra_vars=vars)
return render('source/new.html')
@@ -61,7 +61,7 @@
abort(400, 'Integrity Error')
except ValidationError,e:
errors = e.error_dict
- error_summary = e.error_summary if 'error_summary' in e else None
+ error_summary = e.error_summary if hasattr(e,'error_summary') else None
return self.new(data_dict, errors, error_summary)
def edit(self, id, data = None,errors = None, error_summary = None):
@@ -79,7 +79,7 @@
errors = errors or {}
error_summary = error_summary or {}
#TODO: Use new description interface to build the types select and descriptions
- vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'types': get_registered_harvesters_types()}
+ vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()}
c.form = render('source/new_source_form.html', extra_vars=vars)
return render('source/edit.html')
@@ -99,7 +99,7 @@
abort(404, _('Harvest Source not found'))
except ValidationError,e:
errors = e.error_dict
- error_summary = e.error_summary if 'error_summary' in e else None
+ error_summary = e.error_summary if hasattr(e,'error_summary') else None
return self.edit(id,data_dict, errors, error_summary)
def _check_data_dict(self, data_dict):
--- a/ckanext/harvest/harvesters.py Fri May 13 17:08:21 2011 +0100
+++ b/ckanext/harvest/harvesters.py Fri May 13 18:39:36 2011 +0100
@@ -75,8 +75,12 @@
err.save()
log.error(message)
- def get_type(self):
- return 'CKAN'
+ def info(self):
+ return {
+ 'name': 'ckan',
+ 'title': 'CKAN',
+ 'description': 'Harvests remote CKAN instances'
+ }
def gather_stage(self,harvest_job):
log.debug('In CKANHarvester gather_stage')
--- a/ckanext/harvest/interfaces.py Fri May 13 17:08:21 2011 +0100
+++ b/ckanext/harvest/interfaces.py Fri May 13 18:39:36 2011 +0100
@@ -6,17 +6,32 @@
'''
- def get_type(self):
+ def info(self):
'''
- Plugins must provide this method, which will return a string with the
- Harvester type implemented by the plugin (e.g ``CSW``,``INSPIRE``, etc).
- This will ensure that they only receive Harvest Jobs and Objects
- relevant to them.
+ Harvesting implementations must provide this method, which will return a
+ dictionary containing different descriptors of the harvester. The
+ returned dictionary should contain:
- returns: A string with the harvester type
+ * name: machine-readable name. This will be the value stored in the
+ database, and the one used by ckanext-harvest to call the appropriate
+ harvester.
+ * title: human-readable name. This will appear in the form's select box
+ in the WUI.
+ * description: a small description of what the harvester does. This will
+ appear on the form as a guidance to the user.
+
+ A complete example may be::
+
+ {
+ 'name': 'csw',
+ 'title': 'CSW Server',
+ 'description': "A server that implements OGC's Catalog Service "
+ "for the Web (CSW) standard"
+ }
+
+ returns: A dictionary with the harvester descriptors
'''
-
def gather_stage(self, harvest_job):
'''
The gather stage will recieve a HarvestJob object and will be
@@ -55,7 +70,7 @@
'''
The import stage will receive a HarvestObject object and will be
responsible for:
- - performing any necessary action with the fetched object (e.g
+ - performing any necessary action with the fetched object (e.g
create a CKAN package).
Note: if this stage creates or updates a package, a reference
to the package should be added to the HarvestObject.
--- a/ckanext/harvest/lib/__init__.py Fri May 13 17:08:21 2011 +0100
+++ b/ckanext/harvest/lib/__init__.py Fri May 13 18:39:36 2011 +0100
@@ -196,7 +196,6 @@
return field_name.replace('_', ' ')
def _error_summary(error_dict):
-
error_summary = {}
for key, error in error_dict.iteritems():
error_summary[_prettify(key)] = error[0]
@@ -373,7 +372,7 @@
if obj.guid != last_obj_guid:
imported_objects.append(obj)
for harvester in PluginImplementations(IHarvester):
- if harvester.get_type() == obj.job.source.type:
+ if harvester.info()['name'] == obj.job.source.type:
if hasattr(harvester,'force_import'):
harvester.force_import = True
harvester.import_stage(obj)
@@ -381,9 +380,14 @@
return imported_objects
-def get_registered_harvesters_types():
+def get_registered_harvesters_info():
# TODO: Use new description interface when implemented
- available_types = []
+ available_harvesters = []
for harvester in PluginImplementations(IHarvester):
- available_types.append(harvester.get_type())
- return available_types
+ info = harvester.info()
+ if not info or 'name' not in info:
+ log.error('Harvester %r does not provide the harvester name in the info response' % str(harvester))
+ continue
+ available_harvesters.append(info)
+
+ return available_harvesters
--- a/ckanext/harvest/logic/validators.py Fri May 13 17:08:21 2011 +0100
+++ b/ckanext/harvest/logic/validators.py Fri May 13 18:39:36 2011 +0100
@@ -66,7 +66,12 @@
# Get all the registered harvester types
available_types = []
for harvester in PluginImplementations(IHarvester):
- available_types.append(harvester.get_type())
+ info = harvester.info()
+ if not info or 'name' not in info:
+ log.error('Harvester %r does not provide the harvester name in the info response' % str(harvester))
+ continue
+ available_types.append(info['name'])
+
if not value in available_types:
raise Invalid('Unknown harvester type: %s. Have you registered a harvester for this type?' % value)
--- a/ckanext/harvest/public/ckanext/harvest/style.css Fri May 13 17:08:21 2011 +0100
+++ b/ckanext/harvest/public/ckanext/harvest/style.css Fri May 13 18:39:36 2011 +0100
@@ -9,3 +9,7 @@
#harvest-sources th.action{
font-style: italic;
}
+
+.harvester-title{
+ font-weight: bold;
+}
--- a/ckanext/harvest/queue.py Fri May 13 17:08:21 2011 +0100
+++ b/ckanext/harvest/queue.py Fri May 13 18:39:36 2011 +0100
@@ -77,7 +77,7 @@
# matches
harvester_found = False
for harvester in PluginImplementations(IHarvester):
- if harvester.get_type() == job.source.type:
+ if harvester.info()['name'] == job.source.type:
harvester_found = True
# Get a list of harvest object ids from the plugin
job.gather_started = datetime.datetime.now()
@@ -123,7 +123,7 @@
# the Harvester interface, only if the source type
# matches
for harvester in PluginImplementations(IHarvester):
- if harvester.get_type() == obj.source.type:
+ if harvester.info()['name'] == obj.source.type:
# See if the plugin can fetch the harvest object
obj.fetch_started = datetime.datetime.now()
--- a/ckanext/harvest/templates/source/edit.html Fri May 13 17:08:21 2011 +0100
+++ b/ckanext/harvest/templates/source/edit.html Fri May 13 18:39:36 2011 +0100
@@ -8,6 +8,7 @@
<py:def function="body_class">hide-sidebar</py:def><py:def function="optional_head"><link rel="stylesheet" href="${g.site_url}/css/forms.css" type="text/css" media="screen, print" />
+ <link type="text/css" rel="stylesheet" media="all" href="/ckanext/harvest/style.css" /></py:def><div py:match="content">
--- a/ckanext/harvest/templates/source/new.html Fri May 13 17:08:21 2011 +0100
+++ b/ckanext/harvest/templates/source/new.html Fri May 13 18:39:36 2011 +0100
@@ -8,6 +8,7 @@
<py:def function="body_class">hide-sidebar</py:def><py:def function="optional_head"><link rel="stylesheet" href="${g.site_url}/css/forms.css" type="text/css" media="screen, print" />
+ <link type="text/css" rel="stylesheet" media="all" href="/ckanext/harvest/style.css" /></py:def><div py:match="content">
--- a/ckanext/harvest/templates/source/new_source_form.html Fri May 13 17:08:21 2011 +0100
+++ b/ckanext/harvest/templates/source/new_source_form.html Fri May 13 18:39:36 2011 +0100
@@ -22,18 +22,17 @@
<dt><label class="field_req" for="type">Source Type *</label></dt><dd><select id="type" name="type">
- <py:for each="type in types">
- <option value="${type}" py:attrs="{'selected': 'selected' if data.get('type', '') == type else None}" >${type}</option>
+ <py:for each="harvester in harvesters">
+ <option value="${harvester.name}" py:attrs="{'selected': 'selected' if data.get('type', '') == harvester.name else None}" >${harvester.title}</option></py:for></select></dd><dd class="field_error" py:if="errors.get('type', '')">${errors.get('type', '')}</dd><dd class="instructions basic">Which type of source does the URL above represent?
- <!--TODO: get these from the harvesters-->
- <ul>
- <li>A server's CSW interface</li>
- <li>A Web Accessible Folder (WAF) displaying a list of GEMINI 2.1 documents</li>
- <li>A single GEMINI 2.1 document</li>
+ <ul>
+ <py:for each="harvester in harvesters">
+ <li><span class="harvester-title">${harvester.title}</span>: ${harvester.description}</li>
+ </py:for></ul></dd><dt><label class="field_opt" for="description">Description</label></dt>
Repository URL: https://bitbucket.org/okfn/ckanext-harvest/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
More information about the ckan-changes
mailing list