[ckan-changes] commit/ckanext-harvest: 3 new changesets
Bitbucket
commits-noreply at bitbucket.org
Tue Jun 7 12:37:09 UTC 2011
3 new changesets in ckanext-harvest:
http://bitbucket.org/okfn/ckanext-harvest/changeset/e8d67a100d96/
changeset: e8d67a100d96
branches:
user: amercader
date: 2011-06-07 12:58:35
summary: Reverting previous changeset, as it conflicts with dgu_form_api
affected #: 1 file (2 bytes)
--- a/ckanext/harvest/plugin.py Mon Jun 06 10:16:34 2011 +0200
+++ b/ckanext/harvest/plugin.py Tue Jun 07 11:58:35 2011 +0100
@@ -42,10 +42,10 @@
template_dir = os.path.join(here, 'templates')
public_dir = os.path.join(here, 'public')
if config.get('extra_template_paths'):
- config['extra_template_paths'] += ', ' + template_dir
+ config['extra_template_paths'] += ',' + template_dir
else:
config['extra_template_paths'] = template_dir
if config.get('extra_public_paths'):
- config['extra_public_paths'] += ', ' + public_dir
+ config['extra_public_paths'] += ',' + public_dir
else:
config['extra_public_paths'] = public_dir
http://bitbucket.org/okfn/ckanext-harvest/changeset/6fe07de0bba8/
changeset: 6fe07de0bba8
branches:
user: amercader
date: 2011-06-07 13:07:53
summary: Add a simple way for harvesters to store configuration options. If form_config_interface is Text on the info dictionary, the configuration field will be enabled in the form. Harvesters can also provide a validate_config method.
affected #: 7 files (2.4 KB)
--- a/README.rst Tue Jun 07 11:58:35 2011 +0100
+++ b/README.rst Tue Jun 07 12:07:53 2011 +0100
@@ -139,6 +139,10 @@
in the WUI.
* description: a small description of what the harvester does. This will
appear on the form as a guidance to the user.
+ * form_config_interface [optional]: Harvesters willing to store configuration
+ values in the database must provide this key. The only supported value is
+ 'Text'. This will enable the configuration text box in the form. See also
+ the ``validate_config`` method.
A complete example may be::
@@ -152,6 +156,15 @@
returns: A dictionary with the harvester descriptors
'''
+ def validate_config(self, config):
+ '''
+ Harvesters can provide this method to validate the configuration entered in the
+ form. It should return a single string, which will be stored in the database.
+ Exceptions raised will be shown in the form's error messages.
+
+ returns A string with the validated configuration options
+ '''
+
def gather_stage(self, harvest_job):
'''
The gather stage will recieve a HarvestJob object and will be
--- a/ckanext/harvest/controllers/view.py Tue Jun 07 11:58:35 2011 +0100
+++ b/ckanext/harvest/controllers/view.py Tue Jun 07 12:07:53 2011 +0100
@@ -38,7 +38,6 @@
data = data or {}
errors = errors or {}
error_summary = error_summary or {}
- #TODO: Use new description interface to build the types select and descriptions
vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()}
c.form = render('source/new_source_form.html', extra_vars=vars)
@@ -104,7 +103,7 @@
def _check_data_dict(self, data_dict):
'''Check if the return data is correct'''
- surplus_keys_schema = ['id','publisher_id','user_id','active','save']
+ surplus_keys_schema = ['id','publisher_id','user_id','active','save','config']
schema_keys = harvest_source_form_schema().keys()
keys_in_schema = set(schema_keys) - set(surplus_keys_schema)
--- a/ckanext/harvest/harvesters/ckanharvester.py Tue Jun 07 11:58:35 2011 +0100
+++ b/ckanext/harvest/harvesters/ckanharvester.py Tue Jun 07 12:07:53 2011 +0100
@@ -44,9 +44,19 @@
return {
'name': 'ckan',
'title': 'CKAN',
- 'description': 'Harvests remote CKAN instances'
+ 'description': 'Harvests remote CKAN instances',
+ 'form_config_interface':'Text'
}
+ def validate_config(self,config):
+ try:
+ config_obj = json.loads(config)
+ except ValueError,e:
+ raise e
+
+ return config
+
+
def gather_stage(self,harvest_job):
log.debug('In CKANHarvester gather_stage (%s)' % harvest_job.source.url)
get_all_packages = True
@@ -64,7 +74,7 @@
base_url = harvest_job.source.url.rstrip('/')
base_rest_url = base_url + self._get_rest_api_offset()
base_search_url = base_url + self._get_search_api_offset()
-
+
if previous_job and not previous_job.gather_errors:
get_all_packages = False
@@ -126,7 +136,7 @@
return object_ids
else:
- self._save_gather_error('No packages received for URL: %s' % url,
+ self._save_gather_error('No packages received for URL: %s' % url,
harvest_job)
return None
except Exception, e:
@@ -159,7 +169,7 @@
return False
if harvest_object.content is None:
- self._save_object_error('Empty content for object %s' % harvest_object.id,
+ self._save_object_error('Empty content for object %s' % harvest_object.id,
harvest_object, 'Import')
return False
@@ -167,7 +177,7 @@
package_dict = json.loads(harvest_object.content)
return self._create_or_update_package(package_dict,harvest_object)
except ValidationError,e:
- self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict),
+ self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict),
harvest_object, 'Import')
except Exception, e:
self._save_object_error('%r'%e,harvest_object,'Import')
--- a/ckanext/harvest/lib/__init__.py Tue Jun 07 11:58:35 2011 +0100
+++ b/ckanext/harvest/lib/__init__.py Tue Jun 07 12:07:53 2011 +0100
@@ -228,7 +228,7 @@
source.url = data['url']
source.type = data['type']
- opt = ['active','description','user_id','publisher_id']
+ opt = ['active','description','user_id','publisher_id','config']
for o in opt:
if o in data and data[o] is not None:
source.__setattr__(o,data[o])
@@ -245,14 +245,14 @@
raise NotFound('Harvest source %s does not exist' % source_id)
# Add source id to the dict, as some validators will need it
- data_dict["id"] = source.id
+ data_dict['id'] = source.id
data, errors = validate(data_dict, schema)
if errors:
Session.rollback()
raise ValidationError(errors,_error_summary(errors))
- fields = ['url','type','active','description','user_id','publisher_id']
+ fields = ['url','type','active','description','user_id','publisher_id','config']
for f in fields:
if f in data_dict and data_dict[f] is not None and data_dict[f] != '':
source.__setattr__(f,data_dict[f])
@@ -381,13 +381,13 @@
return imported_objects
def get_registered_harvesters_info():
- # TODO: Use new description interface when implemented
available_harvesters = []
for harvester in PluginImplementations(IHarvester):
info = harvester.info()
if not info or 'name' not in info:
log.error('Harvester %r does not provide the harvester name in the info response' % str(harvester))
continue
+ info['show_config'] = (info.get('form_config_interface','') == 'Text')
available_harvesters.append(info)
return available_harvesters
--- a/ckanext/harvest/logic/schema.py Tue Jun 07 11:58:35 2011 +0100
+++ b/ckanext/harvest/logic/schema.py Tue Jun 07 12:07:53 2011 +0100
@@ -7,7 +7,8 @@
from ckanext.harvest.logic.validators import harvest_source_id_exists, \
harvest_source_url_validator, \
- harvest_source_type_exists
+ harvest_source_type_exists, \
+ harvest_source_config_validator
def default_harvest_source_schema():
@@ -19,7 +20,7 @@
'active': [ignore_missing],
'user_id': [ignore_missing],
'publisher_id': [ignore_missing],
- #'config'
+ 'config': [harvest_source_config_validator]
}
return schema
--- a/ckanext/harvest/logic/validators.py Tue Jun 07 11:58:35 2011 +0100
+++ b/ckanext/harvest/logic/validators.py Tue Jun 07 12:07:53 2011 +0100
@@ -77,3 +77,20 @@
raise Invalid('Unknown harvester type: %s. Have you registered a harvester for this type?' % value)
return value
+
+def harvest_source_config_validator(key,data,errors,context):
+ harvester_type = data.get(('type',),'')
+ for harvester in PluginImplementations(IHarvester):
+ info = harvester.info()
+ if info['name'] == harvester_type:
+ if info.get('form_config_interface','') != 'Text':
+ raise Invalid('This harvester does not allow configuration options: %s' % harvester_type)
+
+ if harvester.validate_config:
+ try:
+ return harvester.validate_config(data[key])
+ except Exception, e:
+ raise Invalid('Error parsing the configuration options: %s' % str(e))
+ else:
+ return data[key]
+
--- a/ckanext/harvest/templates/source/new_source_form.html Tue Jun 07 11:58:35 2011 +0100
+++ b/ckanext/harvest/templates/source/new_source_form.html Tue Jun 07 12:07:53 2011 +0100
@@ -12,18 +12,18 @@
</ul></div>
- <fieldset>
- <legend>Details</legend>
- <dl>
- <dt><label class="field_req" for="url">URL for source of metadata *</label></dt>
+ <fieldset>
+ <legend>Details</legend>
+ <dl>
+ <dt><label class="field_req" for="url">URL for source of metadata *</label></dt><dd><input id="url" name="url" size="80" type="text" value="${data.get('url', '')}" /></dd><dd class="field_error" py:if="errors.get('url', '')">${errors.get('url', '')}</dd>
- <dd class="instructions basic">This should include the <tt>http://</tt> part of the URL</dd>
- <dt><label class="field_req" for="type">Source Type *</label></dt>
- <dd>
+ <dd class="instructions basic">This should include the <tt>http://</tt> part of the URL</dd>
+ <dt><label class="field_req" for="type">Source Type *</label></dt>
+ <dd><select id="type" name="type"><py:for each="harvester in harvesters">
- <option value="${harvester.name}" py:attrs="{'selected': 'selected' if data.get('type', '') == harvester.name else None}" >${harvester.title}</option>
+ <option value="${harvester.name}" py:attrs="{'selected': 'selected' if data.get('type', '') == harvester.name else None, 'data-config': harvester.show_config}" >${harvester.title}</option></py:for></select></dd>
@@ -33,13 +33,24 @@
<py:for each="harvester in harvesters"><li><span class="harvester-title">${harvester.title}</span>: ${harvester.description}</li></py:for>
- </ul>
- </dd>
- <dt><label class="field_opt" for="description">Description</label></dt>
- <dd><textarea id="description" name="description" cols="30" rows="2" style="height:75px">${data.get('description', '')}</textarea></dd>
- <dd class="instructions basic">You can add your own notes here about what the URL above represents to remind you later.</dd>
- </dl>
- </fieldset>
- <input id="save" name="save" value="Save" type="submit" /> or <a href="/harvest">Return to the harvest sources list</a>
-
+ </ul>
+ </dd>
+ <dt><label class="field_opt" for="description">Description</label></dt>
+ <dd><textarea id="description" name="description" cols="30" rows="2" style="height:75px">${data.get('description', '')}</textarea></dd>
+ <dd class="instructions basic">You can add your own notes here about what the URL above represents to remind you later.</dd>
+ <dt><label class="field_opt" for="config">Configuration</label></dt>
+ <dd><textarea id="config" name="config" cols="30" rows="2" style="height:75px">${data.get('config', '')}</textarea></dd>
+ </dl>
+ </fieldset>
+ <input id="save" name="save" value="Save" type="submit" /> or <a href="/harvest">Return to the harvest sources list</a>
+ <script type="text/javascript">
+ $(document).ready(function() {
+ $("#type").change(function(){
+ var show_config = ($("#type option:selected").attr("data-config") == "True");
+ if (!show_config) $("#config").val("");
+ $("#config").attr("disabled", !show_config);
+ });
+ $("#type").trigger("change");
+ });
+ </script></form>
http://bitbucket.org/okfn/ckanext-harvest/changeset/ea683b4e5435/
changeset: ea683b4e5435
branches:
user: amercader
date: 2011-06-07 14:35:11
summary: Load config in the CKAN harvester
affected #: 1 file (496 bytes)
--- a/ckanext/harvest/harvesters/ckanharvester.py Tue Jun 07 12:07:53 2011 +0100
+++ b/ckanext/harvest/harvesters/ckanharvester.py Tue Jun 07 13:35:11 2011 +0100
@@ -18,6 +18,7 @@
'''
A Harvester for CKAN instances
'''
+ config = None
#TODO: check different API versions
api_version = '2'
@@ -40,6 +41,13 @@
except Exception, e:
raise e
+ def _set_config(self,config_str):
+ if config_str:
+ self.config = json.loads(config_str)
+ log.debug('Using config: %r', self.config)
+ else:
+ self.config = {}
+
def info(self):
return {
'name': 'ckan',
@@ -62,6 +70,9 @@
get_all_packages = True
package_ids = []
+ if not self.config:
+ self._set_config(harvest_job.source.config)
+
# Check if this source has been harvested before
previous_job = Session.query(HarvestJob) \
.filter(HarvestJob.source==harvest_job.source) \
@@ -145,6 +156,10 @@
def fetch_stage(self,harvest_object):
log.debug('In CKANHarvester fetch_stage')
+
+ if not self.config:
+ self._set_config(harvest_object.job.source.config)
+
# Get source URL
url = harvest_object.source.url.rstrip('/')
url = url + self._get_rest_api_offset() + '/package/' + harvest_object.guid
@@ -173,6 +188,9 @@
harvest_object, 'Import')
return False
+ if not self.config:
+ self._set_config(harvest_object.job.source.config)
+
try:
package_dict = json.loads(harvest_object.content)
return self._create_or_update_package(package_dict,harvest_object)
Repository URL: https://bitbucket.org/okfn/ckanext-harvest/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
More information about the ckan-changes mailing list