[ckan-changes] commit/ckanext-harvest: 3 new changesets

Bitbucket commits-noreply at bitbucket.org
Tue Jun 7 12:37:09 UTC 2011


3 new changesets in ckanext-harvest:

http://bitbucket.org/okfn/ckanext-harvest/changeset/e8d67a100d96/
changeset:   e8d67a100d96
branches:    
user:        amercader
date:        2011-06-07 12:58:35
summary:     Reverting previous changeset, as it conflicts with dgu_form_api
affected #:  1 file (2 bytes)

--- a/ckanext/harvest/plugin.py	Mon Jun 06 10:16:34 2011 +0200
+++ b/ckanext/harvest/plugin.py	Tue Jun 07 11:58:35 2011 +0100
@@ -42,10 +42,10 @@
         template_dir = os.path.join(here, 'templates')
         public_dir = os.path.join(here, 'public')
         if config.get('extra_template_paths'):
-            config['extra_template_paths'] += ', ' + template_dir
+            config['extra_template_paths'] += ',' + template_dir
         else:
             config['extra_template_paths'] = template_dir
         if config.get('extra_public_paths'):
-            config['extra_public_paths'] += ', ' + public_dir
+            config['extra_public_paths'] += ',' + public_dir
         else:
             config['extra_public_paths'] = public_dir


http://bitbucket.org/okfn/ckanext-harvest/changeset/6fe07de0bba8/
changeset:   6fe07de0bba8
branches:    
user:        amercader
date:        2011-06-07 13:07:53
summary:     Add a simple way for harvesters to store configuration options. If form_config_interface is Text on the info dictionary, the configuration field will be enabled in the form. Harvesters can also provide a validate_config method.
affected #:  7 files (2.4 KB)

--- a/README.rst	Tue Jun 07 11:58:35 2011 +0100
+++ b/README.rst	Tue Jun 07 12:07:53 2011 +0100
@@ -139,6 +139,10 @@
           in the WUI.
         * description: a small description of what the harvester does. This will
           appear on the form as a guidance to the user.
+        * form_config_interface [optional]: Harvesters that need to store configuration
+          values in the database must provide this key. The only supported value is
+          'Text'. This will enable the configuration text box in the form. See also
+          the ``validate_config`` method.
 
         A complete example may be::
 
@@ -152,6 +156,15 @@
         returns: A dictionary with the harvester descriptors
         '''
 
+    def validate_config(self, config):
+        '''
+        Harvesters can provide this method to validate the configuration entered in the
+        form. It should return a single string, which will be stored in the database.
+        Exceptions raised will be shown in the form's error messages.
+
+        returns: A string with the validated configuration options
+        '''
+
     def gather_stage(self, harvest_job):
         '''
         The gather stage will recieve a HarvestJob object and will be


--- a/ckanext/harvest/controllers/view.py	Tue Jun 07 11:58:35 2011 +0100
+++ b/ckanext/harvest/controllers/view.py	Tue Jun 07 12:07:53 2011 +0100
@@ -38,7 +38,6 @@
         data = data or {}
         errors = errors or {}
         error_summary = error_summary or {}
-        #TODO: Use new description interface to build the types select and descriptions
         vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()}
         
         c.form = render('source/new_source_form.html', extra_vars=vars)
@@ -104,7 +103,7 @@
 
     def _check_data_dict(self, data_dict):
         '''Check if the return data is correct'''
-        surplus_keys_schema = ['id','publisher_id','user_id','active','save']
+        surplus_keys_schema = ['id','publisher_id','user_id','active','save','config']
 
         schema_keys = harvest_source_form_schema().keys()
         keys_in_schema = set(schema_keys) - set(surplus_keys_schema)


--- a/ckanext/harvest/harvesters/ckanharvester.py	Tue Jun 07 11:58:35 2011 +0100
+++ b/ckanext/harvest/harvesters/ckanharvester.py	Tue Jun 07 12:07:53 2011 +0100
@@ -44,9 +44,19 @@
         return {
             'name': 'ckan',
             'title': 'CKAN',
-            'description': 'Harvests remote CKAN instances'
+            'description': 'Harvests remote CKAN instances',
+            'form_config_interface':'Text'
         }
 
+    def validate_config(self,config):
+        try:
+            config_obj = json.loads(config)
+        except ValueError,e:
+            raise e
+
+        return config
+
+
     def gather_stage(self,harvest_job):
         log.debug('In CKANHarvester gather_stage (%s)' % harvest_job.source.url)
         get_all_packages = True
@@ -64,7 +74,7 @@
         base_url = harvest_job.source.url.rstrip('/')
         base_rest_url = base_url + self._get_rest_api_offset()
         base_search_url = base_url + self._get_search_api_offset()
-        
+
         if previous_job and not previous_job.gather_errors:
             get_all_packages = False
 
@@ -126,7 +136,7 @@
                 return object_ids
 
             else:
-               self._save_gather_error('No packages received for URL: %s' % url, 
+               self._save_gather_error('No packages received for URL: %s' % url,
                        harvest_job)
                return None
         except Exception, e:
@@ -159,7 +169,7 @@
             return False
 
         if harvest_object.content is None:
-            self._save_object_error('Empty content for object %s' % harvest_object.id, 
+            self._save_object_error('Empty content for object %s' % harvest_object.id,
                     harvest_object, 'Import')
             return False
 
@@ -167,7 +177,7 @@
             package_dict = json.loads(harvest_object.content)
             return self._create_or_update_package(package_dict,harvest_object)
         except ValidationError,e:
-            self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict), 
+            self._save_object_error('Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict),
                     harvest_object, 'Import')
         except Exception, e:
             self._save_object_error('%r'%e,harvest_object,'Import')


--- a/ckanext/harvest/lib/__init__.py	Tue Jun 07 11:58:35 2011 +0100
+++ b/ckanext/harvest/lib/__init__.py	Tue Jun 07 12:07:53 2011 +0100
@@ -228,7 +228,7 @@
     source.url = data['url']
     source.type = data['type']
 
-    opt = ['active','description','user_id','publisher_id']
+    opt = ['active','description','user_id','publisher_id','config']
     for o in opt:
         if o in data and data[o] is not None:
             source.__setattr__(o,data[o])
@@ -245,14 +245,14 @@
         raise NotFound('Harvest source %s does not exist' % source_id)
 
     # Add source id to the dict, as some validators will need it
-    data_dict["id"] = source.id
+    data_dict['id'] = source.id
 
     data, errors = validate(data_dict, schema)
     if errors:
         Session.rollback()
         raise ValidationError(errors,_error_summary(errors))
 
-    fields = ['url','type','active','description','user_id','publisher_id']
+    fields = ['url','type','active','description','user_id','publisher_id','config']
     for f in fields:
         if f in data_dict and data_dict[f] is not None and data_dict[f] != '':
             source.__setattr__(f,data_dict[f])
@@ -381,13 +381,13 @@
     return imported_objects
 
 def get_registered_harvesters_info():
-    # TODO: Use new description interface when implemented
     available_harvesters = []
     for harvester in PluginImplementations(IHarvester):
         info = harvester.info()
         if not info or 'name' not in info:
             log.error('Harvester %r does not provide the harvester name in the info response' % str(harvester))
             continue
+        info['show_config'] = (info.get('form_config_interface','') == 'Text')
         available_harvesters.append(info)
 
     return available_harvesters


--- a/ckanext/harvest/logic/schema.py	Tue Jun 07 11:58:35 2011 +0100
+++ b/ckanext/harvest/logic/schema.py	Tue Jun 07 12:07:53 2011 +0100
@@ -7,7 +7,8 @@
 
 from ckanext.harvest.logic.validators import harvest_source_id_exists, \
                                             harvest_source_url_validator, \
-                                            harvest_source_type_exists
+                                            harvest_source_type_exists, \
+                                            harvest_source_config_validator
 
 def default_harvest_source_schema():
 
@@ -19,7 +20,7 @@
         'active': [ignore_missing],
         'user_id': [ignore_missing],
         'publisher_id': [ignore_missing],
-        #'config'
+        'config': [harvest_source_config_validator]
     }
 
     return schema


--- a/ckanext/harvest/logic/validators.py	Tue Jun 07 11:58:35 2011 +0100
+++ b/ckanext/harvest/logic/validators.py	Tue Jun 07 12:07:53 2011 +0100
@@ -77,3 +77,20 @@
         raise Invalid('Unknown harvester type: %s. Have you registered a harvester for this type?' % value)
     
     return value
+
+def harvest_source_config_validator(key,data,errors,context):
+    harvester_type = data.get(('type',),'')
+    for harvester in PluginImplementations(IHarvester):
+        info = harvester.info()
+        if info['name'] == harvester_type:
+            if info.get('form_config_interface','') != 'Text':
+                raise Invalid('This harvester does not allow configuration options: %s' % harvester_type)
+
+            if harvester.validate_config:
+                try:
+                    return harvester.validate_config(data[key])
+                except Exception, e:
+                    raise Invalid('Error parsing the configuration options: %s' % str(e))
+            else:
+                return data[key]
+


--- a/ckanext/harvest/templates/source/new_source_form.html	Tue Jun 07 11:58:35 2011 +0100
+++ b/ckanext/harvest/templates/source/new_source_form.html	Tue Jun 07 12:07:53 2011 +0100
@@ -12,18 +12,18 @@
 </ul></div>
 
-    <fieldset> 
-    <legend>Details</legend> 
-        <dl> 
-            <dt><label class="field_req" for="url">URL for source of metadata *</label></dt> 
+    <fieldset>
+    <legend>Details</legend>
+        <dl>
+            <dt><label class="field_req" for="url">URL for source of metadata *</label></dt><dd><input id="url" name="url" size="80" type="text" value="${data.get('url', '')}" /></dd><dd class="field_error" py:if="errors.get('url', '')">${errors.get('url', '')}</dd>
-            <dd class="instructions basic">This should include the <tt>http://</tt> part of the URL</dd> 
-            <dt><label class="field_req" for="type">Source Type *</label></dt> 
-            <dd> 
+            <dd class="instructions basic">This should include the <tt>http://</tt> part of the URL</dd>
+            <dt><label class="field_req" for="type">Source Type *</label></dt>
+            <dd><select id="type" name="type"><py:for each="harvester in harvesters">
-                     <option value="${harvester.name}" py:attrs="{'selected': 'selected' if data.get('type', '') == harvester.name else None}" >${harvester.title}</option>
+                     <option value="${harvester.name}" py:attrs="{'selected': 'selected' if data.get('type', '') == harvester.name else None, 'data-config': harvester.show_config}" >${harvester.title}</option></py:for></select></dd>
@@ -33,13 +33,24 @@
                     <py:for each="harvester in harvesters"><li><span class="harvester-title">${harvester.title}</span>: ${harvester.description}</li></py:for>
-                </ul>         
-            </dd> 
-            <dt><label class="field_opt" for="description">Description</label></dt> 
-            <dd><textarea id="description" name="description" cols="30" rows="2" style="height:75px">${data.get('description', '')}</textarea></dd> 
-            <dd class="instructions basic">You can add your own notes here about what the URL above represents to remind you later.</dd> 
-        </dl> 
-    </fieldset> 
-    <input id="save" name="save" value="Save" type="submit" /> or <a href="/harvest">Return to the harvest sources list</a> 
-
+                </ul>
+            </dd>
+            <dt><label class="field_opt" for="description">Description</label></dt>
+            <dd><textarea id="description" name="description" cols="30" rows="2" style="height:75px">${data.get('description', '')}</textarea></dd>
+            <dd class="instructions basic">You can add your own notes here about what the URL above represents to remind you later.</dd>
+            <dt><label class="field_opt" for="config">Configuration</label></dt>
+            <dd><textarea id="config" name="config" cols="30" rows="2" style="height:75px">${data.get('config', '')}</textarea></dd>
+        </dl>
+    </fieldset>
+    <input id="save" name="save" value="Save" type="submit" /> or <a href="/harvest">Return to the harvest sources list</a>
+    <script type="text/javascript">
+    $(document).ready(function() {
+       $("#type").change(function(){
+            var show_config = ($("#type option:selected").attr("data-config") == "True");
+            if (!show_config) $("#config").val("");
+            $("#config").attr("disabled", !show_config);
+        });
+       $("#type").trigger("change");
+    });
+    </script></form>


http://bitbucket.org/okfn/ckanext-harvest/changeset/ea683b4e5435/
changeset:   ea683b4e5435
branches:    
user:        amercader
date:        2011-06-07 14:35:11
summary:     Load config in the CKAN harvester
affected #:  1 file (496 bytes)

--- a/ckanext/harvest/harvesters/ckanharvester.py	Tue Jun 07 12:07:53 2011 +0100
+++ b/ckanext/harvest/harvesters/ckanharvester.py	Tue Jun 07 13:35:11 2011 +0100
@@ -18,6 +18,7 @@
     '''
     A Harvester for CKAN instances
     '''
+    config = None
 
     #TODO: check different API versions
     api_version = '2'
@@ -40,6 +41,13 @@
         except Exception, e:
             raise e
 
+    def _set_config(self,config_str):
+        if config_str:
+            self.config = json.loads(config_str)
+            log.debug('Using config: %r', self.config)
+        else:
+            self.config = {}
+
     def info(self):
         return {
             'name': 'ckan',
@@ -62,6 +70,9 @@
         get_all_packages = True
         package_ids = []
 
+        if not self.config:
+            self._set_config(harvest_job.source.config)
+
         # Check if this source has been harvested before
         previous_job = Session.query(HarvestJob) \
                         .filter(HarvestJob.source==harvest_job.source) \
@@ -145,6 +156,10 @@
 
     def fetch_stage(self,harvest_object):
         log.debug('In CKANHarvester fetch_stage')
+
+        if not self.config:
+            self._set_config(harvest_object.job.source.config)
+
         # Get source URL
         url = harvest_object.source.url.rstrip('/')
         url = url + self._get_rest_api_offset() + '/package/' + harvest_object.guid
@@ -173,6 +188,9 @@
                     harvest_object, 'Import')
             return False
 
+        if not self.config:
+           self._set_config(harvest_object.job.source.config)
+
         try:
             package_dict = json.loads(harvest_object.content)
             return self._create_or_update_package(package_dict,harvest_object)

Repository URL: https://bitbucket.org/okfn/ckanext-harvest/

--

This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled for this
repository, with this email address set as the recipient.




More information about the ckan-changes mailing list