[ckan-changes] commit/ckanextiati: 2 new changesets

Bitbucket commits-noreply at bitbucket.org
Thu Oct 27 16:29:59 UTC 2011


2 new commits in ckanextiati:


https://bitbucket.org/okfn/ckanextiati/changeset/5673b305c80b/
changeset:   5673b305c80b
branch:      spreadsheet-support
user:        amercader
date:        2011-10-27 11:02:04
summary:     Set values to null if cells are empty
affected #:  1 file

diff -r 4408b0e1fe398feb381dd22ea610cc3046b6152b -r 5673b305c80b9a22e460148c0e992ae0ed24ab81 ckanext/iati/controllers/spreadsheet.py
--- a/ckanext/iati/controllers/spreadsheet.py
+++ b/ckanext/iati/controllers/spreadsheet.py
@@ -211,20 +211,23 @@
         package = {}
         for fieldname, entity, key in self.csv_mapping:
             if fieldname in row:
+                # If value is None (empty cell), property will be set to blank
                 value = row[fieldname]
-                if value:
-                    if entity == 'groups':
-                        package['groups'] = [value]
-                    elif entity == 'resources':
-                        if not 'resources' in package:
-                           package['resources'] = [{}]
-                        package['resources'][0][key] = value
-                    elif entity == 'extras':
-                        if not 'extras' in package:
-                           package['extras'] = {}
-                        package['extras'][key] = value
-                    else:
-                        package[key] = value
+                if entity == 'groups':
+                    if not value:
+                        # This has already been checked
+                        raise ValueError('Publisher not defined')
+                    package['groups'] = [value]
+                elif entity == 'resources':
+                    if not 'resources' in package:
+                       package['resources'] = [{}]
+                    package['resources'][0][key] = value
+                elif entity == 'extras':
+                    if not 'extras' in package:
+                       package['extras'] = {}
+                    package['extras'][key] = value
+                else:
+                    package[key] = value
         return package
 
     def create_or_update_package(self, package_dict, counts = None):



https://bitbucket.org/okfn/ckanextiati/changeset/8a2d55a9d505/
changeset:   8a2d55a9d505
branch:      spreadsheet-support
user:        amercader
date:        2011-10-27 18:29:02
summary:     Add validation to CSV import. Still some validators missing. Nicer error summary.
affected #:  5 files

diff -r 5673b305c80b9a22e460148c0e992ae0ed24ab81 -r 8a2d55a9d505ac42757d15d2c30f5cae7dffb75a ckanext/iati/controllers/package_iati.py
--- a/ckanext/iati/controllers/package_iati.py
+++ b/ckanext/iati/controllers/package_iati.py
@@ -13,12 +13,12 @@
                                       ignore,
                                       keep_extras,
                                      )
-from ckan.lib.navl.dictization_functions import unflatten
+from ckan.logic.validators import int_validator
 from ckan.logic.converters import convert_from_extras, convert_to_extras, date_to_db, date_to_form
-from ckan.lib.navl.dictization_functions import Missing, Invalid
-from ckan.lib.field_types import DateType, DateConvertError
 
 from ckanext.iati.lists import COUNTRIES
+from ckanext.iati.logic.validators import iati_dataset_name
+from ckanext.iati.logic.converters import convert_from_comma_list, convert_to_comma_list, checkbox_value
 
 class PackageIatiController(PackageController):
 
@@ -42,7 +42,7 @@
             'data_updated': [date_to_db, convert_to_extras,ignore_missing],
             'activity_period-from': [date_to_db, convert_to_extras,ignore_missing],
             'activity_period-to': [date_to_db, convert_to_extras,ignore_missing],
-            'activity_count': [integer,convert_to_extras,ignore_missing],
+            'activity_count': [int_validator,convert_to_extras,ignore_missing],
             'archive_file': [checkbox_value, convert_to_extras,ignore_missing],
             'verified': [checkbox_value, convert_to_extras,ignore_missing],
             'language': [convert_to_extras, ignore_missing],
@@ -107,42 +107,3 @@
         return [{'id':group.id,'name':group.name, 'title':group.title} for group in groups if group.state==model.State.ACTIVE]
 
 
-def convert_to_comma_list(value, context):
-
-    return ', '.join(json.loads(value))
-
-def convert_from_comma_list(value, context):
-
-    return [x.strip() for x in value.split(',') if len(x)]
-
-def checkbox_value(value,context):
-
-    return 'yes' if not isinstance(value, Missing) else 'no'
-
-def integer(value,context):
-
-    if not value == '':
-        try:
-            value = int(value)
-        except ValueError,e:
-            raise Invalid(str(e))
-        return value
-
-def iati_dataset_name(key,data,errors,context):
-
-    unflattened = unflatten(data)
-    value = data[key]
-    for grp in unflattened['groups']:
-        if grp['id']:
-            group_id = grp['id']
-            break
-    group = get_action('group_show')(context,{'id':group_id})
-    group_name = group['name']
-
-    parts = value.split('-')
-    code_part = parts[-1]
-    group_part = parts[0] if len(parts) == 2 else '-'.join(parts[:-1])
-    if not code_part or not group_part or not group_part == group_name:
-        errors[key].append('Dataset name does not follow the convention <publisher>-<code>: "%s" (using publisher %s)' % (value,group_name))
-
-


diff -r 5673b305c80b9a22e460148c0e992ae0ed24ab81 -r 8a2d55a9d505ac42757d15d2c30f5cae7dffb75a ckanext/iati/controllers/spreadsheet.py
--- a/ckanext/iati/controllers/spreadsheet.py
+++ b/ckanext/iati/controllers/spreadsheet.py
@@ -7,30 +7,37 @@
 from ckan.lib.helpers import json
 from ckan.authz import Authorizer
 from ckan.logic import get_action, NotFound, ValidationError, NotAuthorized
+from ckan.logic.converters import date_to_db
+from ckan.logic.validators import int_validator
+from ckan.lib.navl.validators import not_empty
+from ckan.lib.navl.dictization_functions import validate
 from ckanext.iati.authz import get_user_administered_groups
 
+from ckanext.iati.logic.validators import iati_dataset_name_from_csv, file_type_validator
+from ckanext.iati.logic.converters import iso_date
+
 log = logging.getLogger(__name__)
 
+CSV_MAPPING = [
+        ('registry-publisher-id', 'groups', 'name', [not_empty]),
+        ('registry-file-id', 'package', 'name', [not_empty, iati_dataset_name_from_csv]),
+        ('title', 'package', 'title', []),
+        ('contact-email', 'package', 'author_email', []),
+        ('source-url', 'resources', 'url', []),
+        ('format', 'resources', 'format', []),
+        ('file-type','extras', 'filetype', [file_type_validator]),
+        ('recipient-country','extras', 'country', []),
+        ('activity-period-start','extras', 'activity_period-from', [iso_date]),
+        ('activity-period-end','extras', 'activity_period-to', [iso_date]),
+        ('last-updated-datetime','extras', 'data_updated', [iso_date]),
+        ('generated-datetime','extras', 'record_updated', [iso_date]),
+        ('activity-count','extras', 'activity_count', [int_validator]),
+        ('verification-status','extras', 'verified', []),
+        ('default-language','extras', 'language', [])
+        ]
 
 class CSVController(BaseController):
 
-    csv_mapping = [
-            ('registry-publisher-id', 'groups', 'name'),
-            ('registry-file-id', 'package', 'name'),
-            ('title', 'package', 'title'),
-            ('contact-email', 'package', 'author_email'),
-            ('source-url', 'resources', 'url'),
-            ('format', 'resources', 'format'),
-            ('file-type','extras', 'filetype'),
-            ('recipient-country','extras', 'country'),
-            ('activity-period-start','extras', 'activity_period-from'),
-            ('activity-period-end','extras', 'activity_period-to'),
-            ('last-updated-datetime','extras', 'data_updated'),
-            ('generated-datetime','extras', 'record_updated'),
-            ('activity-count','extras', 'activity_count'),
-            ('verification-status','extras', 'verified'),
-            ('default-language','extras', 'language')
-            ]
 
     def __before__(self, action, **params):
         super(CSVController,self).__before__(action, **params)
@@ -104,6 +111,7 @@
             added, updated, errors = self.read_csv_file(csv_file)
             c.added = added
             c.updated = updated
+
             c.errors = errors
 
             log.info('CSV import finished: file %s, %i added, %i updated, %i errors' % \
@@ -126,9 +134,9 @@
 
         output = ''
         try:
-            fieldnames = [n[0] for n in self.csv_mapping]
+            fieldnames = [n[0] for n in CSV_MAPPING]  # loop var must stay 'n': 'f' is the DictWriter target below
             writer = csv.DictWriter(f, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
-            headers = dict( (n[0],n[0]) for n in self.csv_mapping )
+            headers = dict( (n[0],n[0]) for n in CSV_MAPPING )
             writer.writerow(headers)
 
             packages.sort()
@@ -140,7 +148,7 @@
                     continue
                 if package:
                     row = {}
-                    for fieldname, entity, key in self.csv_mapping:
+                    for fieldname, entity, key, v in CSV_MAPPING:  # entries are 4-tuples (validators last)
                         value = None
                         if entity == 'groups':
                             if len(package['groups']):
@@ -163,7 +171,7 @@
         return output
 
     def read_csv_file(self,csv_file):
-        fieldnames = [n[0] for n in self.csv_mapping]
+        fieldnames = [f[0] for f in CSV_MAPPING]
 
         # Try to sniff the file dialect
         dialect = csv.Sniffer().sniff(csv_file.file.read(1024))
@@ -177,46 +185,46 @@
         groups= get_action('group_list')(context, {})
 
         counts = {'added': [], 'updated': []}
-        errors = []
+        errors = {}
         for i,row in enumerate(reader):
+            row_index = str(i + 1)
+            errors[row_index] = {}
             try:
-                # Check mandatory fields
-                if not row['registry-publisher-id']:
-                     raise ValueError('Publisher not defined')
-
-                if not row['registry-file-id']:
-                    raise ValueError('File id not defined')
-
-                # Check name convention
-                name = row['registry-file-id']
-                parts = name.split('-')
-                group_name = parts[0] if len(parts) == 2 else '-'.join(parts[:-1])
-                if not group_name or not group_name in groups:
-                    raise ValueError('Dataset name does not follow the convention <publisher>-<code>: "%s"' % name)
+                # We will now run the IATI specific validation, CKAN core will
+                # run the default one later on
+                schema = dict([(f[0],f[3]) for f in CSV_MAPPING])
+                row, row_errors = validate(row,schema)
+                if row_errors:
+                    for key, msgs in row_errors.iteritems():
+                        log.error('Error in row %i: %s: %s' % (i+1,key,str(msgs)))
+                        errors[row_index][key] = msgs
+                    continue
 
                 package_dict = self.get_package_dict_from_row(row)
                 self.create_or_update_package(package_dict,counts)
-            except ValueError,e:
-                msg = 'Error in row %i: %s' % (i+1,str(e))
-                log.error(msg)
-                errors.append(msg)
+
+                del errors[row_index]
+            except ValidationError,e:
+                iati_keys = dict([(f[2],f[0]) for f in CSV_MAPPING])
+                for key, msgs in e.error_dict.iteritems():
+                    iati_key = iati_keys.get(key, key)  # keep the raw key when it has no CSV column
+                    log.error('Error in row %i: %s: %s' % (i+1,iati_key,str(msgs)))
+                    errors[row_index][iati_key] = msgs
             except NotAuthorized,e:
-                msg = 'Error in row %i: Not authorized to publish to this group: %s' % (i+1,row['registry-publisher-id'])
-                log.error(msg)
-                errors.append(msg)
+                msg = 'Not authorized to publish to this group: %s' % row['registry-publisher-id']
+                log.error('Error in row %i: %s' % (i+1,msg))
+                errors[row_index]['registry-publisher-id'] = [msg]
 
+        errors = sorted(errors.iteritems(), key=lambda e: int(e[0]))  # numeric row order, not '10' < '2'
         return counts['added'], counts['updated'], errors
 
     def get_package_dict_from_row(self,row):
         package = {}
-        for fieldname, entity, key in self.csv_mapping:
+        for fieldname, entity, key, v in CSV_MAPPING:
             if fieldname in row:
                 # If value is None (empty cell), property will be set to blank
                 value = row[fieldname]
                 if entity == 'groups':
-                    if not value:
-                        # This has already been checked
-                        raise ValueError('Publisher not defined')
                     package['groups'] = [value]
                 elif entity == 'resources':
                     if not 'resources' in package:
@@ -231,40 +239,34 @@
         return package
 
     def create_or_update_package(self, package_dict, counts = None):
+
+        context = {
+            'model': model,
+            'session': model.Session,
+            'user': c.user,
+            'api_version':'1'
+        }
+
+        # Check if package exists
+        data_dict = {}
+        data_dict['id'] = package_dict['name']
         try:
+            existing_package_dict = get_action('package_show')(context, data_dict)
 
-            context = {
-                'model': model,
-                'session': model.Session,
-                'user': c.user,
-                'api_version':'1'
-            }
+            # Update package
+            log.info('Package with name "%s" exists and will be updated' % package_dict['name'])
 
-            # Check if package exists
-            data_dict = {}
-            data_dict['id'] = package_dict['name']
-            try:
-                existing_package_dict = get_action('package_show')(context, data_dict)
+            context.update({'id':existing_package_dict['id']})
+            package_dict.update({'id':existing_package_dict['id']})
+            updated_package = get_action('package_update_rest')(context, package_dict)
+            if counts:
+                counts['updated'].append(updated_package['name'])
+            log.debug('Package with name "%s" updated' % package_dict['name'])
+        except NotFound:
+            # Package needs to be created
+            log.info('Package with name "%s" does not exist and will be created' % package_dict['name'])
+            new_package = get_action('package_create_rest')(context, package_dict)
+            if counts:
+                counts['added'].append(new_package['name'])
+            log.debug('Package with name "%s" created' % package_dict['name'])
 
-                # Update package
-                log.info('Package with name "%s" exists and will be updated' % package_dict['name'])
-
-                context.update({'id':existing_package_dict['id']})
-                package_dict.update({'id':existing_package_dict['id']})
-                updated_package = get_action('package_update_rest')(context, package_dict)
-                if counts:
-                    counts['updated'].append(updated_package['name'])
-                log.debug('Package with name "%s" updated' % package_dict['name'])
-            except NotFound:
-                # Package needs to be created
-                log.info('Package with name "%s" does not exist and will be created' % package_dict['name'])
-                new_package = get_action('package_create_rest')(context, package_dict)
-                if counts:
-                    counts['added'].append(new_package['name'])
-                log.debug('Package with name "%s" created' % package_dict['name'])
-        except ValidationError,e:
-            raise ValueError(str(e))
-
-
-
-


diff -r 5673b305c80b9a22e460148c0e992ae0ed24ab81 -r 8a2d55a9d505ac42757d15d2c30f5cae7dffb75a ckanext/iati/logic/converters.py
--- /dev/null
+++ b/ckanext/iati/logic/converters.py
@@ -0,0 +1,29 @@
+"""Converters used by the IATI package schema and the CSV import."""
+from ckan.lib.helpers import json
+from ckan.lib.navl.dictization_functions import Invalid, Missing
+
+
+def convert_to_comma_list(value, context):
+    """Join the items of a JSON-encoded list with ', '."""
+    return ', '.join(json.loads(value))
+
+
+def convert_from_comma_list(value, context):
+    """Split a comma separated string into a list of stripped items."""
+    # Test the stripped item so whitespace-only entries are dropped too.
+    return [x.strip() for x in value.split(',') if x.strip()]
+
+
+def checkbox_value(value,context):
+    """Return 'no' when value is a Missing instance, 'yes' otherwise."""
+    return 'yes' if not isinstance(value, Missing) else 'no'
+
+
+def iso_date(value,context):
+    """Run value through DateType.iso_to_db; raise Invalid if it fails."""
+    from ckan.lib.field_types import DateType, DateConvertError
+    try:
+        value = DateType.iso_to_db(value)
+    except DateConvertError, e:
+        raise Invalid(str(e))
+    return value


diff -r 5673b305c80b9a22e460148c0e992ae0ed24ab81 -r 8a2d55a9d505ac42757d15d2c30f5cae7dffb75a ckanext/iati/logic/validators.py
--- /dev/null
+++ b/ckanext/iati/logic/validators.py
@@ -0,0 +1,57 @@
+"""Validators for IATI dataset names and file types."""
+from ckan.logic import get_action
+from ckan.lib.navl.dictization_functions import unflatten
+
+from ckanext.iati.lists import FILE_TYPES
+
+
+def _check_name_convention(key, value, group_name, errors):
+    """Record an error on key unless value is '<group_name>-<code>'."""
+    # Everything before the last dash must equal the publisher name and
+    # the code after it must not be empty.
+    group_part, _sep, code_part = value.rpartition('-')
+    if not code_part or not group_part or group_part != group_name:
+        errors[key].append('Dataset name does not follow the convention <publisher>-<code>: "%s" (using publisher %s)' % (value,group_name))
+
+
+def iati_dataset_name(key,data,errors,context):
+    """Check the dataset name against its first group that has an id.
+
+    The group name is fetched with the 'group_show' action.
+    """
+    unflattened = unflatten(data)
+    value = data[key]
+
+    # Without this guard group_id stayed unbound when no group had an id.
+    group_id = None
+    for grp in unflattened.get('groups', []):
+        if grp.get('id'):
+            group_id = grp['id']
+            break
+    if group_id is None:
+        errors[key].append('Publisher name missing')
+        return
+
+    group = get_action('group_show')(context,{'id':group_id})
+    _check_name_convention(key, value, group['name'], errors)
+
+
+def iati_dataset_name_from_csv(key,data,errors,context):
+    """Check the dataset name against the row's 'registry-publisher-id'."""
+    unflattened = unflatten(data)
+    value = data[key]
+
+    if not 'registry-publisher-id' in unflattened:
+        errors[key].append('Publisher name missing')
+        return
+
+    _check_name_convention(key, value, unflattened['registry-publisher-id'], errors)
+
+
+def file_type_validator(key,data,errors, context=None):
+    """Record an error unless the value is the first item of a FILE_TYPES entry."""
+    value = data.get(key)
+
+    allowed_values = [t[0] for t in FILE_TYPES]
+    if not value or not value in allowed_values:
+        errors[key].append('File type must be one of [%s]' % ', '.join(allowed_values))


diff -r 5673b305c80b9a22e460148c0e992ae0ed24ab81 -r 8a2d55a9d505ac42757d15d2c30f5cae7dffb75a ckanext/iati/templates/csv/result.html
--- a/ckanext/iati/templates/csv/result.html
+++ b/ckanext/iati/templates/csv/result.html
@@ -20,6 +20,9 @@
         <py:for each="pkg in c.added"><li><a href="${h.url_for(controller='package', action='read', id=pkg)}">${g.site_url}${h.url_for(controller='package', action='read', id=pkg)}</a></li></py:for>
+        <py:if test="not c.added">
+            <li><i>None</i></li>
+        </py:if></ul><h3><a name="updated">Datasets updated</a></h3>
@@ -27,13 +30,27 @@
         <py:for each="pkg in c.updated"><li><a href="${h.url_for(controller='package', action='read', id=pkg)}">${g.site_url}${h.url_for(controller='package', action='read', id=pkg)}</a></li></py:for>
+        <py:if test="not c.updated">
+            <li><i>None</i></li>
+        </py:if>
+
         </ul><h3><a name="errors">Errors found</a></h3><ul>
-        <py:for each="error in c.errors">
-            <li>${error}</li>
+        <py:for each="row,fields in c.errors">
+            <li>Line ${row}:
+                <ul>
+                <py:for each="field,msgs in fields.iteritems()">
+                    <li><strong>${field}</strong>: ${msgs}</li>
+                </py:for>
+                </ul>
+            </li></py:for>
+        <py:if test="not c.errors">
+            <li><i>None</i></li>
+        </py:if>
+
         </ul></div>

Repository URL: https://bitbucket.org/okfn/ckanextiati/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.




More information about the ckan-changes mailing list