[ckan-changes] commit/ckanextiati: 2 new changesets
Bitbucket
commits-noreply at bitbucket.org
Thu Oct 27 16:29:59 UTC 2011
2 new commits in ckanextiati:
https://bitbucket.org/okfn/ckanextiati/changeset/5673b305c80b/
changeset: 5673b305c80b
branch: spreadsheet-support
user: amercader
date: 2011-10-27 11:02:04
summary: Set values to null if cells are empty
affected #: 1 file
diff -r 4408b0e1fe398feb381dd22ea610cc3046b6152b -r 5673b305c80b9a22e460148c0e992ae0ed24ab81 ckanext/iati/controllers/spreadsheet.py
--- a/ckanext/iati/controllers/spreadsheet.py
+++ b/ckanext/iati/controllers/spreadsheet.py
@@ -211,20 +211,23 @@
package = {}
for fieldname, entity, key in self.csv_mapping:
if fieldname in row:
+ # If value is None (empty cell), property will be set to blank
value = row[fieldname]
- if value:
- if entity == 'groups':
- package['groups'] = [value]
- elif entity == 'resources':
- if not 'resources' in package:
- package['resources'] = [{}]
- package['resources'][0][key] = value
- elif entity == 'extras':
- if not 'extras' in package:
- package['extras'] = {}
- package['extras'][key] = value
- else:
- package[key] = value
+ if entity == 'groups':
+ if not value:
+ # This has already been checked
+ raise ValueError('Publisher not defined')
+ package['groups'] = [value]
+ elif entity == 'resources':
+ if not 'resources' in package:
+ package['resources'] = [{}]
+ package['resources'][0][key] = value
+ elif entity == 'extras':
+ if not 'extras' in package:
+ package['extras'] = {}
+ package['extras'][key] = value
+ else:
+ package[key] = value
return package
def create_or_update_package(self, package_dict, counts = None):
https://bitbucket.org/okfn/ckanextiati/changeset/8a2d55a9d505/
changeset: 8a2d55a9d505
branch: spreadsheet-support
user: amercader
date: 2011-10-27 18:29:02
summary: Add validation to CSV import. Still some validators missing. Nicer error summary.
affected #: 5 files
diff -r 5673b305c80b9a22e460148c0e992ae0ed24ab81 -r 8a2d55a9d505ac42757d15d2c30f5cae7dffb75a ckanext/iati/controllers/package_iati.py
--- a/ckanext/iati/controllers/package_iati.py
+++ b/ckanext/iati/controllers/package_iati.py
@@ -13,12 +13,12 @@
ignore,
keep_extras,
)
-from ckan.lib.navl.dictization_functions import unflatten
+from ckan.logic.validators import int_validator
from ckan.logic.converters import convert_from_extras, convert_to_extras, date_to_db, date_to_form
-from ckan.lib.navl.dictization_functions import Missing, Invalid
-from ckan.lib.field_types import DateType, DateConvertError
from ckanext.iati.lists import COUNTRIES
+from ckanext.iati.logic.validators import iati_dataset_name
+from ckanext.iati.logic.converters import convert_from_comma_list, convert_to_comma_list, checkbox_value
class PackageIatiController(PackageController):
@@ -42,7 +42,7 @@
'data_updated': [date_to_db, convert_to_extras,ignore_missing],
'activity_period-from': [date_to_db, convert_to_extras,ignore_missing],
'activity_period-to': [date_to_db, convert_to_extras,ignore_missing],
- 'activity_count': [integer,convert_to_extras,ignore_missing],
+ 'activity_count': [int_validator,convert_to_extras,ignore_missing],
'archive_file': [checkbox_value, convert_to_extras,ignore_missing],
'verified': [checkbox_value, convert_to_extras,ignore_missing],
'language': [convert_to_extras, ignore_missing],
@@ -107,42 +107,3 @@
return [{'id':group.id,'name':group.name, 'title':group.title} for group in groups if group.state==model.State.ACTIVE]
-def convert_to_comma_list(value, context):
-
- return ', '.join(json.loads(value))
-
-def convert_from_comma_list(value, context):
-
- return [x.strip() for x in value.split(',') if len(x)]
-
-def checkbox_value(value,context):
-
- return 'yes' if not isinstance(value, Missing) else 'no'
-
-def integer(value,context):
-
- if not value == '':
- try:
- value = int(value)
- except ValueError,e:
- raise Invalid(str(e))
- return value
-
-def iati_dataset_name(key,data,errors,context):
-
- unflattened = unflatten(data)
- value = data[key]
- for grp in unflattened['groups']:
- if grp['id']:
- group_id = grp['id']
- break
- group = get_action('group_show')(context,{'id':group_id})
- group_name = group['name']
-
- parts = value.split('-')
- code_part = parts[-1]
- group_part = parts[0] if len(parts) == 2 else '-'.join(parts[:-1])
- if not code_part or not group_part or not group_part == group_name:
- errors[key].append('Dataset name does not follow the convention <publisher>-<code>: "%s" (using publisher %s)' % (value,group_name))
-
-
diff -r 5673b305c80b9a22e460148c0e992ae0ed24ab81 -r 8a2d55a9d505ac42757d15d2c30f5cae7dffb75a ckanext/iati/controllers/spreadsheet.py
--- a/ckanext/iati/controllers/spreadsheet.py
+++ b/ckanext/iati/controllers/spreadsheet.py
@@ -7,30 +7,37 @@
from ckan.lib.helpers import json
from ckan.authz import Authorizer
from ckan.logic import get_action, NotFound, ValidationError, NotAuthorized
+from ckan.logic.converters import date_to_db
+from ckan.logic.validators import int_validator
+from ckan.lib.navl.validators import not_empty
+from ckan.lib.navl.dictization_functions import validate
from ckanext.iati.authz import get_user_administered_groups
+from ckanext.iati.logic.validators import iati_dataset_name_from_csv, file_type_validator
+from ckanext.iati.logic.converters import iso_date
+
log = logging.getLogger(__name__)
+CSV_MAPPING = [
+ ('registry-publisher-id', 'groups', 'name', [not_empty]),
+ ('registry-file-id', 'package', 'name', [not_empty, iati_dataset_name_from_csv]),
+ ('title', 'package', 'title', []),
+ ('contact-email', 'package', 'author_email', []),
+ ('source-url', 'resources', 'url', []),
+ ('format', 'resources', 'format', []),
+ ('file-type','extras', 'filetype', [file_type_validator]),
+ ('recipient-country','extras', 'country', []),
+ ('activity-period-start','extras', 'activity_period-from', [iso_date]),
+ ('activity-period-end','extras', 'activity_period-to', [iso_date]),
+ ('last-updated-datetime','extras', 'data_updated', [iso_date]),
+ ('generated-datetime','extras', 'record_updated', [iso_date]),
+ ('activity-count','extras', 'activity_count', [int_validator]),
+ ('verification-status','extras', 'verified', []),
+ ('default-language','extras', 'language', [])
+ ]
class CSVController(BaseController):
- csv_mapping = [
- ('registry-publisher-id', 'groups', 'name'),
- ('registry-file-id', 'package', 'name'),
- ('title', 'package', 'title'),
- ('contact-email', 'package', 'author_email'),
- ('source-url', 'resources', 'url'),
- ('format', 'resources', 'format'),
- ('file-type','extras', 'filetype'),
- ('recipient-country','extras', 'country'),
- ('activity-period-start','extras', 'activity_period-from'),
- ('activity-period-end','extras', 'activity_period-to'),
- ('last-updated-datetime','extras', 'data_updated'),
- ('generated-datetime','extras', 'record_updated'),
- ('activity-count','extras', 'activity_count'),
- ('verification-status','extras', 'verified'),
- ('default-language','extras', 'language')
- ]
def __before__(self, action, **params):
super(CSVController,self).__before__(action, **params)
@@ -104,6 +111,7 @@
added, updated, errors = self.read_csv_file(csv_file)
c.added = added
c.updated = updated
+
c.errors = errors
log.info('CSV import finished: file %s, %i added, %i updated, %i errors' % \
@@ -126,9 +134,9 @@
output = ''
try:
- fieldnames = [n[0] for n in self.csv_mapping]
+ fieldnames = [f[0] for n in CSV_MAPPING]
writer = csv.DictWriter(f, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
- headers = dict( (n[0],n[0]) for n in self.csv_mapping )
+ headers = dict( (f[0],f[0]) for n in CSV_MAPPING )
writer.writerow(headers)
packages.sort()
@@ -140,7 +148,7 @@
continue
if package:
row = {}
- for fieldname, entity, key in self.csv_mapping:
+ for fieldname, entity, key in CSV_MAPPING:
value = None
if entity == 'groups':
if len(package['groups']):
@@ -163,7 +171,7 @@
return output
def read_csv_file(self,csv_file):
- fieldnames = [n[0] for n in self.csv_mapping]
+ fieldnames = [f[0] for f in CSV_MAPPING]
# Try to sniff the file dialect
dialect = csv.Sniffer().sniff(csv_file.file.read(1024))
@@ -177,46 +185,46 @@
groups= get_action('group_list')(context, {})
counts = {'added': [], 'updated': []}
- errors = []
+ errors = {}
for i,row in enumerate(reader):
+ row_index = str(i + 1)
+ errors[row_index] = {}
try:
- # Check mandatory fields
- if not row['registry-publisher-id']:
- raise ValueError('Publisher not defined')
-
- if not row['registry-file-id']:
- raise ValueError('File id not defined')
-
- # Check name convention
- name = row['registry-file-id']
- parts = name.split('-')
- group_name = parts[0] if len(parts) == 2 else '-'.join(parts[:-1])
- if not group_name or not group_name in groups:
- raise ValueError('Dataset name does not follow the convention <publisher>-<code>: "%s"' % name)
+ # We will now run the IATI specific validation, CKAN core will
+ # run the default one later on
+ schema = dict([(f[0],f[3]) for f in CSV_MAPPING])
+ row, row_errors = validate(row,schema)
+ if row_errors:
+ for key, msgs in row_errors.iteritems():
+ log.error('Error in row %i: %s: %s' % (i+1,key,str(msgs)))
+ errors[row_index][key] = msgs
+ continue
package_dict = self.get_package_dict_from_row(row)
self.create_or_update_package(package_dict,counts)
- except ValueError,e:
- msg = 'Error in row %i: %s' % (i+1,str(e))
- log.error(msg)
- errors.append(msg)
+
+ del errors[row_index]
+ except ValidationError,e:
+ iati_keys = dict([(f[2],f[0]) for f in CSV_MAPPING])
+ for key, msgs in e.error_dict.iteritems():
+ iati_key = iati_keys[key]
+ log.error('Error in row %i: %s: %s' % (i+1,iati_key,str(msgs)))
+ errors[row_index][iati_key] = msgs
except NotAuthorized,e:
- msg = 'Error in row %i: Not authorized to publish to this group: %s' % (i+1,row['registry-publisher-id'])
- log.error(msg)
- errors.append(msg)
+ msg = 'Not authorized to publish to this group: %s' % row['registry-publisher-id']
+ log.error('Error in row %i: %s' % msg)
+ errors[row_index]['registry-publisher-id'] = [msg]
+ errors = sorted(errors.iteritems())
return counts['added'], counts['updated'], errors
def get_package_dict_from_row(self,row):
package = {}
- for fieldname, entity, key in self.csv_mapping:
+ for fieldname, entity, key, v in CSV_MAPPING:
if fieldname in row:
# If value is None (empty cell), property will be set to blank
value = row[fieldname]
if entity == 'groups':
- if not value:
- # This has already been checked
- raise ValueError('Publisher not defined')
package['groups'] = [value]
elif entity == 'resources':
if not 'resources' in package:
@@ -231,40 +239,34 @@
return package
def create_or_update_package(self, package_dict, counts = None):
+
+ context = {
+ 'model': model,
+ 'session': model.Session,
+ 'user': c.user,
+ 'api_version':'1'
+ }
+
+ # Check if package exists
+ data_dict = {}
+ data_dict['id'] = package_dict['name']
try:
+ existing_package_dict = get_action('package_show')(context, data_dict)
- context = {
- 'model': model,
- 'session': model.Session,
- 'user': c.user,
- 'api_version':'1'
- }
+ # Update package
+ log.info('Package with name "%s" exists and will be updated' % package_dict['name'])
- # Check if package exists
- data_dict = {}
- data_dict['id'] = package_dict['name']
- try:
- existing_package_dict = get_action('package_show')(context, data_dict)
+ context.update({'id':existing_package_dict['id']})
+ package_dict.update({'id':existing_package_dict['id']})
+ updated_package = get_action('package_update_rest')(context, package_dict)
+ if counts:
+ counts['updated'].append(updated_package['name'])
+ log.debug('Package with name "%s" updated' % package_dict['name'])
+ except NotFound:
+ # Package needs to be created
+ log.info('Package with name "%s" does not exist and will be created' % package_dict['name'])
+ new_package = get_action('package_create_rest')(context, package_dict)
+ if counts:
+ counts['added'].append(new_package['name'])
+ log.debug('Package with name "%s" created' % package_dict['name'])
- # Update package
- log.info('Package with name "%s" exists and will be updated' % package_dict['name'])
-
- context.update({'id':existing_package_dict['id']})
- package_dict.update({'id':existing_package_dict['id']})
- updated_package = get_action('package_update_rest')(context, package_dict)
- if counts:
- counts['updated'].append(updated_package['name'])
- log.debug('Package with name "%s" updated' % package_dict['name'])
- except NotFound:
- # Package needs to be created
- log.info('Package with name "%s" does not exist and will be created' % package_dict['name'])
- new_package = get_action('package_create_rest')(context, package_dict)
- if counts:
- counts['added'].append(new_package['name'])
- log.debug('Package with name "%s" created' % package_dict['name'])
- except ValidationError,e:
- raise ValueError(str(e))
-
-
-
-
diff -r 5673b305c80b9a22e460148c0e992ae0ed24ab81 -r 8a2d55a9d505ac42757d15d2c30f5cae7dffb75a ckanext/iati/logic/converters.py
--- /dev/null
+++ b/ckanext/iati/logic/converters.py
@@ -0,0 +1,22 @@
+from ckan.lib.navl.dictization_functions import Missing
+
+def convert_to_comma_list(value, context):
+
+ return ', '.join(json.loads(value))
+
+def convert_from_comma_list(value, context):
+
+ return [x.strip() for x in value.split(',') if len(x)]
+
+def checkbox_value(value,context):
+
+ return 'yes' if not isinstance(value, Missing) else 'no'
+
+def iso_date(value,context):
+ from ckan.lib.field_types import DateType, DateConvertError
+ try:
+ value = DateType.iso_to_db(value)
+ except DateConvertError, e:
+ raise Invalid(str(e))
+ return value
+
diff -r 5673b305c80b9a22e460148c0e992ae0ed24ab81 -r 8a2d55a9d505ac42757d15d2c30f5cae7dffb75a ckanext/iati/logic/validators.py
--- /dev/null
+++ b/ckanext/iati/logic/validators.py
@@ -0,0 +1,47 @@
+from ckan.logic import get_action
+from ckan.lib.navl.dictization_functions import unflatten
+
+from ckanext.iati.lists import FILE_TYPES
+
+def iati_dataset_name(key,data,errors,context):
+
+ unflattened = unflatten(data)
+ value = data[key]
+ for grp in unflattened['groups']:
+ if grp['id']:
+ group_id = grp['id']
+ break
+ group = get_action('group_show')(context,{'id':group_id})
+ group_name = group['name']
+
+ parts = value.split('-')
+ code_part = parts[-1]
+ group_part = parts[0] if len(parts) == 2 else '-'.join(parts[:-1])
+ if not code_part or not group_part or not group_part == group_name:
+ errors[key].append('Dataset name does not follow the convention <publisher>-<code>: "%s" (using publisher %s)' % (value,group_name))
+
+def iati_dataset_name_from_csv(key,data,errors,context):
+
+ unflattened = unflatten(data)
+ value = data[key]
+
+ if not 'registry-publisher-id' in unflattened:
+ errors[key].append('Publisher name missing')
+ return
+
+ group_name = unflattened['registry-publisher-id']
+
+ parts = value.split('-')
+ code_part = parts[-1]
+ group_part = parts[0] if len(parts) == 2 else '-'.join(parts[:-1])
+ if not code_part or not group_part or not group_part == group_name:
+ errors[key].append('Dataset name does not follow the convention <publisher>-<code>: "%s" (using publisher %s)' % (value,group_name))
+
+def file_type_validator(key,data,errors, context=None):
+ value = data.get(key)
+
+ allowed_values = [t[0] for t in FILE_TYPES]
+ if not value or not value in allowed_values:
+ errors[key].append('File type must be one of [%s]' % ', '.join(allowed_values))
+
+
diff -r 5673b305c80b9a22e460148c0e992ae0ed24ab81 -r 8a2d55a9d505ac42757d15d2c30f5cae7dffb75a ckanext/iati/templates/csv/result.html
--- a/ckanext/iati/templates/csv/result.html
+++ b/ckanext/iati/templates/csv/result.html
@@ -20,6 +20,9 @@
<py:for each="pkg in c.added"><li><a href="${h.url_for(controller='package', action='read', id=pkg)}">${g.site_url}${h.url_for(controller='package', action='read', id=pkg)}</a></li></py:for>
+ <py:if test="not c.added">
+ <li><i>None</i></li>
+ </py:if></ul><h3><a name="updated">Datasets updated</a></h3>
@@ -27,13 +30,27 @@
<py:for each="pkg in c.updated"><li><a href="${h.url_for(controller='package', action='read', id=pkg)}">${g.site_url}${h.url_for(controller='package', action='read', id=pkg)}</a></li></py:for>
+ <py:if test="not c.updated">
+ <li><i>None</i></li>
+ </py:if>
+
</ul><h3><a name="errors">Errors found</a></h3><ul>
- <py:for each="error in c.errors">
- <li>${error}</li>
+ <py:for each="row,fields in c.errors">
+ <li>Line ${row}:
+ <ul>
+ <py:for each="field,msgs in fields.iteritems()">
+ <li><strong>${field}</strong>: ${msgs}</li>
+ </py:for>
+ </ul>
+ </li></py:for>
+ <py:if test="not c.errors">
+ <li><i>None</i></li>
+ </py:if>
+
</ul></div>
Repository URL: https://bitbucket.org/okfn/ckanextiati/
--
This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled for this
repository and this email address is configured as its recipient.
More information about the ckan-changes mailing list