[ckan-changes] commit/ckanext-dgu: dread: [bin]: #1116 Package counts script, integrated into gov_daily.py. Scripts moved from ckanext/dgu/script to ckanext/dgu/bin.
Bitbucket
commits-noreply at bitbucket.org
Mon May 30 15:57:44 UTC 2011
1 new changeset in ckanext-dgu:
http://bitbucket.org/okfn/ckanext-dgu/changeset/ed4e1cd15a3f/
changeset: ed4e1cd15a3f
branches:
user: dread
date: 2011-05-30 17:56:26
summary: [bin]: #1116 Package counts script, integrated into gov_daily.py. Scripts moved from ckanext/dgu/script to ckanext/dgu/bin.
affected #: 43 files (18 bytes)
--- a/ckanext/dgu/bin/gov-daily.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-'''
-Daily script for gov server
-'''
-import os
-import logging
-import sys
-import zipfile
-import traceback
-import datetime
-import re
-
-USAGE = '''Daily script for government
-Usage: python %s [config.ini]
-''' % sys.argv[0]
-
-if len(sys.argv) < 2 or sys.argv[1] in ('--help', '-h'):
- err = 'Error: Please specify config file.'
- print USAGE, err
- logging.error('%s\n%s' % (USAGE, err))
- sys.exit(1)
-config_file = sys.argv[1]
-path = os.path.abspath(config_file)
-
-def load_config(path):
- import paste.deploy
- conf = paste.deploy.appconfig('config:' + path)
- import ckan
- ckan.config.environment.load_environment(conf.global_conf,
- conf.local_conf)
-
-load_config(path)
-
-import ckan.model as model
-import ckan.lib.dumper as dumper
-from pylons import config
-
-# settings
-
-log_filepath = os.path.join(os.path.expanduser(config.get('ckan.log_dir', '~')),
- 'gov-daily.log')
-dump_dir = os.path.expanduser(config.get('ckan.dump_dir', '~/dump'))
-ckan_instance_name = re.sub(r'[^\w.-]|https?', '',
- config.get('ckan.site_url', 'dgu'))
-dump_filebase = ckan_instance_name + '-%Y-%m-%d'
-tmp_filepath = config.get('ckan.temp_filepath', '/tmp/dump.tmp')
-backup_dir = os.path.expanduser(config.get('ckan.backup_dir', '~/backup'))
-backup_filebase = ckan_instance_name + '.%Y-%m-%d.pg_dump'
-
-logging.basicConfig(filename=log_filepath, level=logging.INFO)
-logging.info('----------------------------')
-logging.info('Starting daily script')
-start_time = datetime.datetime.today()
-logging.info(start_time.strftime('%H:%M %d-%m-%Y'))
-
-def report_time_taken():
- time_taken = (datetime.datetime.today() - start_time).seconds
- logging.info('Time taken: %i seconds' % time_taken)
-
-
-# Check database looks right
-num_packages_before = model.Session.query(model.Package).count()
-logging.info('Number of existing packages: %i' % num_packages_before)
-if num_packages_before < 2500:
- logging.error('Expected more packages.')
- sys.exit(1)
-
-# Import recent ONS data - REMOVED
-
-# Create dumps for users
-logging.info('Creating database dump')
-if not os.path.exists(dump_dir):
- logging.info('Creating dump dir: %s' % dump_dir)
- os.makedirs(dump_dir)
-query = model.Session.query(model.Package)
-for file_type, dumper in (('csv', dumper.SimpleDumper().dump_csv),
- ('json', dumper.SimpleDumper().dump_json),
- ):
- dump_file_base = start_time.strftime(dump_filebase)
- dump_filename = '%s.%s' % (dump_file_base, file_type)
- dump_filepath = os.path.join(dump_dir, dump_filename + '.zip')
- tmp_file = open(tmp_filepath, 'w')
- logging.info('Creating %s file: %s' % (file_type, dump_filepath))
- dumper(tmp_file, query)
- tmp_file.close()
- dump_file = zipfile.ZipFile(dump_filepath, 'w', zipfile.ZIP_DEFLATED)
- dump_file.write(tmp_filepath, dump_filename)
- dump_file.close()
-report_time_taken()
-
-# Create complete backup
-logging.info('Creating database backup')
-if not os.path.exists(backup_dir):
- logging.info('Creating backup dir: %s' % backup_dir)
- os.makedirs(backup_dir)
-
-def get_db_config(): # copied from fabfile
- url = config['sqlalchemy.url']
- # e.g. 'postgres://tester:pass@localhost/ckantest3'
- db_details_match = re.match('^\s*(?P<db_type>\w*)://(?P<db_user>\w*):?(?P<db_pass>[^@]*)@(?P<db_host>[^/:]*):?(?P<db_port>[^/]*)/(?P<db_name>[\w.-]*)', url)
-
- db_details = db_details_match.groupdict()
- return db_details
-db_details = get_db_config()
-pg_dump_filename = start_time.strftime(backup_filebase)
-pg_dump_filepath = os.path.join(backup_dir, pg_dump_filename)
-cmd = 'export PGPASSWORD=%s&&pg_dump -U %s -h %s %s > %s' % (db_details['db_pass'], db_details['db_user'], db_details['db_host'], db_details['db_name'], pg_dump_filepath)
-logging.info('Backup command: %s' % cmd)
-ret = os.system(cmd)
-if ret == 0:
- logging.info('Backup successful: %s' % pg_dump_filepath)
-else:
- logging.error('Backup error: %s' % ret)
-
-# Log footer
-report_time_taken()
-logging.info('Finished daily script')
-logging.info('----------------------------')
--- a/ckanext/dgu/scripts/change_licenses.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-from mass_changer import *
-from common import ScriptError
-
-class ChangeLicenses(object):
- def __init__(self, ckanclient, dry_run=False, force=False):
- '''
- Changes licenses of packages.
- @param ckanclient: instance of ckanclient to make the changes
- @param license_id: id of the license to change packages to
- @param force: do not stop if there is an error with one package
- '''
- self.ckanclient = ckanclient
- self.dry_run = dry_run
- self.force = force
-
- def change_all_packages(self, license_id):
- instructions = [
- ChangeInstruction(AnyPackageMatcher(),
- BasicPackageChanger('license_id', license_id))
- ]
- self.mass_changer = MassChanger(self.ckanclient,
- instructions,
- dry_run=self.dry_run,
- force=self.force)
- self.mass_changer.run()
-
- def change_oct_2010(self, license_id):
- instructions = [
- ChangeInstruction(
- [
- BasicPackageMatcher('license_id', 'localauth-withrights'),
- BasicPackageMatcher('name', 'spotlightonspend-transactions-download'),
- BasicPackageMatcher('name', 'better-connected-2010-council-website-performance-survey-results'),
- ],
- NoopPackageChanger()),
- ChangeInstruction(
- AnyPackageMatcher(),
- BasicPackageChanger('license_id', self.license_id)),
- ]
- self.mass_changer = MassChanger(self.ckanclient,
- instructions,
- dry_run=self.dry_run,
- force=self.force)
- self.mass_changer.run()
--- a/ckanext/dgu/scripts/change_licenses_cmd.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-import sys
-
-from ckanext.importlib.loader import ResourceSeries
-from ckanext.dgu.scripts.change_licenses import ChangeLicenses
-from ckanext.dgu.scripts.mass_changer_cmd import MassChangerCommand
-from ckanclient import CkanClient
-
-class ChangeLicensesCommand(MassChangerCommand):
- def __init__(self):
- commands = ('all', 'oct10')
- super(ChangeLicensesCommand, self).__init__(commands)
-
- def add_options(self):
- self.parser.add_option("--license-id",
- dest="license_id",
- help="ID of the license to change all packages to")
-
- def command(self):
- super(ChangeLicensesCommand, self).command()
- if self.options.license_id is None:
- self.parser.error("Please specify a license ID")
- if len(self.args) != 1:
- self.parser.error("Command is required")
-
- getattr(self, self.args[0])()
-
- def all(self):
- client = CkanClient(base_location=self.options.api_url,
- api_key=self.options.api_key,
- http_user=self.options.username,
- http_pass=self.options.password)
- change_licenses = ChangeLicenses(client, dry_run=self.options.dry_run, force=self.options.force)
- change_licenses.change_all_packages(self.options.license_id)
-
- def oct10(self):
- client = CkanClient(base_location=self.options.api_url,
- api_key=self.options.api_key)
- change_licenses = ChangeLicenses(client, dry_run=self.options.dry_run, force=self.options.force)
- change_licenses.change_oct_2010(self.options.license_id)
-
-def command():
- ChangeLicensesCommand().command()
-
--- a/ckanext/dgu/scripts/common.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-class ScriptError(Exception):
- pass
-
-def remove_readonly_fields(pkg):
- '''Takes a package dictionary and gets rid of any read-only fields
- so that you can write the package to the API.'''
- for read_only_field in ('id', 'relationships', 'ratings_average',
- 'ratings_count', 'ckan_url',
- 'metadata_modified',
- 'metadata_created'):
- if pkg.has_key(read_only_field):
- del pkg[read_only_field]
--- a/ckanext/dgu/scripts/dump_analysis.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,281 +0,0 @@
-from optparse import OptionParser
-import zipfile
-import gzip
-import json
-from collections import defaultdict
-import datetime
-import os
-import logging
-import re
-import glob
-
-from datautil.tabular import TabularData, CsvReader, CsvWriter
-from sqlalchemy.util import OrderedDict
-
-log = logging.getLogger(__file__)
-
-import_source_prefixes = {
- 'ONS': 'ONS feed',
- 'COSPREAD': 'Spreadsheet upload',
- 'Manual': 'Spreadsheet upload',
- 'DATA4NR': 'Data for Neighbourhoods and Regeneration import',
- }
-date_converters = (
- (re.compile('(\d{4})(\d{2})(\d{2})'), '%Y%m%d'),
- (re.compile('(\d{4})-(\d{2})-(\d{2})'), '%Y-%m-%d'),
- )
-
-def parse_date(date_str, search=False):
- assert isinstance(date_str, basestring)
- for date_regex, date_format in date_converters:
- matcher = date_regex.search if search else date_regex.match
- match = matcher(date_str)
- if match:
- date = datetime.datetime.strptime(match.group(), date_format)
- return date.date()
- raise ValueError('Cannot parse date: %r' % date_str)
-
-def get_run_info():
- run_info = 'This analysis is produced by an OKF script\n'
- run_info += 'Date last updated: %r\n' % datetime.date.today().strftime('%Y-%m-%d')
- run_info += 'Script filename: %r\n' % os.path.basename(__file__)
- run_info += 'Script repository: http://bitbucket.org/okfn/ckanext-dgu\n'
- run_info += 'Dump files for analysis: http://data.gov.uk/data/dumps\n'
- return run_info
-
-class AnalysisFile(object):
- def __init__(self, analysis_filepath, run_info=None):
- self.analysis_filepath = analysis_filepath
- self.data_by_date = None # {date: analysis_dict}
- self.run_info = run_info
- self.load()
-
- def load(self):
- '''Load analysis file and store in self.data_by_date'''
- raise NotImplementedError
-
- def init(self):
- '''Initialise self.data for the first time (instead of loading
- from an existing file).'''
- self.data_by_date = {}
-
- def save(self):
- '''Save self.data_by_date to analysis file'''
- raise NotImplementedError
-
- def format_date(self, date):
- assert isinstance(date, datetime.date)
- return date.strftime('%Y-%m-%d')
-
- def add_analysis(self, date, analysis_dict):
- '''Takes an analysis and updates self.data_by_date.'''
- assert isinstance(date, datetime.date)
- assert isinstance(analysis_dict, dict)
- self.data_by_date[date] = analysis_dict
-
-class TabularAnalysisFile(AnalysisFile):
- def load(self):
- '''Load analysis file and store in self.data_by_date'''
- assert self.data_by_date == None, 'Data already present'
- self.data_by_date = {}
- table = self.load_table()
- if len(table.data) > 0:
- try:
- date_column = table.header.index('date')
- except ValueError:
- raise ValueError('Data does not have a date: %r' % table.header)
- for row in table.data:
- row_data = dict(zip(table.header, row))
- date = parse_date(row_data['date'])
- del row_data['date']
- self.data_by_date[date] = row_data
-
- def load_table(self):
- '''Load analysis file and return as TabularData'''
- raise NotImplementedError
-
- def save(self):
- '''Save self.data_by_date to analysis file'''
- if self.data_by_date:
- header = ['date']
- for date in self.data_by_date:
- for column in self.data_by_date[date].keys():
- if column not in header:
- header.append(column)
- else:
- header = []
- data_rows = []
- for date, analysis in sorted(self.data_by_date.items(), key=lambda (date, analysis): date):
- data_row = [self.format_date(date)]
- for title in header[1:]:
- data_row.append(analysis.get(title))
- data_rows.append(data_row)
- data_table = TabularData(data_rows, header)
- self.save_table(data_table)
-
- def save_table(self, data_table):
- '''Save data_table to analysis file'''
- raise NotImplementedError
-
-class CsvAnalysisFile(TabularAnalysisFile):
- def load_table(self):
- if not os.path.exists(self.analysis_filepath):
- log.info('Creating new analysis file: %s', self.analysis_filepath)
- return TabularData()
- data_table = CsvReader().read(filepath_or_fileobj=self.analysis_filepath)
- return data_table
-
- def save_table(self, data_table):
- fileobj = open(self.analysis_filepath, 'w')
- try:
- CsvWriter().write(data_table, fileobj)
- finally:
- fileobj.close()
-
-class TxtAnalysisFile(AnalysisFile):
- def load(self):
- self.data_by_date = {}
- if not os.path.exists(self.analysis_filepath):
- log.info('Creating new analysis file: %s', self.analysis_filepath)
- return
- fileobj = open(self.analysis_filepath, 'r')
- regex = re.compile(r'^(\d{4}-\d{2}-\d{2}) : (.*)\n')
- try:
- while True:
- line = fileobj.readline()
- if line == '':
- break
- match = regex.match(line)
- if not match:
- if line.strip() and ' : ' in line:
- raise AssertionError('Could not parse line: %r' % line)
- else:
- # just a comment
- continue
- date_str, analysis_str = match.groups()
- date = parse_date(date_str)
- self.data_by_date[date] = analysis_str
- finally:
- fileobj.close()
-
- def save(self):
- fileobj = open(self.analysis_filepath, 'w')
- try:
- fileobj.write(self.run_info + '\n')
- for date, analysis in self.data_by_date.items():
- line = '%s : %s\n' % (self.format_date(date), repr(analysis))
- fileobj.write(line)
- finally:
- fileobj.close()
-
-
-class DumpAnalysis(object):
- def __init__(self, dump_filepath):
- log.info('Analysing %s' % dump_filepath)
- self.dump_filepath = dump_filepath
- self.run()
-
- def run(self):
- self.save_date()
- self.analysis_dict = OrderedDict()
- packages = self.get_packages()
- self.analysis_dict['Total active and deleted packages'] = len(packages)
- packages = self.filter_out_deleted_packages(packages)
- self.analysis_dict['Total active packages'] = len(packages)
- pkg_bins = self.analyse_by_source(packages)
- for bin, pkgs in pkg_bins.items():
- self.analysis_dict['Packages by source: %s' % bin] = len(pkgs)
- self.print_analysis(pkg_bins)
-
- def save_date(self):
- self.date = parse_date(self.dump_filepath, search=True)
- log.info('Date of dumpfile: %r', self.date.strftime('%Y %m %d'))
-
- def get_packages(self):
- if zipfile.is_zipfile(self.dump_filepath):
- log.info('Unzipping...')
- zf = zipfile.ZipFile(self.dump_filepath)
- assert len(zf.infolist()) == 1, 'Archive must contain one file: %r' % zf.infolist()
- f = zf.open(zf.namelist()[0])
- elif self.dump_filepath.endswith('gz'):
- f = gzip.open(self.dump_filepath, 'rb')
- else:
- f = open(self.dump_filepath, 'rb')
- log.info('Reading file...')
- json_buf = f.read()
- log.info('Parsing JSON...')
- packages = json.loads(json_buf)
- log.info('Read in packages: %i' % len(packages))
- return packages
-
- def filter_out_deleted_packages(self, packages):
- filtered_pkgs = []
- for pkg in packages:
- if pkg.has_key('state'):
- is_active = pkg['state'] == 'active'
- else:
- is_active = pkg['state_id'] == 1
- if is_active:
- filtered_pkgs.append(pkg)
- log.info('Deleted packages discarded: %i', (len(packages) - len(filtered_pkgs)))
- log.info('Number of active packages: %i', (len(filtered_pkgs)))
- return filtered_pkgs
-
- def analyse_by_source(self, packages):
- pkg_bins = defaultdict(list)
- for pkg in packages:
- import_source = pkg['extras'].get('import_source')
- if import_source:
- for prefix in import_source_prefixes:
- if import_source.startswith(prefix):
- import_source = import_source_prefixes[prefix]
- break
- pkg_bins[import_source].append(pkg['name'])
- continue
- if pkg['extras'].get('INSPIRE') == 'True':
- pkg_bins['INSPIRE'].append(pkg['name'])
- continue
- pkg_bins['Manual creation using web form'].append(pkg['name'])
- return pkg_bins
-
- def print_analysis(self, pkg_bins):
- log.info('* Analysis by source *')
- for pkg_bin, pkgs in sorted(pkg_bins.items(), key=lambda (pkg_bin, pkgs): -len(pkgs)):
- log.info(' %s: %i (e.g. %r)', pkg_bin, len(pkgs), pkgs[0])
-
-def command():
- usage = 'usage: %prog [options] dumpfile.json.zip'
- usage += '\nNB: dumpfile can be gzipped, zipped or json'
- usage += '\n can be list of files and can be a wildcard.'
- parser = OptionParser(usage=usage)
- parser.add_option('--csv', dest='csv_filepath',
- help='add analysis to CSV report FILENAME', metavar='FILENAME')
- parser.add_option('--txt', dest='txt_filepath',
- help='add analysis to textual report FILENAME', metavar='FILENAME')
- (options, args) = parser.parse_args()
- input_file_descriptors = args
- input_filepaths = []
- for input_file_descriptor in input_file_descriptors:
- input_filepaths.extend(glob.glob(os.path.expanduser(input_file_descriptor)))
-
- # Open output files
- output_types = (
- # (output_filepath, analysis_file_class)
- (options.txt_filepath, TxtAnalysisFile),
- (options.csv_filepath, CsvAnalysisFile),
- )
- analysis_files = {} # analysis_file_class, analysis_file
- run_info = get_run_info()
- for output_filepath, analysis_file_class in output_types:
- if output_filepath:
- analysis_files[analysis_file_class] = analysis_file_class(output_filepath, run_info)
-
- for input_filepath in input_filepaths:
- # Run analysis
- analysis = DumpAnalysis(input_filepath)
-
- for analysis_file_class, analysis_file in analysis_files.items():
- analysis_file.add_analysis(analysis.date, analysis.analysis_dict)
- # Save
- analysis_file.save()
- log.info('Finished')
--- a/ckanext/dgu/scripts/extract_urls.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-#! /usr/bin/python
-#
-# Script for Simon Demissie to take a CKAN JSON dump and extract all the URLS.
-#
-# For help:
-# python extract_urls.py
-#
-
-import sys
-import json
-
-usage = '''
-URL extractor
-=============
-
-Takes a CKAN dump (JSON) format and writes all the urls to a CSV file.
-
-Usage:
- %s data.gov.uk-ckan-meta-data.json urls.csv
- ''' % sys.argv[0]
-if len(sys.argv) < 3:
- print usage
- sys.exit(1)
-in_fname = sys.argv[1]
-out_fname = sys.argv[2]
-f = open(in_fname)
-try:
- print 'Reading %r' % in_fname
- pkgs_json = f.read()
-finally:
- f.close()
-print 'Parsing'
-pkgs = json.loads(pkgs_json)
-print 'Found %i packages' % len(pkgs)
-print 'Writing URLs to %r' % out_fname
-out = open(out_fname, 'w')
-try:
- for pkg in pkgs:
- urls = set()
- for url in (pkg['url'],
- pkg['extras'].get('taxonomy_url')):
- if url:
- urls.add(url)
- for res in pkg['resources']:
- url = res.get('url')
- if url:
- urls.add(url)
- for url in urls:
- out.write('%s\r\n' % url.encode('utf8'))
-finally:
- f.close()
-print 'Finished successfully'
--- a/ckanext/dgu/scripts/mass_changer.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,239 +0,0 @@
-import copy
-
-from ckan import model
-from common import ScriptError, remove_readonly_fields
-
-log = __import__("logging").getLogger(__name__)
-
-class PackageMatcher(object):
- def match_ref(self, pkg_name):
- '''Override this and return True or False depending on whether
- the supplied package matches or not. If the matcher requires
- more package information, then return None and the match method
- is run.'''
- pass
-
- def match(self, pkg):
- '''Override this and return True or False depending on whether
- the supplied package matches or not.'''
- assert NotImplementedError
-
-class BasicPackageMatcher(PackageMatcher):
- def __init__(self, match_field, match_field_value):
- '''Package matching criteria: match_field==match_field_value'''
- self.match_field = match_field
- self.match_field_value = match_field_value
-
- def match(self, pkg):
- value = pkg.get(self.match_field) or \
- pkg['extras'].get(self.match_field)
- if value == self.match_field_value:
- return True
- else:
- return False
-
-class AnyPackageMatcher(PackageMatcher):
- def match(self, pkg):
- return True
-
-class ListedPackageMatcher(PackageMatcher):
- '''Package matcher based on a supplied list of package names.'''
- def __init__(self, pkg_name_list):
- assert iter(pkg_name_list)
- assert not isinstance(pkg_name_list, basestring)
- self.pkg_name_list = set(pkg_name_list)
-
- def match_name(self, pkg_name):
- return pkg_name in self.pkg_name_list
-
- def match(self, pkg):
- return pkg['name'] in self.pkg_name_list
-
-class PackageChanger(object):
- def change(self, pkg):
- '''Override this to return a changed package dictionary.
- @param pkg: package dictionary
- '''
- assert NotImplementedError
-
- def resolve_field_value(self, input_field_value, pkg):
- '''Resolves the specified field_value to one
- specific to this package.
- Examples:
- "pollution" -> "pollution"
- "%(name)s" -> "uk-pollution-2008"
- '''
- return input_field_value % pkg
-
- def flatten_pkg(self, pkg_dict):
- flat_pkg = copy.deepcopy(pkg_dict)
- for name, value in pkg_dict.items()[:]:
- if isinstance(value, (list, tuple)):
- if value and isinstance(value[0], dict) and name == 'resources':
- for i, res in enumerate(value):
- prefix = 'resource-%i' % i
- flat_pkg[prefix + '-url'] = res['url']
- flat_pkg[prefix + '-format'] = res['format']
- flat_pkg[prefix + '-description'] = res['description']
- else:
- flat_pkg[name] = ' '.join(value)
- elif isinstance(value, dict):
- for name_, value_ in value.items():
- flat_pkg[name_] = value_
- else:
- flat_pkg[name] = value
- return flat_pkg
-
-
-class BasicPackageChanger(PackageChanger):
- def __init__(self, change_field, change_field_value):
- '''Changes: pkg.change_field = change_field_value'''
- self.change_field = change_field
- self.change_field_value = change_field_value
-
- def change(self, pkg):
- flat_pkg = self.flatten_pkg(pkg)
- if pkg.has_key(self.change_field):
- pkg_field_root = pkg
- else:
- pkg_field_root = pkg['extras']
- value = self.resolve_field_value(self.change_field_value, flat_pkg)
-
- log.info('%s.%s Value %r -> %r' % \
- (pkg['name'], self.change_field,
- flat_pkg.get(self.change_field),
- value))
-
- pkg_field_root[self.change_field] = value
- return pkg
-
-class CreateResource(PackageChanger):
- def __init__(self, **resource_values):
- '''Adds new resource with the given values.
- @param resources_values: resource dictionary. e.g.:
- {'url'=xyz, 'description'=xyz}
- '''
- for key in resource_values.keys():
- assert key in model.PackageResource.get_columns()
- self.resource_values = resource_values
-
- def change(self, pkg):
- flat_pkg = self.flatten_pkg(pkg)
- resource = {}
- for key, value in self.resource_values.items():
- resource[key] = self.resolve_field_value(value, flat_pkg)
- resource_index = len(pkg['resources'])
-
- log.info('%s.resources[%i] -> %r' % \
- (pkg['name'], resource_index, resource))
-
- pkg['resources'].append(resource)
- return pkg
-
-class NoopPackageChanger(PackageChanger):
- def change(self, pkg):
- log.info('%s No change' % \
- (pkg['name']))
- return pkg
-
-class ChangeInstruction(object):
- def __init__(self, matchers=None, changers=None):
- '''Finds packages matching criteria and changes them as specified.
- Matchers are derived from PackageMatcher and any of the matchers
- can match to apply all the changes, which derive from PackageChanger.
- '''
- if isinstance(matchers, PackageMatcher):
- self.matchers = [matchers]
- elif isinstance(matchers, list) or matchers == None:
- self.matchers = matchers
- if isinstance(changers, PackageChanger):
- self.changers = [changers]
- elif isinstance(changers, list) or changers == None:
- self.changers = changers
-
-
-class MassChanger(object):
- def __init__(self, ckanclient, instructions, dry_run=False, force=False):
- '''
- Changes package properties en masse
- @param ckanclient: instance of ckanclient to make the changes
- @param instructions: (ordered) list of ChangeInstruction objects
- @param dry_run: show matching and potential changes, but do not
- write the changes back to the server.
- @param force: prevents aborting when there is an error with one package
- '''
- self.ckanclient = ckanclient
- version = self.ckanclient.api_version_get()
- assert int(version) >= 2, 'API Version is %s. Script requires at least Version 2.' % version
- self.instructions = instructions
- self.dry_run = dry_run
- self.force = force
- self._pkg_cache = {}
-
- def run(self):
- pkg_refs = self.ckanclient.package_register_get()
- for pkg_ref in pkg_refs:
- try:
- pkg = self._get_pkg(pkg_ref)
- instruction = self._match_instructions(pkg_ref)
- if instruction:
- self._change_package(self._get_pkg(pkg_ref), instruction)
- except ScriptError, e:
- err = 'Problem with package %s: %r' % (pkg_ref, e.args)
- log.error(err)
- if not self.force:
- log.error('Aborting (avoid this with --force)')
- raise ScriptError(err)
-
- def _get_pkg(self, pkg_ref):
- if not self._pkg_cache.has_key(pkg_ref):
- pkg = self.ckanclient.package_entity_get(pkg_ref)
- if self.ckanclient.last_status != 200:
- raise ScriptError('Could not get package ID %s: %r' % \
- (pkg_ref, self.ckanclient.last_status))
- remove_readonly_fields(pkg)
- self._pkg_cache[pkg_ref] = pkg
- return self._pkg_cache[pkg_ref]
-
- def _match_instructions(self, pkg_ref):
- for instruction in self.instructions:
- for matcher in instruction.matchers:
- assert isinstance(matcher, PackageMatcher), matcher
- name_match = None
-# name_match = matcher.match_name(pkg_ref)
- if name_match == None:
- pkg = self._get_pkg(pkg_ref)
- if matcher.match(pkg):
- return instruction
-
- def _change_package(self, pkg, instruction):
- pkg_before = copy.deepcopy(pkg)
- for changer in instruction.changers:
- pkg = changer.change(pkg)
-
- if pkg == pkg_before:
- log.info('...not changed')
- return
-
- if not self.dry_run:
- self.ckanclient.package_entity_put(pkg)
- if self.ckanclient.last_status == 200:
- log.info('...saved %s:' % pkg['name'])
- log.debug('Package saved: %r' % self.ckanclient.last_message)
- else:
- raise ScriptError('Post package %s error: %s' % (pkg['name'], self.ckanclient.last_message))
-
-
-class MassChangerNamedPackages(MassChanger):
- def run(self):
- for pkg_ref in self.pkg_name_list:
- try:
- instruction = self._match_instructions(pkg_ref)
- if instruction:
- self._change_package(self._get_pkg(pkg_ref), instruction)
- except ScriptError, e:
- err = 'Problem with package %s: %r' % (pkg_ref, e.args)
- log.error(err)
- if not self.force:
- log.error('Aborting (avoid this with --force)')
- raise ScriptError(err)
--- a/ckanext/dgu/scripts/mass_changer_cmd.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-import sys
-
-from ckanext.importlib.api_command import ApiCommand
-
-class MassChangerCommand(ApiCommand):
- def __init__(self, commands=None):
- usage = "% %prog [options]"
- if commands:
- usage += " {%s}" % '|'.join(commands)
- super(MassChangerCommand, self).__init__(usage=usage)
-
- def add_options(self):
- super(MassChangerCommand, self).add_options()
- self.parser.add_option("-d", "--dry-run",
- dest="dry_run",
- action="store_true",
- default=False,
- help="Write no changes")
- self.parser.add_option("-f", "--force",
- dest="force",
- action="store_true",
- default=False,
- help="Don't abort rest of packages on an error")
-
- def command(self):
- super(MassChangerCommand, self).command()
-
- # now do command
-
-def command():
- MassChangerCommand().command()
-
--- a/ckanext/dgu/scripts/metadata_v3_migration.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-from collections import defaultdict
-import socket
-import copy
-
-from nose.tools import assert_equal
-
-from common import ScriptError, remove_readonly_fields
-from ckanclient import CkanApiError
-
-from ckanext.importlib.spreadsheet_importer import CsvData
-
-log = __import__("logging").getLogger(__name__)
-
-mapped_attributes = {
- 'temporal_granularity': dict(zip(['years', 'quarters', 'months', 'weeks', 'days', 'hours', 'points'],
- ['year', 'quarter', 'month', 'week', 'day', 'hour', 'point'])),
-
- 'update_frequency': dict(zip(('annually', 'quarterly', 'monthly', 'never'),
- ('annual', 'quarterly', 'monthly', 'never'))), #'discontinued'
- }
-
-class MetadataV3Migration:
- '''Changes department/agency fields to published_by/_via'''
- def __init__(self, ckanclient,
- dry_run=False):
- self.ckanclient = ckanclient
- self.dry_run = dry_run
-
- def run(self):
- pkgs_done = []
- pkgs_rejected = defaultdict(list) # reason: [pkgs]
- all_pkgs = self.ckanclient.package_register_get()
- log.info('Working on %i packages', len(all_pkgs))
- for pkg_ref in all_pkgs:
- log.info('Package: %s', pkg_ref)
- try:
- try:
- pkg = self.ckanclient.package_entity_get(pkg_ref)
- except CkanApiError, e:
- log.error('Could not get: %r' % e)
- pkgs_rejected['Could not get package: %r' % e].append(pkg_ref)
- continue
- pkg_before_changes = copy.deepcopy(pkg)
-
- for attribute in mapped_attributes:
- orig_value = pkg['extras'].get(attribute)
- if not orig_value:
- continue
- mapped_value = mapped_attributes[attribute].get(orig_value)
- if mapped_value:
- pkg['extras'][attribute] = mapped_value
- log.info('%s: %r -> %r', \
- attribute, orig_value, mapped_value)
- else:
- log.warn('Invalid value for %r: %r', \
- attribute, orig_value)
-
- if pkg == pkg_before_changes:
- log.info('...package unchanged: %r' % pkg['name'])
- pkgs_rejected['Package unchanged: %r' % pkg['name']].append(pkg)
- continue
- if not self.dry_run:
- remove_readonly_fields(pkg)
- try:
- self.ckanclient.package_entity_put(pkg)
- except CkanApiError, e:
- log.error('Could not put: %r' % e)
- pkgs_rejected['Could not put package: %r' % e].append(pkg_ref)
- continue
- log.info('...done')
- pkgs_done.append(pkg)
- except ScriptError, e:
- log.error('Error during processing package %r: %r', \
- pkg_ref, e)
- pkgs_rejected['Error: %r' % e].append(pkg_ref)
- continue
- except Exception, e:
- log.error('Uncaught exception during processing package %r: %r', \
- pkg_ref, e)
- pkgs_rejected['Exception: %r' % e].append(pkg_ref)
- raise
- log.info('-- Finished --')
- log.info('Processed %i packages', len(pkgs_done))
- rejected_pkgs = []
- for reason, pkgs in pkgs_rejected.items():
- rejected_pkgs.append('\n %i: %s' % (len(pkgs), reason))
- log.info('Rejected packages: %s', rejected_pkgs)
-
-import sys
-
-#from ckanext.api_command import ApiCommand
-from mass_changer_cmd import MassChangerCommand
-
-class Command(MassChangerCommand):
- def add_options(self):
- super(Command, self).add_options()
-
- def command(self):
- super(Command, self).command()
-
- # now do command
- cmd = MetadataV3Migration(self.client,
- dry_run=self.options.dry_run)
- cmd.run()
-
-def command():
- Command().command()
-
--- a/ckanext/dgu/scripts/ofsted_fix.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-from collections import defaultdict
-
-from common import ScriptError, remove_readonly_fields
-
-from ckanclient import CkanApiError
-
-class OfstedFix:
- '''Fixes old ONS imports misattributed to Ofsted DGU#780'''
- def __init__(self, ckanclient, dry_run):
- self.ckanclient = ckanclient
- self.dry_run = dry_run
-
- def run(self):
- limit = 100
- def search(page=None):
- opts = {
-# 'external_reference': 'ONSHUB',
- 'limit': limit}
- if page != None:
- opts['offset'] = page * limit
- return self.ckanclient.package_search(
- 'Education',
-# 'Source agency: Education',
- opts)
- res = search()
- print 'Found %i packages possibly related.' % res['count']
- pkgs_done = []
- pkgs_rejected = defaultdict(list) # reason: [pkgs]
- for page in range(res['count'] / limit):
- res = search(page)
- pkg_refs = res['results']
- for pkg_ref in pkg_refs:
- pkg = self.ckanclient.package_entity_get(pkg_ref)
- if 'ONS' not in pkg['extras'].get('import_source', ''):
- pkgs_rejected['Not imported from ONS'].append(pkg)
- continue
- if pkg.get('state', 'active') != 'active':
- pkgs_rejected['Package state = %r' % pkg.get('state')].append(pkg)
- continue
- source_agency = '|'.join([line.replace('Source agency:', '').strip() for line in pkg['notes'].split('\n') if 'Source agency' in line])
- if source_agency != 'Education':
- pkgs_rejected['Source agency = %r' % source_agency].append(pkg)
- continue
- if 'Department for Education' in pkg['extras'].get('department', ''):
- pkgs_rejected['Department = %r' % pkg['extras'].get('department', '')].append(pkg)
- continue
-
- pkg_name = pkg['name']
- dept = pkg['extras'].get('department')
- agency = pkg['extras'].get('agency')
- author = pkg['author']
- print '%s :\n %r %r %r' % (pkg_name, dept, agency, author)
- if not self.dry_run:
- pkg['extras']['department'] = 'Department for Education'
- pkg['extras']['agency'] = ''
- pkg['author'] = 'Department for Education'
- remove_readonly_fields(pkg)
- self.ckanclient.package_entity_put(pkg)
- print '...done'
- pkgs_done.append(pkg)
- print 'Processed %i packages' % len(pkgs_done)
- print 'Rejected packages:'
- for reason, pkgs in pkgs_rejected.items():
- print ' %i: %s' % (len(pkgs), reason)
--- a/ckanext/dgu/scripts/ofsted_fix_cmd.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-import sys
-
-#from ckanext.api_command import ApiCommand
-from mass_changer_cmd import MassChangerCommand
-from ofsted_fix import OfstedFix
-
-class OfstedFixCmd(MassChangerCommand):
- def command(self):
- super(OfstedFixCmd, self).command()
-
- # now do command
- cmd = OfstedFix(self.client, dry_run=self.options.dry_run)
- cmd.run()
-
-def command():
- OfstedFixCmd().command()
-
--- a/ckanext/dgu/scripts/ons_analysis.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-from common import ScriptError
-
-from ckanclient import CkanApiError
-
-class OnsAnalysis:
- def __init__(self, ckanclient):
- self.ckanclient = ckanclient
- assert self.ckanclient.api_version_get() == '1', self.ckanclient.api_version_get()
-
- def run(self):
- pkg_names = self.ckanclient.package_register_get()
- pkgs = []
- for pkg_name in pkg_names:
- if pkg_name.endswith('_'):
- pkg = self.ckanclient.package_entity_get(pkg_name)
- if 'ONS' in pkg['extras'].get('import_source', '') and \
- pkg.get('state', 'active'):
- dept = pkg['extras'].get('department')
- try:
- pkg_associated = self.ckanclient.package_entity_get(pkg_name.rstrip('_'))
- except CkanApiError:
- dept_associated = None
- else:
- dept_associated = pkg_associated['extras'].get('department')
- pkgs.append(pkg)
- print '%r\n-> %r %r' % (pkg_name, dept, dept_associated)
- print '%i packages' % len(pkgs)
-
--- a/ckanext/dgu/scripts/ons_analysis_cmd.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-import sys
-
-from ckanclient import CkanClient
-from ckanext.importlib.api_command import ApiCommand
-from ons_analysis import OnsAnalysis
-
-class OnsAnalysisCommand(ApiCommand):
- def add_options(self):
- pass
-
- def command(self):
- super(OnsAnalysisCommand, self).command()
-
- # now do command
- client = CkanClient(base_location=self.options.api_url,
- api_key=self.options.api_key,
- http_user=self.options.username,
- http_pass=self.options.password)
- change_licenses = OnsAnalysis(client)
- change_licenses.run()
-
-def command():
- OnsAnalysisCommand().command()
-
--- a/ckanext/dgu/scripts/ons_delete_resourceless_packages.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-from collections import defaultdict
-import socket
-import copy
-
-from xmlrpclib import ServerProxy, ProtocolError
-from nose.tools import assert_equal
-
-from common import ScriptError, remove_readonly_fields
-from ckanclient import CkanApiError
-
-from ckanext.importlib.spreadsheet_importer import CsvData
-
-log = __import__("logging").getLogger(__name__)
-
-# NB This script doesn't do anything because of problem with deleting
-# packages over the API:
-# http://trac.ckan.org/ticket/1053
-
-class OnsDeleteResourcelessPackages:
- '''Remove all resources from ONS packages'''
- def __init__(self, ckanclient,
- xmlrpc_domain, xmlrpc_username, xmlrpc_password,
- dry_run=False):
- self.ckanclient = ckanclient
- self.dry_run = dry_run
- self.xmlrpc = {'username':xmlrpc_username,
- 'password':xmlrpc_password,
- 'domain':xmlrpc_domain}
-
- def run(self):
- pkgs_done = []
- pkgs_rejected = defaultdict(list) # reason: [pkgs]
- all_pkgs = self.ckanclient.package_register_get()
- log.info('Working on %i packages', len(all_pkgs))
- for pkg_ref in all_pkgs:
- log.info('Package: %s', pkg_ref)
- try:
- try:
- pkg = self.ckanclient.package_entity_get(pkg_ref)
- except CkanApiError, e:
- log.error('Could not get: %r' % e)
- pkgs_rejected['Could not get package: %r' % e].append(pkg_ref)
- continue
- pkg_before_changes = copy.deepcopy(pkg)
-
- if pkg['state'] != 'active':
- msg = 'Not active (%s)' % pkg['state']
- log.info('...%s: %r' % (msg, pkg['name']))
- pkgs_rejected[msg].append(pkg)
- continue
- if pkg['extras'].get('external_reference') != 'ONSHUB':
- msg = 'Not ONS'
- log.info('...%s: %r' % (msg, pkg['name']))
- pkgs_rejected[msg].append(pkg)
- continue
-
- if pkg['resources'] == []:
- pkg['state'] = 'deleted'
-
- if pkg == pkg_before_changes:
- log.info('...package unchanged: %r' % pkg['name'])
- pkgs_rejected['Package unchanged'].append(pkg)
- continue
- if not self.dry_run:
- remove_readonly_fields(pkg)
- try:
- self.ckanclient.package_entity_put(pkg)
- except CkanApiError, e:
- log.error('Could not put: %r' % e)
- pkgs_rejected['Could not put package: %r' % e].append(pkg_ref)
- continue
- log.info('...done')
- pkgs_done.append(pkg)
- except ScriptError, e:
- log.error('Error during processing package %r: %r', \
- pkg_ref, e)
- pkgs_rejected['Error: %r' % e].append(pkg_ref)
- continue
- except Exception, e:
- log.error('Uncaught exception during processing package %r: %r', \
- pkg_ref, e)
- pkgs_rejected['Exception: %r' % e].append(pkg_ref)
- raise
- log.info('-- Finished --')
- log.info('Processed %i packages', len(pkgs_done))
- rejected_pkgs = []
- for reason, pkgs in pkgs_rejected.items():
- rejected_pkgs.append('\n %i: %s' % (len(pkgs), reason))
- log.info('Rejected packages: %s', rejected_pkgs)
-
-import sys
-
-from mass_changer_cmd import MassChangerCommand
-
-class Command(MassChangerCommand):
- def add_options(self):
- super(Command, self).add_options()
- self.parser.add_option("-D", "--xmlrpc-domain",
- dest="xmlrpc_domain",
- )
- self.parser.add_option("-U", "--xmlrpc-username",
- dest="xmlrpc_username",
- )
- self.parser.add_option("-P", "--xmlrpc-password",
- dest="xmlrpc_password",
- )
-
- def command(self):
- super(Command, self).command()
-
- # now do command
- cmd = OnsDeleteResourcelessPackages(self.client,
- self.options.xmlrpc_domain,
- self.options.xmlrpc_username,
- self.options.xmlrpc_password,
- dry_run=self.options.dry_run)
- cmd.run()
-
-def command():
- Command().command()
-
--- a/ckanext/dgu/scripts/ons_remove_resources.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-from collections import defaultdict
-import socket
-import copy
-
-from xmlrpclib import ServerProxy, ProtocolError
-from nose.tools import assert_equal
-
-from common import ScriptError, remove_readonly_fields
-from ckanclient import CkanApiError
-
-from ckanext.importlib.spreadsheet_importer import CsvData
-
-log = __import__("logging").getLogger(__name__)
-
-class OnsRemoveResources:
- '''Remove all resources from ONS packages'''
- def __init__(self, ckanclient,
- xmlrpc_domain, xmlrpc_username, xmlrpc_password,
- dry_run=False):
- self.ckanclient = ckanclient
- self.dry_run = dry_run
- self.xmlrpc = {'username':xmlrpc_username,
- 'password':xmlrpc_password,
- 'domain':xmlrpc_domain}
-
- def run(self):
- pkgs_done = []
- pkgs_rejected = defaultdict(list) # reason: [pkgs]
- all_pkgs = self.ckanclient.package_register_get()
- log.info('Working on %i packages', len(all_pkgs))
- for pkg_ref in all_pkgs:
- log.info('Package: %s', pkg_ref)
- try:
- try:
- pkg = self.ckanclient.package_entity_get(pkg_ref)
- except CkanApiError, e:
- log.error('Could not get: %r' % e)
- pkgs_rejected['Could not get package: %r' % e].append(pkg_ref)
- continue
- pkg_before_changes = copy.deepcopy(pkg)
-
- if pkg['state'] != 'active':
- msg = 'Not active (%s)' % pkg['state']
- log.info('...%s: %r' % (msg, pkg['name']))
- pkgs_rejected[msg].append(pkg)
- continue
- if pkg['extras'].get('external_reference') != 'ONSHUB':
- msg = 'Not ONS'
- log.info('...%s: %r' % (msg, pkg['name']))
- pkgs_rejected[msg].append(pkg)
- continue
- pkg['resources'] = []
-
- if pkg == pkg_before_changes:
- log.info('...package unchanged: %r' % pkg['name'])
- pkgs_rejected['Package unchanged'].append(pkg)
- continue
- if not self.dry_run:
- remove_readonly_fields(pkg)
- try:
- self.ckanclient.package_entity_put(pkg)
- except CkanApiError, e:
- log.error('Could not put: %r' % e)
- pkgs_rejected['Could not put package: %r' % e].append(pkg_ref)
- continue
- log.info('...done')
- pkgs_done.append(pkg)
- except ScriptError, e:
- log.error('Error during processing package %r: %r', \
- pkg_ref, e)
- pkgs_rejected['Error: %r' % e].append(pkg_ref)
- continue
- except Exception, e:
- log.error('Uncaught exception during processing package %r: %r', \
- pkg_ref, e)
- pkgs_rejected['Exception: %r' % e].append(pkg_ref)
- raise
- log.info('-- Finished --')
- log.info('Processed %i packages', len(pkgs_done))
- rejected_pkgs = []
- for reason, pkgs in pkgs_rejected.items():
- rejected_pkgs.append('\n %i: %s' % (len(pkgs), reason))
- log.info('Rejected packages: %s', rejected_pkgs)
-
-import sys
-
-from mass_changer_cmd import MassChangerCommand
-
-class Command(MassChangerCommand):
- def add_options(self):
- super(Command, self).add_options()
- self.parser.add_option("-D", "--xmlrpc-domain",
- dest="xmlrpc_domain",
- )
- self.parser.add_option("-U", "--xmlrpc-username",
- dest="xmlrpc_username",
- )
- self.parser.add_option("-P", "--xmlrpc-password",
- dest="xmlrpc_password",
- )
-
- def command(self):
- super(Command, self).command()
-
- # now do command
- cmd = OnsRemoveResources(self.client,
- self.options.xmlrpc_domain,
- self.options.xmlrpc_username,
- self.options.xmlrpc_password,
- dry_run=self.options.dry_run)
- cmd.run()
-
-def command():
- Command().command()
-
--- a/ckanext/dgu/scripts/publisher_map_2.csv Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,393 +0,0 @@
-Agency text,Corrected name,Parent organisation
- Newcastle PCT,Newcastle Primary Care Trust,National Health Service
- NHS Information Centre Health and Social Care Information Centre,NHS Information Centre for Health and Social Care,National Health Service
-2gether NHS Foundation Trust,,National Health Service
-Acas,"Advisory, Conciliation and Arbitration Service (Acas)","Department for Business, Innovation and Skills"
-Advantage West Midlands,,"Department for Business, Innovation and Skills"
-"Advisory, Conciliation and Arbitration Service (Acas)",,"Department for Business, Innovation and Skills"
-Airedale NHS Foundation Trust,,National Health Service
-Alder Hey Children's NHS Foundation Trust,,National Health Service
-Animal Health,,"Department for Environment, Food and Rural Affairs"
-Appointments Commision,Appointments Commission,Department of Health
-Appointments Commission,,Department of Health
-Ashford and St. Peter's Hospitals NHS Trust,,National Health Service
-Avon and Wiltshire Mental Health Partnership NHS Trust,,National Health Service
-Barnet Primary Care Trust,,National Health Service
-Barnsley Primary Care Trust,,National Health Service
-Barts and The London NHS Trust,,National Health Service
-Berkshire East Primary Care Trust,,National Health Service
-Birmingham and Solihull Mental Health Foundation Trust,,National Health Service
-Birmingham Children's NHS Foundation Trust,,National Health Service
-Blackburn with Darwen Care Trust Plus,,National Health Service
-"Blackpool, Fylde & Wyre Hospitals NHS Foundation Trust",,National Health Service
-Bradford District Care Trust,,National Health Service
-Brighton and Sussex University Hospitals NHS Trust,,National Health Service
-British Educational Communications and Technology Agency,,Department for Education
-British Transport Police Authority,,Department for Transport
-Buckinghamshire Healthcare NHS Trust,,National Health Service
-Burton Hospitals NHSFT,,National Health Service
-Buying Solutions,,Cabinet Office
-Calderstones Partnership NHS Foundation Trust,,National Health Service
-CCRC,Criminal Cases Review Commission,Ministry of Justice
-CEFAS,"Centre for Environment, Fisheries & Aquaculture Science","Department for Environment, Food and Rural Affairs"
-Central and Eastern Cheshire Primary Care NHS Trust,,National Health Service
-Central and Eastern Cheshire Primary Care Trust,Central and Eastern Cheshire Primary Care NHS Trust,National Health Service
-Charity Commission,,
-Children and Family Court Advisory and Support Service,,Department for Education
-Children's Workforce Development Council (CWDC),Children's Workforce Development Council,Department for Education
-CIB - Company Investigation Brancch,Companies Investigation Branch,"Department for Business, Innovation and Skills"
-CICA,Criminal Injuries Compensation Authority,Ministry of Justice
-CITB-ConstructionSkills,Construction Industry Training Board,"Department for Business, Innovation and Skills"
-COI,Central Office of Information,Cabinet Office
-Colchester Hospital University NHS Foundation Trust,,National Health Service
-Commission for Architecture and the Built Environment (CABE) ,Commission for Architecture and the Built Environment,"Department for Culture, Media and Sport"
-Commission for Rural Communities,,"Department for Environment, Food and Rural Affairs"
-Cornwall and Isles of Scilly Community Health Services,,National Health Service
-Cornwall Partnership NHS Foundation Trust,,National Health Service
-Council for Healthcare and Regulatory Excellence,Council for Healthcare Regulatory Excellence,Department of Health
-Counsumer Council for Water,Consumer Council for Water,"Department for Environment, Food and Rural Affairs"
-County Durham Primary Care Trust,,National Health Service
-Coventry & Warwickshire Partnership NHS Trust,,National Health Service
-Coventry PCT,Coventry Primary Care Trust,National Health Service
-CRB,Criminal Records Bureau,Home Office
-Criminal Records Bureau,,Home Office
-Cumbria PCT,Cumbria Primary Care Trust,National Health Service
-Darlington Primary Care Trust,,National Health Service
-DARTFORD & GRAVESHAM NHS TRUST,Dartford and Gravesham NHS Trust,National Health Service
-Dartford and Gravesham NHS Trust,,National Health Service
-Defence Analytical Services and Advice,Ministry of Defence,
-Derbyshire Community Health Services,,National Health Service
-Derbyshire Mental Health Services Trust,,National Health Service
-Devon Partnership NHS Trust,,National Health Service
-Directly Operated Railways,,Department for Transport
-Dudley and Walsall Mental Health Partnership NHS Trust,,National Health Service
-Dudley Group of Hospitals NHS Foundation Trust,,National Health Service
-Dudley Primary Care Trust,,National Health Service
-Ealing PCT,Ealing Primary Care Trust,National Health Service
-East Cheshire NHS Trust,,National Health Service
-East Lancashire Hospitals NHS Trust,,National Health Service
-East Midlands Ambulance Services NHS Trust,,National Health Service
-East Midlands Strategic Health Authority,,National Health Service
-East Sussex Hospitals NHS Trust,,National Health Service
-Energy Saving Trust ,,Private Sector
-Enfield PRimary Care Trust,Enfield Primary Care Trust,National Health Service
-Enfield Primary Care Trust,,National Health Service
-Engineering and Physical Sciences Research Council,,"Department for Business, Innovation and Skills"
-English Heritage,,"Department for Culture, Media and Sport"
-Environment Agency,,"Department for Environment, Food and Rural Affairs"
-Environment Agency,,"Department for Environment, Food and Rural Affairs"
-Environment Agency,,"Department for Environment, Food and Rural Affairs"
-Environment Agency,,"Department for Environment, Food and Rural Affairs"
-Epsom and St Helier University Hospitals NHS Trust,,National Health Service
-Equality and Human Rights Commission,,Government Equalities Office
-FCO Services,,Foreign and Commonwealth Office
-FERA,,"Department for Environment, Food and Rural Affairs"
-FERA,,"Department for Environment, Food and Rural Affairs"
-Fire Service College,,Department for Communities and Local Government
-Firebuy,,Department for Communities and Local Government
-Gambling Commission,,"Department for Culture, Media and Sport"
-Gangmasters Licensing Authority,,"Department for Environment, Food and Rural Affairs"
-Gateshead Primary Care Trust,,National Health Service
-General Social Care Council,,National Health Service
-Government Offices,,Department for Communities and Local Government
-Great Western Ambulance Service NHS Trust,,National Health Service
-Great Yarmouth and Waveney PCT,Great Yarmouth and Waveney Primary Care Trust,National Health Service
-Guys and St Thomas NHS Foundation Trust,,National Health Service
-Hampshire PCT,Hampshire Primary Care Trust,National Health Service
-Health and Social Care Information Centre,NHS Information Centre for Health and Social Care,National Health Service
-Health and Social Care Information Centre,NHS Information Centre for Health and Social Care,National Health Service
-Her Majesty's Inspectorate of Constabulary,NHS Information Centre for Health and Social Care,National Health Service
-Hereford Hospitals NHS Trust,,National Health Service
-Herefordshire Primary Care Trust,,National Health Service
-High Speed 2,,Private Sector
-Higher Education Statistics Agency,,"Department for Business, Innovation and Skills"
-Highways Agency,,Department for Transport
-HM Courts Service,,Ministry of Justice
-HM Land Registry,,Ministry of Justice
-HMIC,HM Inspectorate of Constabulary,Home Office
-Horniman Public Museum & Public Park Trust,,"Department for Culture, Media and Sport"
-Horserace Betting Levy Board,,"Department for Culture, Media and Sport"
-Housing and Communities Agency,,Department for Communities and Local Government
-Hull and East Yorkshire Hospitals NHS Trust,,National Health Service
-Human Tissue Authority,,Department of Health
-ICO,Information Commissioner's Office,Ministry of Justice
-Identity & Passport Service,,Home Office
-Identity and Passport Service,Identity & Passport Service,Home Office
-Imperial College Healthcare NHS Trust,,National Health Service
-Independent Living Fund,,Department for Work and Pensions
-Independent Police Complaints Commission,,Home Office
-Independent Safeguarding Authority,,Home Office
-Information Centre for Health and Social Care,NHS Information Centre for Health and Social Care,National Health Service
-Information Centre for Health and Social Care,NHS Information Centre for Health and Social Care,National Health Service
-Information Centre for health and social care,NHS Information Centre for Health and Social Care,National Health Service
-Information Centre for Health and Social Care,NHS Information Centre for Health and Social Care,National Health Service
-Infrastructure Planning Commission,,
-IPO,Infrastructure Planning Commission,
-IPS,Identity & Passport Service,Home Office
-ISD Scotland (part of NHS National Services Scotland),ISD Scotland,NHS Scotland
-ISD Scotland (part of NHS National Services Scotland),ISD Scotland,NHS Scotland
-ISD Scotland (part of NHS National Services Scotland),ISD Scotland,NHS Scotland
-ISD Scotland (part of NHS National Services Scotland),ISD Scotland,NHS Scotland
-Isle of Wight NHS Primary Care Trust,,National Health Service
-JAC,Judicial Appointments Commission,Ministry of Justice
-Joint Nature Conservation Committee,,"Department for Environment, Food and Rural Affairs"
-Keep Britain Tidy,,"Department for Environment, Food and Rural Affairs"
-Kingston Hospital NHS Trust,,National Health Service
-Kingston Primary Care Trust,,National Health Service
-Kirklees Primary Care Trust,Kirklees Primary Care Trust,National Health Service
-Kirklees Primary Care Trust,,National Health Service
-Lambeth PCT,Lambeth Primary Care Trust,National Health Service
-Lancashire Care NHS Foundation Trust,,National Health Service
-Learning and Skills Council/Skills Funding Agency,,"Department for Business, Innovation and Skills"
-Leasehold Advisory Service,,Department for Communities and Local Government
-Leeds Teaching Hospitals NHS Trust,,National Health Service
-Leicestershire Partnership NHS Trust,,National Health Service
-Lewisham PCT,Lewisham Primary Care Trust,National Health Service
-Lincolnshire Primary Care Trust,,National Health Service
-Liverpool Heart and Chest Hospital NHS Foundation Trust,,National Health Service
-Liverpool Primary Care Trust,,National Health Service
-Local Better Regulation Office (LBRO),,"Department for Business, Innovation and Skills"
-London Ambulance Service NHS Trust,,National Health Service
-London Thames Gateway Development Corporation,,Department for Communities and Local Government
-LSB,Legal Services Board,Ministry of Justice
-Manchester Mental Health and Social Care Trust,,National Health Service
-Manchester Primary Care Trust,,National Health Service
-Marine Management Organisation,,"Department for Environment, Food and Rural Affairs"
-Medway Primary Care Trust,,National Health Service
-Mersey Care NHS Trust,,National Health Service
-Met Office,,Ministry of Defence
-MHRA,Medicines and Healthcare products Regulatory Agency,Department of Health
-Mid Staffordshire NHS Foundation Trust,,National Health Service
-Milton Keynes Hospital NHS Foundation Trust,,National Health Service
-Milton Keynes Primary Care Trust,,National Health Service
-Monitor,Monitor - Independent Regulator of NHS Foundation Trusts,National Health Service
-Monitor - the Independent Regulator of NHS foundation trusts,Monitor - Independent Regulator of NHS Foundation Trusts,National Health Service
-Museum of Science and Industry,,"Department for Culture, Media and Sport"
-Museums Libraries and Archives Council,,"Department for Culture, Media and Sport"
-National College for Leadership of Schools and Childrenâs Services,National College for Leadership of Schools and Children's Services,Department for Education
-National College for Leadership of Schools and Children's Services,,Department for Education
-National College for Leadership of Schools and Children’s Services,National College for Leadership of Schools and Children's Services
-National Employment Savings Trust,,Department for Work and Pensions
-National Forest Company,,"Department for Environment, Food and Rural Affairs"
-National Fraud Authority ,,Attorney General's Office
-National Health Service in Scotland,NHS Scotland,Department of Health
-National Health Service in Scotland,NHS Scotland,Department of Health
-National Health Service in Scotland,NHS Scotland,Department of Health
-National Health Service in Scotland,NHS Scotland,Department of Health
-National Heritage Memorial Fund,,"Department for Culture, Media and Sport"
-National Museum of Science and Industry,,"Department for Culture, Media and Sport"
-National Museums Liverpool,,"Department for Culture, Media and Sport"
-National Offender Management Service,,Ministry of Justice
-National Patient Safety Agency,,National Health Service
-National Policing Improvement Agency,,Home Office
-National Treatment Agency,National Treatment Agency for Substance Misuse,National Health Service
-National Treatment Agency,National Treatment Agency for Substance Misuse,National Health Service
-National Treatment Agency for Substance Misuse,National Treatment Agency for Substance Misuse,National Health Service
-Natural Environment Research Council,,"Department for Business, Innovation and Skills"
-Neighbourhood Statistics,Office of National Statistics,UK Statistics Authority
-Newham Primary Care Trust,,National Health Service
-Newham University Hospital NHS Trust,,National Health Service
-NHM,Natural History Museum,"Department for Culture, Media and Sport"
-NHS,National Health Service,Department of Health
-NHS Ashton Leigh and Wigan,,National Health Service
-NHS Bassetlaw,,National Health Service
-NHS Bath & North East Somerset,,National Health Service
-NHS Bedfordshire,,National Health Service
-NHS Blackpool,,National Health Service
-NHS Blood and Transplant,,National Health Service
-NHS Bolton,,National Health Service
-NHS Bournemouth & Poole,,National Health Service
-NHS Bradford and Airedale,,National Health Service
-NHS Bury,,National Health Service
-NHS Central Lancashire,,National Health Service
-NHS Choices,,National Health Service
-NHS Connecting for Health,,National Health Service
-NHS Connecting for Health and NHS Business Services Authority,,National Health Service
-NHS Cornwall and Isles of Scilly,,National Health Service
-NHS Coventry,,National Health Service
-NHS Derbyshire County,,National Health Service
-NHS Devon,,National Health Service
-NHS East Lancashire,,National Health Service
-NHS East of England,,National Health Service
-NHS East Riding of Yorkshire,,National Health Service
-NHS Eastern and Coastal Kent,,National Health Service
-NHS Gloucestershire,,National Health Service
-NHS Hammersmith and Fulham,,National Health Service
-NHS Harrow,,National Health Service
-NHS Information Centre for health and social care,NHS Information Centre for Health and Social Care,National Health Service
-NHS Information Centre for Health and Social care,NHS Information Centre for Health and Social Care,National Health Service
-NHS Information Centre for Health and Social Care,NHS Information Centre for Health and Social Care,National Health Service
-NHS Institute for Innovation and Improvement,,National Health Service
-NHS Islington,,National Health Service
-NHS Knowsley,,National Health Service
-NHS Leeds,,National Health Service
-NHS Leeds (Leeds Primary Care Trust),NHS Leeds,National Health Service
-NHS Leicester City,,National Health Service
-NHS Litigation Authority,,National Health Service
-NHS London,,National Health Service
-NHS North Lincolnshire,,National Health Service
-NHS North Staffordshire,,National Health Service
-NHS North West,,National Health Service
-NHS Redbridge,,National Health Service
-NHS Rotherham,,National Health Service
-NHS South East Coast,,National Health Service
-NHS Surrey,,National Health Service
-NHS Tameside & Glossop,,National Health Service
-NHS Telford & Wrekin,,National Health Service
-NHS Trafford,,National Health Service
-NHS Walsall,,National Health Service
-NHS Wandsworth,,National Health Service
-NHS West Sussex,,National Health Service
-NHS Wiltshire,,National Health Service
-NHSBSA,Business Services Authority (NHS),National Health Service
-Nomis - Official Labour Market Statistics,Office of National Statistics,UK Statistics Authority
-Nomis - Official Labour Market Statistics,Office of National Statistics,UK Statistics Authority
-NOMS,National Offender Management Service,Ministry of Justice
-Norfolk and Waveney Mental Health NHS Foundation Trust,,National Health Service
-North Bristol NHS Trust,,National Health Service
-North Cumbria University Hospitals NHS Trust,,National Health Service
-North East Lincolnshire Care Trust Plus,,National Health Service
-North Lancashire Teaching PCT,,National Health Service
-North Staffordshire Combined Healthcare NHS Trust,,National Health Service
-North Staffordshire Shared Services,,National Health Service
-North Tyneside PCT,North Tyneside Primary Care Trust,National Health Service
-North West Specialised Commissioning Team,North West Specialised Commissioning Group,National Health Service
-Northern Devon Healtchare Trust,Northern Devon Healthcare NHS Trust,National Health Service
-Northern Devon Healthcare NHS Trust,,National Health Service
-Northern Ireland Statistics and Research Agency,,Northern Ireland Executive
-Northern Ireland Statistics and Research Agency,,Northern Ireland Executive
-Nottingham City PCT,Nottingham City Primary Care Trust,National Health Service
-Nottingham University Hospitals NHS Trust,,National Health Service
-Nottinghamshire County Teaching Primary Care Trust,,National Health Service
-Nottinghamshire Healthcare NHS Trust,,National Health Service
-NPIA,National Policing Improvement Agency,Home Office
-Office of Government Commerce,,Cabinet Office
-Office of the Advocate General,Office of the Advocate General of Scotland,
-Office of the Children's Commissioner,,Department for Education
-Office of the First and Deputy First Minister,,Northern Ireland Executive
-Office of the Immigration Services Commissioner,,Home Office
-Office of the Prime Minister,,
-Office of the Third Sector,Office for Civil Society,Cabinet Office
-Ofwat,,
-Oldham Primary Care Trust,,National Health Service
-One North East,,"Department for Business, Innovation and Skills"
-Ordnance Survey,,Department for Communities and Local Government
-Oxfordshire Learning Disability NHS Trust,,National Health Service
-Partnerships for Schools,,Department for Education
-Passenger Focus,,Department for Transport
-Planning Inspectorate,,Department for Communities and Local Government
-Police Service of Northern Ireland (PSNI),Police Service of Northern Ireland,Northern Ireland Executive
-Portsmouth City Primary Care Trust,,National Health Service
-Portsmouth NHS Trust,,National Health Service
-Probation Trusts,National Offender Management Service,Ministry of Justice
-Public Lending Right,,"Department for Culture, Media and Sport"
-Qualifications and Curriculum Development Agency,Qualifications and Curriculum Development Agency,Department for Education
-Qualifications and Curriculum Devlopment Agency,Qualifications and Curriculum Development Agency,Department for Education
-Queen Elizabeth II Conference Centre,,Department for Communities and Local Government
-Queen Victoria Hospital NHS Foundation Trust,,National Health Service
-Railway Heritage Committee,,Department for Transport
-Regional Development Agencies,,"Department for Business, Innovation and Skills"
-Renewable Fuels Agency,,Department for Transport
-Richmond and Twickenham PCT,Richmond and Twickenham Primary Care Trust,National Health Service
-Robert Jones & Agnes Hunt Orthopaedic & District Hospital NHS Trust,,National Health Service
-Rotherham Metropolitan District Council,,Local Authorities
-Royal Botanic Gardens Kew,"Royal Botanic Gardens, Kew","Department for Environment, Food and Rural Affairs"
-"Royal Botanic Gardens, Kew",,"Department for Environment, Food and Rural Affairs"
-Royal Cornwall Hospitals NHS Trust,,National Health Service
-Royal Free Hampstead NHS Trust,,National Health Service
-Royal Liverpool and Broadgreen University Hospital NHS Trust,,National Health Service
-Royal United Hospital Bath NHS Trust,,National Health Service
-Rural Payments Agency,Rural Payments Agency,"Department for Environment, Food and Rural Affairs"
-"Rural Payments Agency (RPA), Scottish Government Rural Payments & Inspections Directorate (SGRPID), Welsh Assembly Government (WAG), Northern Ireland Department of Agriculture and Rural Development (DARD)",Rural Payments Agency,"Department for Environment, Food and Rural Affairs"
-"Rural Payments Agency (RPA), Scottish Government Rural Payments & Inspections Directorate (SGRPID), Welsh Assembly Government (WAG), Northern Ireland Department of Agriculture and Rural Development (DARD) ",Rural Payments Agency,"Department for Environment, Food and Rural Affairs"
-Salford Primary Care Trust,,National Health Service
-Sandwell & West Birmingham Hospitals NHS Trust,,National Health Service
-Sandwell Mental Health and Social Care NHS Foundation Trust,,National Health Service
-Sandwell PCT,Sandwell Primary Care Trust,National Health Service
-Scarborough and North East Yorkshire Healthcare NHS Trust,,National Health Service
-School Food Trust,,Department for Education
-Science and Technology Facilities Council,,"Department for Business, Innovation and Skills"
-Science and Technology Facilities Council ,,"Department for Business, Innovation and Skills"
-Scottish Court Service,,Scottish Government
-Scottish Prison Service,,Scottish Government
-Sea Fish Industry Authority,,"Department for Environment, Food and Rural Affairs"
-Security Industry Authority,,Home Office
-Sefton Primary Care Trust,,National Health Service
-Serious Organised Crime Agency,,Home Office
-Sheffield Primary Care Trust,,National Health Service
-Shropshire County Primary Care Trust,,National Health Service
-Solihull NHS Care Trust,,National Health Service
-South Central Ambulance Service NHS Trust ,,National Health Service
-South Central Strategic Health Authority,,National Health Service
-South East Coast Ambulance Service,,National Health Service
-South Staffordshire and Shropshire Healthcare NHS Foundation Trust,,National Health Service
-South Tyneside PCT,South Tyneside Primary Care Trust,National Health Service
-South Warwickshire NHS Foundation Trust,,National Health Service
-South West London and St George's Mental Health NHS Trust,,National Health Service
-Southampton City Primary Care Trust,,National Health Service
-Southport and Ormskirk Hospital NHS Trust,,National Health Service
-Southwark PCT,Southwark Primary Care Trust,National Health Service
-St Helens and Knowsley Hospitals NHS Trust,,National Health Service
-Standards for England,,Department for Communities and Local Government
-Stockport Primary Care Trust,,National Health Service
-Stockton Primary Care Trust,,National Health Service
-Student Loans Company Limited,,"Department for Business, Innovation and Skills"
-Sunderland Teaching PCT,Sunderland Teaching Primary Care Trust,National Health Service
-Surrey and Sussex Healthcare NHS Trus,Surrey and Sussex Healthcare NHS Trust,National Health Service
-Surrey and Sussex Healthcare NHS Trust,,National Health Service
-Sustainable Development Commission,,
-Swindon PCT,Swindon Primary Care Trust,National Health Service
-Tameside Hospital NHS Foundation Trust,,National Health Service
-Tenants Service Authority,Tenant Services Authority,Department for Communities and Local Government
-The Christie NHS Foundation Trust,,National Health Service
-The Churches Conservation Trust,,"Department for Culture, Media and Sport"
-The Geffrye Museum,,
-The Genome Analysis Centre,,Biotechnology and Biological Sciences Research Council
-The Institute of Food Research,,Biotechnology and Biological Sciences Research Council
-The John Innes Centre,,Biotechnology and Biological Sciences Research Council
-Biological and Biotechnology Science Research Council,Biotechnology & Biological Science Research Council
-The Mid Yorkshire Hospitals NHS Trust,,National Health Service
-The National Archives,,Ministry of Justice
-The NHS Information Centre for Health and Social care,NHS Information Centre for Health and Social Care,National Health Service
-The Pennine Acute Hospitals NHS Trust,,National Health Service
-The Pensions Ombudsman ,Pensions Ombudsman,Department for Work and Pensions
-The Royal Marsden NHS Foundation Trust,,National Health Service
-The Royal Orthopaedic Hospital NHS Foundation Trust,,National Health Service
-The Royal Wolverhampton Hospitals NHS Trust,,National Health Service
-The Shrewsbury and Telford Hospital NHS Trust,,National Health Service
-Thurrock Thames Gateway Development Corporation,,Department for Communities and Local Government
-Torbay Care Trust,,National Health Service
-Trinity House and the Northern Lighthouse Board,,
-UK Border Agency,,Home Office
-UK Film Council,,"Department for Culture, Media and Sport"
-UK Hydrographic Office,,Ministry of Defence
-UK National Air Quality Archive,,"Department for Environment, Food and Rural Affairs"
-UK Trade and Investment,,
-UKBA,UK Border Agency,Home Office
-United Kingdom Atomic Energy Authority,,Department of Energy and Climate Change
-United Lincolnshire Hospitals NHS Trust,,National Health Service
-University College London Hospital NHS Foundation Trust,,National Health Service
-University Hospitals Coventry and Warwickshire NHS Trust,,National Health Service
-University Hospitals of Morecambe Bay NHS Foundation Trust ,,National Health Service
-Valuatin Tribunal Service,Valuation Tribunals,Department for Communities and Local Government
-Valuation Office Agency ,,Her Majesty's Revenue and Customs
-Vehicle and Operator Services Agency,,Department for Transport
-Vehicle Operator Services Agency (VOSA),Vehicle Operator Services Agency,Department for Transport
-Veterinary Laboratories Agency,,"Department for Environment, Food and Rural Affairs"
-Veterinary Laboratories Agency,,"Department for Environment, Food and Rural Affairs"
-Victoria And Albert Museum,Victoria and Albert Museum,"Department for Culture, Media and Sport"
-Victoria and Albert Museum,,"Department for Culture, Media and Sport"
-VOSA,Vehicle and Operator Services Agency,Department for Transport
-Wakefield District PCT,Wakefield District Primary Care Trust,National Health Service
-Walsall Hospitals NHS Trust,,National Health Service
-Warrington Primary Care Trust,,National Health Service
-Warwickshire PCT,Warwickshire Primary Care Trust,National Health Service
-West London Mental Health NHS Trust,,National Health Service
-West Norhamptonshire Development Corporation,West Northampton Development Corporation,Department for Communities and Local Government
-Western Sussex Hospitals NHS Trust,,National Health Service
-Whittington Hospital NHS Trust,,National Health Service
-Wolverhampton City PCT,Wolverhampton City Primary Care Trust,National Health Service
-Worcestershire Acute Hospitals NHS Trust,,National Health Service
-Worcestershire Mental Health Partnership NHS Trust,,National Health Service
-Worcestershire PCT,Worcestershire Primary Care Trust,National Health Service
-"Wrightington, Wigan & Leigh NHS Foundation Trust",,National Health Service
\ No newline at end of file
--- a/ckanext/dgu/scripts/publisher_migration.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,226 +0,0 @@
-from collections import defaultdict
-import socket
-import copy
-
-from xmlrpclib import ServerProxy, ProtocolError, ResponseError
-from nose.tools import assert_equal
-
-from common import ScriptError, remove_readonly_fields
-from ckanclient import CkanApiError
-
-from ckanext.importlib.spreadsheet_importer import CsvData
-from ckanext.dgu import schema
-
-log = __import__("logging").getLogger(__name__)
-
-mapped_attributes = {
- 'temporal_granularity': dict(zip(['years', 'quarters', 'months', 'weeks', 'days', 'hours', 'points'],
- ['year', 'quarter', 'month', 'week', 'day', 'hour', 'point'])),
-
- 'update_frequency': dict(zip(('annually', 'quarterly', 'monthly', 'never'),
- ('annual', 'quarterly', 'monthly', 'never'))), #'discontinued'
- }
-
-class PublisherMigration:
- '''Changes department/agency fields to published_by/_via'''
- def __init__(self, ckanclient,
- xmlrpc_domain, xmlrpc_username, xmlrpc_password,
- publisher_map_filepath,
- update_all,
- dry_run=False):
- self.ckanclient = ckanclient
- self.dry_run = dry_run
- self.xmlrpc = {'username':xmlrpc_username,
- 'password':xmlrpc_password,
- 'domain':xmlrpc_domain}
- self.publisher_map = self.read_publisher_map(publisher_map_filepath) \
- if publisher_map_filepath else {}
- self.update_all = update_all
- self.organisations = {}
-
- def read_publisher_map(self, publisher_map_filepath):
- logger = None
- publisher_map = {}
- data = CsvData(logger, filepath=publisher_map_filepath)
- header = data.get_row(0)
- assert_equal(header[:2], ['Agency text', 'Corrected name'])
- for row_index in range(data.get_num_rows())[1:]:
- row = data.get_row(row_index)
- if len(row) < 2:
- continue
- agency, publisher = row[:2]
- agency = agency.strip()
- publisher = publisher.strip()
- if agency and publisher:
- publisher_map[agency] = publisher
- return publisher_map
-
- def get_organisation(self, dept_or_agency):
- if not self.organisations.has_key(dept_or_agency):
- # check for name mapping
- mapped_publisher = self.publisher_map.get(dept_or_agency.strip())
- if mapped_publisher:
- log.info('Mapping %r to %r', dept_or_agency, mapped_publisher)
- dept_or_agency = mapped_publisher
-
- # try canonical name
- dept_or_agency = schema.canonise_organisation_name(dept_or_agency)
-
- # look up with Drupal
- if not hasattr(self, 'drupal'):
- domain = self.xmlrpc['domain']
- username = self.xmlrpc['username']
- password = self.xmlrpc['password']
- if username or password:
- server = '%s:%s@%s' % (username, password, domain)
- else:
- server = '%s' % domain
- self.xmlrpc_url = 'http://%s/services/xmlrpc' % server
- log.info('XMLRPC connection to %s', self.xmlrpc_url)
- self.drupal = ServerProxy(self.xmlrpc_url)
- try:
- org_id = self.drupal.organisation.match(dept_or_agency)
- except socket.error, e:
- raise ScriptError('Socket error connecting to %s', self.xmlrpc_url)
- except ProtocolError, e:
- raise ScriptError('XMLRPC error connecting to %s', self.xmlrpc_url)
- except ResponseError, e:
- raise ScriptError('XMLRPC response error connecting to %s for department: %r', self.xmlrpc_url, dept_or_agency)
- if org_id:
- try:
- org_name = self.drupal.organisation.one(org_id)
- except socket.error, e:
- raise ScriptError('Socket error connecting to %s', self.xmlrpc_url)
- except ProtocolError, e:
- raise ScriptError('XMLRPC error connecting to %s', self.xmlrpc_url)
- organisation = u'%s [%s]' % (org_name, org_id)
- log.info('Found organisation: %r', organisation)
- else:
- log.error('Could not find organisation: %s', dept_or_agency)
- organisation = ''
- self.organisations[dept_or_agency] = organisation
- return self.organisations[dept_or_agency]
-
- def run(self):
- pkgs_done = []
- pkgs_rejected = defaultdict(list) # reason: [pkgs]
- all_pkgs = self.ckanclient.package_register_get()
- log.info('Working on %i packages', len(all_pkgs))
- for pkg_ref in all_pkgs:
- log.info('Package: %s', pkg_ref)
- try:
- try:
- pkg = self.ckanclient.package_entity_get(pkg_ref)
- except CkanApiError, e:
- log.error('Could not get: %r' % e)
- pkgs_rejected['Could not get package: %r' % e].append(pkg_ref)
- continue
- pkg_before_changes = copy.deepcopy(pkg)
-
- # mapped attributes
- for attribute in mapped_attributes:
- orig_value = pkg['extras'].get(attribute)
- if not orig_value:
- continue
- mapped_value = mapped_attributes[attribute].get(orig_value)
- if not mapped_value:
- mapped_value = mapped_attributes[attribute].get(orig_value.lower().strip())
- if not mapped_value:
- if orig_value.lower() in mapped_attributes[attribute].values():
- mapped_value = orig_value.lower()
- if mapped_value and orig_value != mapped_value:
- pkg['extras'][attribute] = mapped_value
- log.info('%s: %r -> %r', \
- attribute, orig_value, mapped_value)
- else:
- log.warn('Invalid value for %r: %r', \
- attribute, orig_value)
-
- # create publisher fields
- if self.update_all or not pkg['extras'].get('published_by'):
- dept = pkg['extras'].get('department')
- agency = pkg['extras'].get('agency')
- if dept:
- pub_by = self.get_organisation(dept)
- pub_via = self.get_organisation(agency) if agency else ''
- else:
- pub_by = self.get_organisation(agency) if agency else ''
- pub_via = ''
- if not pub_by or pub_via:
- log.warn('No publisher for package: %s', pkg['name'])
- log.info('%s:\n %r/%r ->\n %r/%r', \
- pkg['name'], dept, agency, pub_by, pub_via)
- pkg['extras']['published_by'] = pub_by
- pkg['extras']['published_via'] = pub_via
-
- if pkg == pkg_before_changes:
- log.info('...package unchanged: %r' % pkg['name'])
- pkgs_rejected['Package unchanged'].append(pkg)
- continue
- if not self.dry_run:
- remove_readonly_fields(pkg)
- try:
- self.ckanclient.package_entity_put(pkg)
- except CkanApiError, e:
- log.error('Could not put: %r' % e)
- pkgs_rejected['Could not put package: %r' % e].append(pkg_ref)
- continue
- log.info('...done')
- pkgs_done.append(pkg)
- except ScriptError, e:
- log.error('Error during processing package %r: %r', \
- pkg_ref, e)
- pkgs_rejected['Error: %r' % e].append(pkg_ref)
- continue
- except Exception, e:
- log.error('Uncaught exception during processing package %r: %r', \
- pkg_ref, e)
- pkgs_rejected['Exception: %r' % e].append(pkg_ref)
- raise
- log.info('-- Finished --')
- log.info('Processed %i packages', len(pkgs_done))
- rejected_pkgs = []
- for reason, pkgs in pkgs_rejected.items():
- rejected_pkgs.append('\n %i: %s' % (len(pkgs), reason))
- log.info('Rejected packages: %s', rejected_pkgs)
-
-import sys
-
-#from ckanext.api_command import ApiCommand
-from mass_changer_cmd import MassChangerCommand
-
-class Command(MassChangerCommand):
- def add_options(self):
- super(Command, self).add_options()
- self.parser.add_option("-D", "--xmlrpc-domain",
- dest="xmlrpc_domain",
- )
- self.parser.add_option("-U", "--xmlrpc-username",
- dest="xmlrpc_username",
- )
- self.parser.add_option("-P", "--xmlrpc-password",
- dest="xmlrpc_password",
- )
- self.parser.add_option("-m", "--publisher-map",
- dest="publisher_map_csv",
- )
- self.parser.add_option("--update-all",
- dest="update_all",
- )
-
- def command(self):
- super(Command, self).command()
-
- # now do command
- cmd = PublisherMigration(self.client,
- self.options.xmlrpc_domain,
- self.options.xmlrpc_username,
- self.options.xmlrpc_password,
- self.options.publisher_map_csv,
- self.options.update_all,
- dry_run=self.options.dry_run)
- cmd.run()
-
-def command():
- Command().command()
-
--- a/ckanext/dgu/scripts/remove_old_ons.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-# Run this in a shell
-
-dry_run = True
-key_date = datetime(2011, 5, 11)
-from collections import defaultdict
-from datetime import datetime
-pkg_status = defaultdict(list) # reason: [pkgs]
-pkgs = model.Session.query(model.Package)
-print 'Working with %i packages' % pkgs.count()
-count = 0
-for pkg in pkgs:
- count += 1
- if pkg.state != 'active':
- pkg_status['State is %s' % pkg.state].append(pkg.name)
- continue
- if pkg.extras.get('external_reference') != 'ONSHUB':
- pkg_status['Not ONS'].append(pkg.name)
- continue
- if pkg.revision.timestamp > key_date:
- pkg_status['After date'].append(pkg.name)
- continue
- pkg_status['Delete'].append(pkg.name)
- if not dry_run:
- rev = model.repo.new_revision()
- rev.author = 'okfn'
- rev.message = 'Deleting obsolete ONS packages'
- pkg.delete()
- model.repo.commit_and_remove()
-
-for reason, pkgs in pkg_status.items():
- print '\n %i: %s : %r' % (len(pkgs), reason, ' '.join(pkgs[:5]))
-
--- a/ckanext/dgu/scripts/temporal_dash.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-# Convert extra key "temporal_coverage_to" to "temporal_coverage-to"
-#
-# Paste this into a shell
-# sudo -u www-data paster --plugin=pylons shell /etc/ckan/dgu/dgu.ini
-
-dry_run = True
-from collections import defaultdict
-pkg_status = defaultdict(list) # reason: [pkgs]
-pkgs = model.Session.query(model.Package)
-count = 0
-rev = None
-def new_rev():
- global rev
- if not rev:
- rev = model.repo.new_revision()
- rev.author = 'okfn'
- rev.message = 'Correcting temporal coverage'
-
-def commit():
- global rev
- if rev:
- model.commit_and_remove()
- rev = None
-
-for pkg in pkgs:
- count += 1
- if pkg.state != 'active':
- pkg_status['State is %s' % pkg.state].append(pkg.name)
- continue
- pkg_changed = False
- for suffix in ('from', 'to'):
- if pkg.extras.has_key('temporal_coverage_%s' % suffix):
- pkg_changed = True
- new_value = pkg.extras.get('temporal_coverage-%s' % suffix) or \
- pkg.extras.get('temporal_coverage_%s' % suffix) or ''
- if not dry_run:
- new_rev()
- pkg.extras['temporal_coverage-%s' % suffix] = new_value
- del pkg.extras['temporal_coverage_%s' % suffix]
- if pkg_changed:
- pkg_status['changed'].append(pkg.name)
- commit()
-
-print 'Working with %i packages' % pkgs.count()
-for reason, pkgs in pkg_status.items():
- print '\n %i: %s : %r' % (len(pkgs), reason, ' '.join(pkgs[:5]))
-
--- a/ckanext/dgu/scripts/transfer_url.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,136 +0,0 @@
-'''
-Takes a list of package names.
-For each package:
- * double-checks it has a URL but no resources
- * creates a new resource with download url according to the resource
-'''
-from mass_changer import *
-from common import ScriptError
-
-log = __import__("logging").getLogger(__name__)
-
-pkg_name_list = '''anti-social-behaviour-orders-1999-2007
-asylum-applications-jan-mar-2009
-control-of-immigration-quarterly-statistical-summary-united-kingdom-2009-october-december
-coroners-statistics-england-and-wales
-courts-statistics-user-survey-england-and-wales
-court-statistics-company-insolvency-and-bankruptcy-england-and-wales
-court-statistics-england-and-wales
-court-statistics-mortages-and-landlord-possession-england-and-wales
-crime-in-england-and-wales
-crime-statistics-local-reoffending-england-and-wales
-crime-statistics-prison-and-probation-england-and-wales
-crime-statistics-reoffending-of-adults-england-and-wales
-crime-statistics-reoffending-of-juvenilles-england-and-wales
-data_gov_uk-datasets
-digest-uk-energy-statistics-2008
-directgov-central-hottest-pages-monthly
-directgov-central-internal-search-terms-monthly
-directgov-section-visits-monthly
-electricity-consumption-2007
-electricity-gas-consumption-2007
-energy-consumption-uk-2008
-final-energy-consumption-2007
-foi-statistics-uk-central-government
-fuel-poverty-statistics-2007
-gas-consumption-2007
-gb-reported-bicycling-accidents
-gb-road-traffic-counts
-gb-traffic-matrix
-greenhouse-gas-emissions-2008
-high-level-indicators-energy-use-2006
-judicial-and-court-statistics-england-and-wales
-laboratory-tests-and-prices
-local-authority-carbon-dioxide-emissions-2007
-magistrates-courts-statistics-survey-england-and-wales
-monthly-energy-prices
-monthly-energy-trends
-ni_012_refused_and_deferred_houses_in_multiple_occupation_hmos_licence_applications_leading_to_immig
-ni_013_migrants_english_language_skills_and_knowledge
-ni_023_perceptions_that_people_in_the_area_treat_one_another_with_respect_and_consideration
-ni_024_satisfaction_with_the_way_the_police_and_local_council_dealt_with_anti-social_behaviour
-ni_025_satisfaction_of_different_groups_with_the_way_the_police_and_local_council_dealt_with_anti-so
-ni_026_specialist_support_to_victims_of_a_serious_sexual_offence
-ni_029_gun_crime_rate
-ni_031_re-offending_rate_of_registered_sex_offenders
-ni_032_repeat_incidents_of_domestic_violence
-ni_034_domestic_violence_-_murder
-ni_036_protection_against_terrorist_attack
-ni_038_drug_related_class_a_offending_rate
-ni_078_reduction_in_number_of_schools_where_fewer_than_30_of_pupils_achieve_5_or_more_a-_c_grades_at
-ni_101_looked_after_children_achieving_5_a-c_gcses_or_equivalent_at_key_stage_4_including_english_an
-ni_109_delivery_of_sure_start_childrens_centres
-ni_126_early_access_for_women_to_maternity_services
-ni_127_self_reported_experience_of_social_care_users
-ni_128_user_reported_measure_of_respect_and_dignity_in_their_treatment
-ni_181_time_taken_to_process_housing_benefit-council_tax_benefit_new_claims_and_change_events
-ni_184_food_establishments_in_the_area_which_are_broadly_compliant_with_food_hygiene_law
-ni_185_co2_reduction_from_local_authority_operations
-ni_190_achievement_in_meeting_standards_for_the_control_system_for_animal_health
-ni_194_air_quality_-_reduction_in_nox_and_primary_pm10_emissions_through_local_authorities_estate_an
-other-fuels-consumption-2006
-police-use-firearms-england-wales-2007-2008
-prison-end-of-custody-licence-releases-and-recalls-england-and-wales
-prison-population-england-and-wales
-probation-offender-management-caseload-statistics-england-and-wales
-probation-statistics-quarterly-brief-england-and-wales
-quality-indicators-energy-data-2007
-quarterly-energy-prices
-quarterly-energy-trends
-road-transport-energy-consumption-2007
-sentencing-statistics-england-and-wales
-statistics-terrorism-arrests-outcomes-2001-2008
-ukba-control-of-immigration-statistics-2008
-ukba-control-of-immigration-statistics-2008-supplementary-tables
-uk-energy-in-brief-2008
-uk-energy-sector-indicators-background-2008
-uk-energy-sector-indicators-key-supporting-2008
-uk-exportcontrollists
-uk-exportcontrol-sanctions
-uk-export-control-statistics
-uk-glossary-exportcontrol
-uk-ipo-offences
-weekly-fuel-prices
-'''.split()
-pkg_name_list = [name for name in pkg_name_list if name]
-
-class TransferUrl(object):
- def __init__(self, ckanclient, dry_run=False, force=False):
- '''
- Changes licenses of packages.
- @param ckanclient: instance of ckanclient to make the changes
- @param license_id: id of the license to change packages to
- @param force: do not stop if there is an error with one package
- '''
- self.ckanclient = ckanclient
- self.dry_run = dry_run
- self.force = force
-
- def transfer_url(self):
- instructions = [
- ChangeInstruction(
- [
- TransferUrlMatcher(),
- ],
- CreateResource(url='%(url)s'))
- ]
- self.mass_changer = MassChangerNamedPackages(self.ckanclient,
- instructions,
- dry_run=self.dry_run,
- force=self.force)
- self.mass_changer.pkg_name_list = pkg_name_list
- self.mass_changer.run()
-
-class TransferUrlMatcher(PackageMatcher):
- def match(self, pkg):
- if not (pkg['url'] or '').strip():
- log.warn('Ignoring package with no URL: %r', pkg['name'])
- return False
-## if not pkg['url'].lower().endswith('.pdf'):
-## log.warn('Ignoring package URL not ending in ".PDF": %r %r',
-## pkg['name'], pkg['url'])
-## return False
- if pkg['resources']:
- log.warn('Ignoring package with resources already: %r', pkg['name'])
- return False
- return True
--- a/ckanext/dgu/scripts/transfer_url_cmd.py Mon May 30 12:35:45 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-from ckanext.importlib.loader import ResourceSeries
-from ckanext.dgu.scripts.transfer_url import TransferUrl
-from ckanext.dgu.scripts.mass_changer_cmd import MassChangerCommand
-from ckanclient import CkanClient
-
-class TransferUrlCommand(MassChangerCommand):
- def command(self):
- super(TransferUrlCommand, self).command()
- if self.options.license_id is None:
- self.parser.error("Please specify a license ID")
- if len(self.args) != 1:
- self.parser.error("Command is required")
-
- client = CkanClient(base_location=self.options.api_url,
- api_key=self.options.api_key,
- http_user=self.options.username,
- http_pass=self.options.password)
- transfer_url = TransferUrl(client, dry_run=self.options.dry_run,
- force=self.options.force)
- transfer_url.transfer_url()
-
-def command():
- TransferUrlCommand().command()
-
--- a/setup.py Mon May 30 12:35:45 2011 +0100
+++ b/setup.py Mon May 30 16:56:26 2011 +0100
@@ -36,16 +36,17 @@
[console_scripts]
ons_loader = ckanext.dgu.ons:load
cospread_loader = ckanext.dgu.cospread:load
- change_licenses = ckanext.dgu.scripts.change_licenses_cmd:command
- transfer_url = ckanext.dgu.scripts.transfer_url_cmd:command
- ons_analysis = ckanext.dgu.scripts.ons_analysis_cmd:command
- ofsted_fix = ckanext.dgu.scripts.ofsted_fix_cmd:command
- publisher_migration = ckanext.dgu.scripts.publisher_migration:command
- metadata_v3_migration = ckanext.dgu.scripts.metadata_v3_migration:command
+ change_licenses = ckanext.dgu.bin.change_licenses_cmd:command
+ transfer_url = ckanext.dgu.bin.transfer_url_cmd:command
+ ons_analysis = ckanext.dgu.bin.ons_analysis_cmd:command
+ ofsted_fix = ckanext.dgu.bin.ofsted_fix_cmd:command
+ publisher_migration = ckanext.dgu.bin.publisher_migration:command
+ metadata_v3_migration = ckanext.dgu.bin.metadata_v3_migration:command
generate_test_organisations = ckanext.dgu.testtools.organisations:command
- ons_remove_resources = ckanext.dgu.scripts.ons_remove_resources:command
- ons_delete_resourceless_packages = ckanext.dgu.scripts.ons_delete_resourceless_packages:command
- dump_analysis = ckanext.dgu.scripts.dump_analysis:command
+ ons_remove_resources = ckanext.dgu.bin.ons_remove_resources:command
+ ons_delete_resourceless_packages = ckanext.dgu.bin.ons_delete_resourceless_packages:command
+ dump_analysis = ckanext.dgu.bin.dump_analysis:command
+ gov_daily = ckanext.dgu.bin.gov_daily:command
[ckan.forms]
package_gov3 = ckanext.dgu.forms.package_gov3:get_gov3_fieldset
Repository URL: https://bitbucket.org/okfn/ckanext-dgu/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
More information about the ckan-changes
mailing list