[ckan-changes] commit/ckanext-harvest: pudo: reduce number of queries for harvest index to a less insane number. still heavy.
Bitbucket
commits-noreply at bitbucket.org
Mon Jun 13 15:41:34 UTC 2011
1 new changeset in ckanext-harvest:
http://bitbucket.org/okfn/ckanext-harvest/changeset/1aabe70226c9/
changeset: 1aabe70226c9
user: pudo
date: 2011-06-13 17:36:35
summary: reduce number of queries for harvest index to a less insane number. still heavy.
affected #: 2 files (295 bytes)
--- a/ckanext/harvest/lib/__init__.py Mon Jun 13 15:56:19 2011 +0100
+++ b/ckanext/harvest/lib/__init__.py Mon Jun 13 17:36:35 2011 +0200
@@ -19,12 +19,10 @@
log = logging.getLogger('ckanext')
-def _get_source_status(source):
+def _get_source_status(source, detailed=True):
out = dict()
-
- jobs = get_harvest_jobs(source=source)
-
- if not len(jobs):
+ job_count = HarvestJob.filter(source=source).count()
+ if not job_count:
out['msg'] = 'No jobs yet'
return out
out = {'next_harvest':'',
@@ -33,7 +31,6 @@
'last_harvest_errors':[],
'overall_statistics':{'added':0, 'errors':0},
'packages':[]}
-
# Get next scheduled job
next_job = HarvestJob.filter(source=source,status=u'New').first()
if next_job:
@@ -43,69 +40,69 @@
# Get the last finished job
last_job = HarvestJob.filter(source=source,status=u'Finished') \
- .order_by(HarvestJob.created.desc()).limit(1).first()
+ .order_by(HarvestJob.created.desc()).first()
- if last_job:
+ if last_job:
#TODO: Should we encode the dates as strings?
out['last_harvest_request'] = str(last_job.gather_finished)
+ #Get HarvestObjects from last job whit links to packages
+ if detailed:
+ last_objects = [obj for obj in last_job.objects if obj.package is not None]
- #Get HarvestObjects from last job whit links to packages
- last_objects = [obj for obj in last_job.objects if obj.package is not None]
+ if len(last_objects) == 0:
+ # No packages added or updated
+ out['last_harvest_statistics']['added'] = 0
+ out['last_harvest_statistics']['updated'] = 0
+ else:
+ # Check wether packages were added or updated
+ for last_object in last_objects:
+ # Check if the same package had been linked before
+ previous_objects = Session.query(HarvestObject) \
+ .filter(HarvestObject.package==last_object.package) \
+ .count()
- if len(last_objects) == 0:
- # No packages added or updated
- out['last_harvest_statistics']['added'] = 0
- out['last_harvest_statistics']['updated'] = 0
- else:
- # Check wether packages were added or updated
- for last_object in last_objects:
- # Check if the same package had been linked before
- previous_objects = Session.query(HarvestObject) \
- .filter(HarvestObject.package==last_object.package) \
- .all()
-
- if len(previous_objects) == 1:
- # It didn't previously exist, it has been added
- out['last_harvest_statistics']['added'] += 1
- else:
- # Pacakge already existed, but it has been updated
- out['last_harvest_statistics']['updated'] += 1
+ if previous_objects == 1:
+ # It didn't previously exist, it has been added
+ out['last_harvest_statistics']['added'] += 1
+ else:
+ # Pacakge already existed, but it has been updated
+ out['last_harvest_statistics']['updated'] += 1
# Last harvest errors
# We have the gathering errors in last_job.gather_errors, so let's also
# get also the object errors.
object_errors = Session.query(HarvestObjectError).join(HarvestObject) \
- .filter(HarvestObject.job==last_job).all()
+ .filter(HarvestObject.job==last_job)
out['last_harvest_statistics']['errors'] = len(last_job.gather_errors) \
- + len(object_errors)
- for gather_error in last_job.gather_errors:
- out['last_harvest_errors'].append(gather_error.message)
+ + object_errors.count()
+ if detailed:
+ for gather_error in last_job.gather_errors:
+ out['last_harvest_errors'].append(gather_error.message)
- for object_error in object_errors:
- msg = 'GUID %s: %s' % (object_error.object.guid,object_error.message)
- out['last_harvest_errors'].append(msg)
-
-
+ for object_error in object_errors:
+ msg = 'GUID %s: %s' % (object_error.object.guid, object_error.message)
+ out['last_harvest_errors'].append(msg)
# Overall statistics
packages = Session.query(distinct(HarvestObject.package_id),Package.name) \
.join(Package).join(HarvestJob).join(HarvestSource) \
- .filter(HarvestJob.source==source).all()
+ .filter(HarvestJob.source==source)
- out['overall_statistics']['added'] = len(packages)
- for package in packages:
- out['packages'].append(package.name)
+ out['overall_statistics']['added'] = packages.count()
+ if detailed:
+ for package in packages:
+ out['packages'].append(package.name)
gather_errors = Session.query(HarvestGatherError) \
.join(HarvestJob).join(HarvestSource) \
- .filter(HarvestJob.source==source).all()
+ .filter(HarvestJob.source==source).count()
object_errors = Session.query(HarvestObjectError) \
.join(HarvestObject).join(HarvestJob).join(HarvestSource) \
- .filter(HarvestJob.source==source).all()
- out['overall_statistics']['errors'] = len(gather_errors) + len(object_errors)
+ .filter(HarvestJob.source==source).count()
+ out['overall_statistics']['errors'] = gather_errors + object_errors
else:
out['last_harvest_request'] = 'Not yet harvested'
@@ -114,14 +111,14 @@
-def _source_as_dict(source):
+def _source_as_dict(source, detailed=True):
out = source.as_dict()
out['jobs'] = []
for job in source.jobs:
out['jobs'].append(job.as_dict())
- out['status'] = _get_source_status(source)
+ out['status'] = _get_source_status(source, detailed=detailed)
return out
@@ -213,7 +210,7 @@
sources = HarvestSource.filter(**kwds) \
.order_by(HarvestSource.created.desc()) \
.all()
- return [_source_as_dict(source) for source in sources]
+ return [_source_as_dict(source, detailed=False) for source in sources]
def create_harvest_source(data_dict):
--- a/ckanext/harvest/model/__init__.py Mon Jun 13 15:56:19 2011 +0100
+++ b/ckanext/harvest/model/__init__.py Mon Jun 13 17:36:35 2011 +0200
@@ -164,6 +164,7 @@
properties={
'jobs': relation(
HarvestJob,
+ lazy=True,
backref=u'source',
order_by=harvest_job_table.c.created,
),
@@ -181,10 +182,12 @@
properties={
'package':relation(
Package,
+ lazy=True,
backref='harvest_objects',
),
'job': relation(
HarvestJob,
+ lazy=True,
backref=u'objects',
),
},
Repository URL: https://bitbucket.org/okfn/ckanext-harvest/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
More information about the ckan-changes mailing list