[ckan-changes] commit/ckanext-pdeu: 3 new changesets
Bitbucket
commits-noreply at bitbucket.org
Thu Jun 9 09:35:30 UTC 2011
3 new changesets in ckanext-pdeu:
http://bitbucket.org/okfn/ckanext-pdeu/changeset/9283bb6bbeac/
changeset: 9283bb6bbeac
user: pudo
date: 2011-06-08 10:24:10
summary: basic RDFa markup in package read view
affected #: 3 files (896 bytes)
--- a/theme/templates/_util.html Wed Jun 08 17:43:19 2011 +0100
+++ b/theme/templates/_util.html Wed Jun 08 10:24:10 2011 +0200
@@ -26,7 +26,7 @@
<!--! List of tags: pass in a collection of tags and this renders the standard
tag listing --><span py:def="tag_list(tags)" class="tags clearfix">
- <span py:for="tag in tags">
+ <span py:for="tag in tags" property="dc:keyword">
${h.link_to(tag.name, h.url_for(controller='tag', action='read',
id=tag.name))},
</span>
--- a/theme/templates/package/read.html Wed Jun 08 17:43:19 2011 +0100
+++ b/theme/templates/package/read.html Wed Jun 08 10:24:10 2011 +0200
@@ -1,11 +1,19 @@
<html xmlns:py="http://genshi.edgewall.org/"
xmlns:i18n="http://genshi.edgewall.org/i18n"
xmlns:xi="http://www.w3.org/2001/XInclude"
+ xmlns:foaf="http://xmlns.com/foaf/0.1/"
+ xmlns:owl="http://www.w3.org/2002/07/owl#"
+ xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+ xmlns:dc="http://purl.org/dc/terms/"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:dcat="http://www.w3.org/ns/dcat#"
+ typeof="dcat:Dataset"
+ about=""
py:strip=""><py:def function="page_title">${c.pkg.title or c.pkg.name} - Data Packages</py:def>
- <py:def function="page_heading">${c.pkg.title}</py:def>
+ <py:def function="page_heading" property="dc:title">${c.pkg.title}</py:def><py:match path="primarysidebar">
@@ -21,15 +29,15 @@
<ul class="property-list"><li py:if="c.pkg.url"><h3>Source</h3>
- ${c.pkg_url_link}
+ <span property="foaf:homepage">${c.pkg_url_link}</span></li><li py:if="c.pkg_author_link"><h3>Author</h3>
- ${c.pkg_author_link}
+ <span property="dc:creator">${c.pkg_author_link}</span></li><li py:if="c.pkg_maintainer_link"><h3>Maintainer</h3>
- ${c.pkg_maintainer_link}
+ <span property="dc:contributor">${c.pkg_maintainer_link}</span></li><li py:if="c.pkg.version"><h3>Version</h3>
@@ -88,10 +96,10 @@
<p py:if="c.pkg.license_id">
License:
<py:choose test="">
- <strong py:when="c.pkg.license and c.pkg.license.url"><br /><a
- href="${c.pkg.license.url}">${c.pkg.license.title.split('::')[-1]}</a></strong>
- <strong py:when="c.pkg.license"><br />${c.pkg.license.title}</strong>
- <strong py:when="c.pkg.license_id"><br />${c.pkg.license_id}</strong>
+ <strong py:when="c.pkg.license and c.pkg.license.url"><a
+ href="${c.pkg.license.url}" rel="dc:rights">${c.pkg.license.title.split('::')[-1]}</a></strong>
+ <strong py:when="c.pkg.license" property="dc:rights">${c.pkg.license.title}</strong>
+ <strong py:when="c.pkg.license_id" property="dc:rights">${c.pkg.license_id}</strong></py:choose></p><p class="okd">
@@ -122,12 +130,11 @@
<xi:include href="read_core.html" /></div>
- <py:if test="config.get('rdf_packages')">
- <py:def function="optional_head">
- <link rel="alternate" type="application/rdf+xml" title="RDF/XML" href="${config['rdf_packages'] + '/' + c.pkg.id + '.rdf' }" />
- <link rel="alternate" type="application/turtle" title="RDF/Turtle" href="${config['rdf_packages'] + '/' + c.pkg.id + '.ttl' }" />
- </py:def>
- </py:if>
+ <py:def function="optional_head">
+ <link rel="alternate" type="application/rdf+xml" title="RDF/XML"
+ href="${url(controller='ckanext.rdf.controllers:DCatApiController',
+ action='show', id=c.pkg.name)}" />
+ </py:def><py:def function="optional_feed"><link rel="alternate" type="application/atom+xml" title="Package History"
--- a/theme/templates/package/read_core.html Wed Jun 08 17:43:19 2011 +0100
+++ b/theme/templates/package/read_core.html Wed Jun 08 10:24:10 2011 +0200
@@ -20,18 +20,20 @@
<th>Hash</th></tr><py:for each="res in c.pkg.resources">
- <tr>
+ <tr rel="dcat:distribution" resource="_:res${res.id}"
+ typeof="dcat:Distribution"><td><py:choose test=""><py:when test="res.description">
- <a href="${res.url}" target="_blank">${res.description}</a>
+ <a href="${res.url}" rel="dcat:accessURL" target="_blank"><span
+ property="rdfs:label">${res.description}</span></a></py:when><py:otherwise test="">
- <a href="${res.url}" target="_blank">Download <em>(no description)</em></a>
+ <a href="${res.url}" rel="dcat:accessURL" target="_blank">Download <em>(no description)</em></a></py:otherwise></py:choose></td>
- <td>${res.format}</td>
+ <td property="dc:format">${res.format}</td><td>${res.hash}</td></tr></py:for>
@@ -66,9 +68,10 @@
</tr></thead><tbody>
- <tr py:for="key, value in c.pkg_extras">
- <td class="package-label">${_(key)}</td>
- <td class="package-details">${value}</td>
+ <tr py:for="i, (key, value) in enumerate(c.pkg_extras)"
+ rel="dc:relation" resource="_:extra${i}">
+ <td class="package-label" property="rdfs:label">${_(key)}</td>
+ <td class="package-details" property="rdf:value">${value}</td></tr></tbody><caption py:if="h.am_authorized(c, actions.EDIT, c.pkg)">
@@ -96,17 +99,21 @@
${h.url_for(controller='api', register='package', action='show', id=c.pkg.name)}</a></py:if></code>
- <py:if test="config.get('rdf_packages')">
- <h5>RDF</h5>
- <code><a href="${config.get('rdf_packages') + '/' + c.pkg.id + '.rdf'}">RDF/XML</a></code>
- <code><a href="${config.get('rdf_packages') + '/' + c.pkg.id + '.ttl'}">Turtle</a></code>
- <code><a href="${config.get('rdf_packages') + '/' + c.pkg.id + '.nt'}">N-Triples</a></code>
- </py:if>
+
+ <h5>RDF</h5>
+ <code><a rel="rdfs:seeAlso"
+ href="${url(controller='ckanext.rdf.controllers:DCatApiController',
+ action='show', id=c.pkg.name)}">${url(controller='ckanext.rdf.controllers:DCatApiController', action='show', id=c.pkg.name)}</a></code>
+ <p>There is also a SPARQL endpoint to query this registry at:</p>
+ <code><a
+ href="${url(controller='ckanext.rdf.controllers:DCatApiController',
+ action='sparql')}">${url(controller='ckanext.rdf.controllers:DCatApiController',
+ action='sparql')}</a></code>
+ <h5>Information about this package:</h5><p>
The information on this page and the downloads / resources are also available using the
<a href="http://blog.okfn.org/2010/02/23/introducing-datapkg/">datapkg command line utility</a>.
</p>
- <h5>Information about this package:</h5><code>$ datapkg info ckan://${c.pkg.name}</code><h5>Download:</h5><code>$ datapkg download ckan://${c.pkg.name} .</code>
http://bitbucket.org/okfn/ckanext-pdeu/changeset/81f42605f606/
changeset: 81f42605f606
user: pudo
date: 2011-06-08 19:04:57
summary: harvester for digitaliser.dk, non-functional
affected #: 3 files (4.3 KB)
--- a/ckanext/pdeu/harvesters.py Wed Jun 08 10:24:10 2011 +0200
+++ b/ckanext/pdeu/harvesters.py Wed Jun 08 19:04:57 2011 +0200
@@ -612,7 +612,110 @@
log.exception(e)
self._save_object_error('%r' % e, harvest_object, 'Import')
+from lxml import etree
+class DigitaliserDkHarvester(HarvesterBase):
+ API_ENDPOINT = "http://api.digitaliser.dk/rest/"
+ def info(self):
+ return {
+ 'name': 'digitaliser_dk',
+ 'title': 'Digitaliser.dk',
+ 'description': 'Danish government data and document repository.',
+ 'form_config_interface':'Text'
+ }
+ def gather_stage(self, harvest_job):
+ log.debug('In Digitaliser.dk gather_stage')
+ firstResult = 0
+ maxResults = 1000
+ ids = []
+ while True:
+ req = 'resources/search?query=&firstResult=%s&maxResults=%s' % \
+ (firstResult, maxResults)
+ doc = etree.parse(self.API_ENDPOINT + req)
+ for handle in doc.findall("/ResourceHandle"):
+ link = handle.get('handleReference')
+ id = sha1(link).hexdigest()
+ print link
+ obj = HarvestObject(guid=id, job=harvest_job, content=link)
+ obj.save()
+ ids.append(obj.id)
+ firstResult += maxResults
+ if firstResult > int(doc.getroot().get('totalResults')):
+ break
+ return ids
+ def fetch_stage(self, harvest_object):
+ doc = etree.parse(harvest_object.content)
+ category = doc.findtext('//ResourceCategoryHandle/TitleText')
+ if category != "Datakilde":
+ return
+ package_dict = {'extras': {}, 'resources': [], 'tags': []}
+ package_dict['title'] = doc.findtext('/TitleText')
+ package_dict['notes'] = doc.findtext('/BodyText')
+ package_dict['author'] = doc.findtext('/ResourceOwnerGroupHandle/TitleText')
+ package_dict['extras']['harvest_dataset_url'] = harvest_object.content
+
+ package_dict['metadata_created'] = doc.findtext('/CreatedDateTime')
+ package_dict['metadata_modified'] = doc.find('/PublishedState').get('publishedDateTime')
+
+ responsible = doc.findtext('/ResponsibleReference')
+ res_doc = etree.parse(responsible)
+ package_dict['maintainer'] = res_doc.findtext('/ns2:PersonGivenName') + \
+ " " + res_doc.findtext('/ns2:PersonSurnameName')
+
+ package_dict['extras']['categories'] = []
+ for tax_handle in doc.findall('//TaxonomyNodeHandle'):
+ package_dict['extras']['categories'].append(tax_handle.findtext('TitleText'))
+
+ for tag_handle in doc.findall('//TagHandle'):
+ package_dict['tags'].append(tag_handle.findtext('LabelText'))
+
+ ref_handle = doc.find('//ReferenceHandle')
+ if ref_handle:
+ ref_doc = etree.parse(ref_handle.get('handleReference'))
+ package_dict['url'] = ref_doc.getroot().get('url')
+
+ for artefact in doc.find('//ArtefactHandle'):
+ art_doc = etree.parse(artefact.get('handleReference'))
+ package_dict['resources'].append({
+ 'url': art_doc.getroot().get('url'),
+ 'format': '',
+ 'description': artefact.findtext('TitleText')
+ })
+
+ harvest_object.content = json.dumps(package_dict)
+ harvest_object.save()
+ return True
+
+ def import_stage(self,harvest_object):
+ if not harvest_object:
+ log.error('No harvest object received')
+ return False
+
+ if harvest_object.content is None:
+ self._save_object_error('Empty content for object %s' % harvest_object.id,harvest_object,'Import')
+ return False
+
+ try:
+ package_dict = json.loads(harvest_object.content)
+ package_dict['id'] = harvest_object.guid
+ package_dict['name'] = self._gen_new_name(package_dict['title'])
+
+ # Common extras
+ package_dict['extras']['harvest_catalogue_name'] = u'Digitaliser.dk'
+ package_dict['extras']['harvest_catalogue_url'] = u'http://digitaliser.dk'
+ package_dict['extras']['eu_country'] = u'DK'
+ package_dict['extras']['eu_nuts1'] = u'DK0'
+
+ return self._create_or_update_package(package_dict, harvest_object)
+ except Exception, e:
+ log.exception(e)
+ self._save_object_error('%r' % e, harvest_object, 'Import')
+
+
+
+
+
+
--- a/setup.py Wed Jun 08 10:24:10 2011 +0200
+++ b/setup.py Wed Jun 08 19:04:57 2011 +0200
@@ -33,5 +33,6 @@
data_london_gov_uk_harvester=ckanext.pdeu.harvesters:DataLondonGovUkHarvester
data_wien_gv_at_harvester=ckanext.pdeu.harvesters:DataWienGvAtHarvester
opendata_paris_fr_harvester=ckanext.pdeu.harvesters:OpendataParisFrHarvester
+ digitaliser_dk_harvester=ckanext.pdeu.harvesters:DigitaliserDkHarvester
""",
)
--- a/theme/templates/layout_base.html Wed Jun 08 10:24:10 2011 +0200
+++ b/theme/templates/layout_base.html Wed Jun 08 19:04:57 2011 +0200
@@ -60,7 +60,7 @@
</span></div><a href="${url('home')}">
- <img src="${g.site_logo}" alt="${g.site_title} Logo" title="${g.site_title} Logo" id="logo" />
+ <img width="32" height="32" src="${g.site_logo}" alt="${g.site_title} Logo" title="${g.site_title} Logo" id="logo" /></a><div class="menu"><form action="${url(controller='package', action='search')}" method="GET">
http://bitbucket.org/okfn/ckanext-pdeu/changeset/f0087f88c6b5/
changeset: f0087f88c6b5
user: pudo
date: 2011-06-09 11:35:09
summary: fix digitaliser.dk harvester by namespacing all XML nodes
affected #: 1 file (394 bytes)
--- a/ckanext/pdeu/harvesters.py Wed Jun 08 19:04:57 2011 +0200
+++ b/ckanext/pdeu/harvesters.py Thu Jun 09 11:35:09 2011 +0200
@@ -615,6 +615,8 @@
from lxml import etree
class DigitaliserDkHarvester(HarvesterBase):
API_ENDPOINT = "http://api.digitaliser.dk/rest/"
+ NS = "{urn:oio:digitaliserdk:rest:1.0}"
+ PSN = "{http://rep.oio.dk/ebxml/xml/schemas/dkcc/2003/02/13/}"
def info(self):
return {
@@ -634,10 +636,9 @@
req = 'resources/search?query=&firstResult=%s&maxResults=%s' % \
(firstResult, maxResults)
doc = etree.parse(self.API_ENDPOINT + req)
- for handle in doc.findall("/ResourceHandle"):
+ for handle in doc.findall(self.NS + "ResourceHandle"):
link = handle.get('handleReference')
id = sha1(link).hexdigest()
- print link
obj = HarvestObject(guid=id, job=harvest_job, content=link)
obj.save()
ids.append(obj.id)
@@ -648,43 +649,46 @@
def fetch_stage(self, harvest_object):
doc = etree.parse(harvest_object.content)
- category = doc.findtext('//ResourceCategoryHandle/TitleText')
+ category = doc.findtext('//' + self.NS + 'ResourceCategoryHandle/' + self.NS + 'TitleText')
if category != "Datakilde":
return
package_dict = {'extras': {}, 'resources': [], 'tags': []}
- package_dict['title'] = doc.findtext('/TitleText')
- package_dict['notes'] = doc.findtext('/BodyText')
- package_dict['author'] = doc.findtext('/ResourceOwnerGroupHandle/TitleText')
+ package_dict['title'] = doc.findtext(self.NS + 'TitleText')
+ package_dict['notes'] = doc.findtext(self.NS + 'BodyText')
+ package_dict['author'] = doc.findtext(self.NS + \
+ 'ResourceOwnerGroupHandle/' + self.NS + 'TitleText')
package_dict['extras']['harvest_dataset_url'] = harvest_object.content
- package_dict['metadata_created'] = doc.findtext('/CreatedDateTime')
- package_dict['metadata_modified'] = doc.find('/PublishedState').get('publishedDateTime')
+ package_dict['metadata_created'] = doc.findtext(self.NS + 'CreatedDateTime')
+ package_dict['metadata_modified'] = doc.find(self.NS + 'PublishedState').get('publishedDateTime')
- responsible = doc.findtext('/ResponsibleReference')
+ responsible = doc.findtext(self.NS + 'ResponsibleReference')
res_doc = etree.parse(responsible)
- package_dict['maintainer'] = res_doc.findtext('/ns2:PersonGivenName') + \
- " " + res_doc.findtext('/ns2:PersonSurnameName')
+ package_dict['maintainer'] = res_doc.findtext('//' + self.PSN + 'PersonGivenName') + \
+ " " + res_doc.findtext('//' + self.PSN + 'PersonSurnameName')
package_dict['extras']['categories'] = []
- for tax_handle in doc.findall('//TaxonomyNodeHandle'):
- package_dict['extras']['categories'].append(tax_handle.findtext('TitleText'))
+ for tax_handle in doc.findall('//' + self.NS + 'TaxonomyNodeHandle'):
+ package_dict['extras']['categories'].append(tax_handle.findtext(self.NS + 'TitleText'))
- for tag_handle in doc.findall('//TagHandle'):
- package_dict['tags'].append(tag_handle.findtext('LabelText'))
+ for tag_handle in doc.findall('//' + self.NS + 'TagHandle'):
+ package_dict['tags'].append(tag_handle.findtext(self.NS + 'LabelText'))
- ref_handle = doc.find('//ReferenceHandle')
+ ref_handle = doc.find('//' + self.NS + 'ReferenceHandle')
if ref_handle:
ref_doc = etree.parse(ref_handle.get('handleReference'))
package_dict['url'] = ref_doc.getroot().get('url')
- for artefact in doc.find('//ArtefactHandle'):
+ for artefact in doc.findall('//' + self.NS + 'ArtefactHandle'):
art_doc = etree.parse(artefact.get('handleReference'))
package_dict['resources'].append({
'url': art_doc.getroot().get('url'),
'format': '',
- 'description': artefact.findtext('TitleText')
+ 'description': artefact.findtext(self.NS + 'TitleText')
})
+ #from pprint import pprint
+ #pprint(package_dict)
harvest_object.content = json.dumps(package_dict)
harvest_object.save()
return True
Repository URL: https://bitbucket.org/okfn/ckanext-pdeu/
--
This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled for this
repository and this address is listed as a recipient.
More information about the ckan-changes
mailing list