[ckan-changes] commit/ckanext-pdeu: 3 new changesets

Bitbucket commits-noreply at bitbucket.org
Thu Jun 9 09:35:30 UTC 2011


3 new changesets in ckanext-pdeu:

http://bitbucket.org/okfn/ckanext-pdeu/changeset/9283bb6bbeac/
changeset:   9283bb6bbeac
user:        pudo
date:        2011-06-08 10:24:10
summary:     basic RDFa markup in package read view
affected #:  3 files (896 bytes)

--- a/theme/templates/_util.html	Wed Jun 08 17:43:19 2011 +0100
+++ b/theme/templates/_util.html	Wed Jun 08 10:24:10 2011 +0200
@@ -26,7 +26,7 @@
   <!--! List of tags: pass in a collection of tags and this renders the standard
         tag listing --><span py:def="tag_list(tags)" class="tags clearfix">
-    <span py:for="tag in tags">
+    <span py:for="tag in tags" property="dc:keyword">
       ${h.link_to(tag.name, h.url_for(controller='tag', action='read',
       id=tag.name))},
     </span>


--- a/theme/templates/package/read.html	Wed Jun 08 17:43:19 2011 +0100
+++ b/theme/templates/package/read.html	Wed Jun 08 10:24:10 2011 +0200
@@ -1,11 +1,19 @@
 <html xmlns:py="http://genshi.edgewall.org/"
   xmlns:i18n="http://genshi.edgewall.org/i18n"
   xmlns:xi="http://www.w3.org/2001/XInclude"
+  xmlns:foaf="http://xmlns.com/foaf/0.1/"
+  xmlns:owl="http://www.w3.org/2002/07/owl#"
+  xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+  xmlns:dc="http://purl.org/dc/terms/"
+  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+  xmlns:dcat="http://www.w3.org/ns/dcat#"
+  typeof="dcat:Dataset"
+  about=""
   py:strip=""><py:def function="page_title">${c.pkg.title or c.pkg.name} - Data Packages</py:def>
   
-  <py:def function="page_heading">${c.pkg.title}</py:def>
+  <py:def function="page_heading" property="dc:title">${c.pkg.title}</py:def><py:match path="primarysidebar">
@@ -21,15 +29,15 @@
       <ul class="property-list"><li py:if="c.pkg.url"><h3>Source</h3>
-          ${c.pkg_url_link}
+          <span property="foaf:homepage">${c.pkg_url_link}</span></li><li py:if="c.pkg_author_link"><h3>Author</h3>
-          ${c.pkg_author_link}
+          <span property="dc:creator">${c.pkg_author_link}</span></li><li py:if="c.pkg_maintainer_link"><h3>Maintainer</h3>
-          ${c.pkg_maintainer_link}
+          <span property="dc:contributor">${c.pkg_maintainer_link}</span></li><li py:if="c.pkg.version"><h3>Version</h3>
@@ -88,10 +96,10 @@
       <p py:if="c.pkg.license_id">
         License:
         <py:choose test="">
-          <strong py:when="c.pkg.license and c.pkg.license.url"><br /><a
-              href="${c.pkg.license.url}">${c.pkg.license.title.split('::')[-1]}</a></strong>
-          <strong py:when="c.pkg.license"><br />${c.pkg.license.title}</strong>
-          <strong py:when="c.pkg.license_id"><br />${c.pkg.license_id}</strong>
+          <strong py:when="c.pkg.license and c.pkg.license.url"><a
+              href="${c.pkg.license.url}" rel="dc:rights">${c.pkg.license.title.split('::')[-1]}</a></strong>
+          <strong py:when="c.pkg.license" property="dc:rights">${c.pkg.license.title}</strong>
+          <strong py:when="c.pkg.license_id" property="dc:rights">${c.pkg.license_id}</strong></py:choose></p><p class="okd">
@@ -122,12 +130,11 @@
     <xi:include href="read_core.html" /></div>
 
-  <py:if test="config.get('rdf_packages')">
-    <py:def function="optional_head">
-      <link rel="alternate" type="application/rdf+xml" title="RDF/XML" href="${config['rdf_packages'] + '/' + c.pkg.id + '.rdf' }" />
-      <link rel="alternate" type="application/turtle" title="RDF/Turtle" href="${config['rdf_packages'] + '/' + c.pkg.id + '.ttl' }" />
-    </py:def>
-  </py:if>
+  <py:def function="optional_head">
+    <link rel="alternate" type="application/rdf+xml" title="RDF/XML"
+    href="${url(controller='ckanext.rdf.controllers:DCatApiController',
+    action='show', id=c.pkg.name)}" />
+  </py:def><py:def function="optional_feed"><link rel="alternate" type="application/atom+xml" title="Package History"


--- a/theme/templates/package/read_core.html	Wed Jun 08 17:43:19 2011 +0100
+++ b/theme/templates/package/read_core.html	Wed Jun 08 10:24:10 2011 +0200
@@ -20,18 +20,20 @@
             <th>Hash</th></tr><py:for each="res in c.pkg.resources">
-          <tr>
+          <tr rel="dcat:distribution" resource="_:res${res.id}"
+            typeof="dcat:Distribution"><td><py:choose test=""><py:when test="res.description">
-                      <a href="${res.url}" target="_blank">${res.description}</a>  
+                    <a href="${res.url}" rel="dcat:accessURL" target="_blank"><span
+                        property="rdfs:label">${res.description}</span></a></py:when><py:otherwise test="">
-                      <a href="${res.url}" target="_blank">Download <em>(no description)</em></a>  
+                      <a href="${res.url}" rel="dcat:accessURL" target="_blank">Download <em>(no description)</em></a></py:otherwise></py:choose></td>
-              <td>${res.format}</td>
+              <td property="dc:format">${res.format}</td><td>${res.hash}</td></tr></py:for>
@@ -66,9 +68,10 @@
         </tr></thead><tbody>
-        <tr py:for="key, value in c.pkg_extras">
-          <td class="package-label">${_(key)}</td>
-          <td class="package-details">${value}</td>
+        <tr py:for="i, (key, value) in enumerate(c.pkg_extras)"
+          rel="dc:relation" resource="_:extra${i}">
+          <td class="package-label" property="rdfs:label">${_(key)}</td>
+          <td class="package-details" property="rdf:value">${value}</td></tr></tbody><caption py:if="h.am_authorized(c, actions.EDIT, c.pkg)">
@@ -96,17 +99,21 @@
                     ${h.url_for(controller='api', register='package', action='show', id=c.pkg.name)}</a></py:if></code>
-            <py:if test="config.get('rdf_packages')">
-                <h5>RDF</h5>
-                <code><a href="${config.get('rdf_packages') + '/' + c.pkg.id + '.rdf'}">RDF/XML</a></code>
-                <code><a href="${config.get('rdf_packages') + '/' + c.pkg.id + '.ttl'}">Turtle</a></code>
-                <code><a href="${config.get('rdf_packages') + '/' + c.pkg.id + '.nt'}">N-Triples</a></code>
-            </py:if>
+
+              <h5>RDF</h5>
+              <code><a rel="rdfs:seeAlso"
+                  href="${url(controller='ckanext.rdf.controllers:DCatApiController',
+                  action='show', id=c.pkg.name)}">${url(controller='ckanext.rdf.controllers:DCatApiController', action='show', id=c.pkg.name)}</a></code>
+            <p>There is also a SPARQL endpoint to query this registry at:</p>
+              <code><a
+                  href="${url(controller='ckanext.rdf.controllers:DCatApiController',
+                  action='sparql')}">${url(controller='ckanext.rdf.controllers:DCatApiController',
+                  action='sparql')}</a></code>
+            <h5>Information about this package:</h5><p>
                 The information on this page and the downloads / resources are also available using the 
                 <a href="http://blog.okfn.org/2010/02/23/introducing-datapkg/">datapkg command line utility</a>.
             </p>
-            <h5>Information about this package:</h5><code>$ datapkg info ckan://${c.pkg.name}</code><h5>Download:</h5><code>$ datapkg download ckan://${c.pkg.name} .</code>            


http://bitbucket.org/okfn/ckanext-pdeu/changeset/81f42605f606/
changeset:   81f42605f606
user:        pudo
date:        2011-06-08 19:04:57
summary:     harvester for digitaliser.dk, non-functional
affected #:  3 files (4.3 KB)

--- a/ckanext/pdeu/harvesters.py	Wed Jun 08 10:24:10 2011 +0200
+++ b/ckanext/pdeu/harvesters.py	Wed Jun 08 19:04:57 2011 +0200
@@ -612,7 +612,110 @@
             log.exception(e)
             self._save_object_error('%r' % e, harvest_object, 'Import')
 
+from lxml import etree
+class DigitaliserDkHarvester(HarvesterBase):
+    API_ENDPOINT = "http://api.digitaliser.dk/rest/"
 
+    def info(self):
+        return {
+            'name': 'digitaliser_dk',
+            'title': 'Digitaliser.dk',
+            'description': 'Danish government data and document repository.',
+            'form_config_interface':'Text'
+        }
 
+    def gather_stage(self, harvest_job):
+        log.debug('In Digitaliser.dk gather_stage')
 
+        firstResult = 0
+        maxResults = 1000
+        ids = []
+        while True:
+            req = 'resources/search?query=&firstResult=%s&maxResults=%s' % \
+                 (firstResult, maxResults)
+            doc = etree.parse(self.API_ENDPOINT + req)
+            for handle in doc.findall("/ResourceHandle"):
+                link = handle.get('handleReference')
+                id = sha1(link).hexdigest()
+                print link
+                obj = HarvestObject(guid=id, job=harvest_job, content=link)
+                obj.save()
+                ids.append(obj.id)
+            firstResult += maxResults
+            if firstResult > int(doc.getroot().get('totalResults')):
+                break
+        return ids
 
+    def fetch_stage(self, harvest_object):
+        doc = etree.parse(harvest_object.content)
+        category = doc.findtext('//ResourceCategoryHandle/TitleText')
+        if category != "Datakilde":
+            return
+        package_dict = {'extras': {}, 'resources': [], 'tags': []}
+        package_dict['title'] = doc.findtext('/TitleText')
+        package_dict['notes'] = doc.findtext('/BodyText')
+        package_dict['author'] = doc.findtext('/ResourceOwnerGroupHandle/TitleText')
+        package_dict['extras']['harvest_dataset_url'] = harvest_object.content
+
+        package_dict['metadata_created'] = doc.findtext('/CreatedDateTime')
+        package_dict['metadata_modified'] = doc.find('/PublishedState').get('publishedDateTime')
+        
+        responsible = doc.findtext('/ResponsibleReference')
+        res_doc = etree.parse(responsible)
+        package_dict['maintainer'] = res_doc.findtext('/ns2:PersonGivenName') + \
+            " " + res_doc.findtext('/ns2:PersonSurnameName')
+
+        package_dict['extras']['categories'] = []
+        for tax_handle in doc.findall('//TaxonomyNodeHandle'):
+            package_dict['extras']['categories'].append(tax_handle.findtext('TitleText'))
+        
+        for tag_handle in doc.findall('//TagHandle'):
+            package_dict['tags'].append(tag_handle.findtext('LabelText'))
+        
+        ref_handle = doc.find('//ReferenceHandle')
+        if ref_handle: 
+            ref_doc = etree.parse(ref_handle.get('handleReference'))
+            package_dict['url'] = ref_doc.getroot().get('url')
+
+        for artefact in doc.find('//ArtefactHandle'):
+            art_doc = etree.parse(artefact.get('handleReference'))
+            package_dict['resources'].append({
+                'url': art_doc.getroot().get('url'),
+                'format': '',
+                'description': artefact.findtext('TitleText')
+                })
+        
+        harvest_object.content = json.dumps(package_dict)
+        harvest_object.save()
+        return True
+
+    def import_stage(self,harvest_object):
+        if not harvest_object:
+            log.error('No harvest object received')
+            return False
+
+        if harvest_object.content is None:
+            self._save_object_error('Empty content for object %s' % harvest_object.id,harvest_object,'Import')
+            return False
+
+        try:
+            package_dict = json.loads(harvest_object.content)
+            package_dict['id'] = harvest_object.guid
+            package_dict['name'] = self._gen_new_name(package_dict['title'])
+
+            # Common extras
+            package_dict['extras']['harvest_catalogue_name'] = u'Digitaliser.dk'
+            package_dict['extras']['harvest_catalogue_url'] = u'http://digitaliser.dk'
+            package_dict['extras']['eu_country'] = u'DK'
+            package_dict['extras']['eu_nuts1'] = u'DK0'
+
+            return self._create_or_update_package(package_dict, harvest_object)
+        except Exception, e:
+            log.exception(e)
+            self._save_object_error('%r' % e, harvest_object, 'Import')
+
+
+
+
+
+


--- a/setup.py	Wed Jun 08 10:24:10 2011 +0200
+++ b/setup.py	Wed Jun 08 19:04:57 2011 +0200
@@ -33,5 +33,6 @@
 	data_london_gov_uk_harvester=ckanext.pdeu.harvesters:DataLondonGovUkHarvester
 	data_wien_gv_at_harvester=ckanext.pdeu.harvesters:DataWienGvAtHarvester
 	opendata_paris_fr_harvester=ckanext.pdeu.harvesters:OpendataParisFrHarvester
+	digitaliser_dk_harvester=ckanext.pdeu.harvesters:DigitaliserDkHarvester
 	""",
 )


--- a/theme/templates/layout_base.html	Wed Jun 08 10:24:10 2011 +0200
+++ b/theme/templates/layout_base.html	Wed Jun 08 19:04:57 2011 +0200
@@ -60,7 +60,7 @@
           </span></div><a href="${url('home')}">
-          <img src="${g.site_logo}" alt="${g.site_title} Logo" title="${g.site_title} Logo" id="logo" />
+          <img width="32" height="32" src="${g.site_logo}" alt="${g.site_title} Logo" title="${g.site_title} Logo" id="logo" /></a><div class="menu"><form action="${url(controller='package', action='search')}" method="GET">


http://bitbucket.org/okfn/ckanext-pdeu/changeset/f0087f88c6b5/
changeset:   f0087f88c6b5
user:        pudo
date:        2011-06-09 11:35:09
summary:     fix digitaliser.dk harvester by namespacing all XML nodes
affected #:  1 file (394 bytes)

--- a/ckanext/pdeu/harvesters.py	Wed Jun 08 19:04:57 2011 +0200
+++ b/ckanext/pdeu/harvesters.py	Thu Jun 09 11:35:09 2011 +0200
@@ -615,6 +615,8 @@
 from lxml import etree
 class DigitaliserDkHarvester(HarvesterBase):
     API_ENDPOINT = "http://api.digitaliser.dk/rest/"
+    NS = "{urn:oio:digitaliserdk:rest:1.0}"
+    PSN = "{http://rep.oio.dk/ebxml/xml/schemas/dkcc/2003/02/13/}"
 
     def info(self):
         return {
@@ -634,10 +636,9 @@
             req = 'resources/search?query=&firstResult=%s&maxResults=%s' % \
                  (firstResult, maxResults)
             doc = etree.parse(self.API_ENDPOINT + req)
-            for handle in doc.findall("/ResourceHandle"):
+            for handle in doc.findall(self.NS + "ResourceHandle"):
                 link = handle.get('handleReference')
                 id = sha1(link).hexdigest()
-                print link
                 obj = HarvestObject(guid=id, job=harvest_job, content=link)
                 obj.save()
                 ids.append(obj.id)
@@ -648,43 +649,46 @@
 
     def fetch_stage(self, harvest_object):
         doc = etree.parse(harvest_object.content)
-        category = doc.findtext('//ResourceCategoryHandle/TitleText')
+        category = doc.findtext('//' + self.NS + 'ResourceCategoryHandle/' + self.NS + 'TitleText')
         if category != "Datakilde":
             return
         package_dict = {'extras': {}, 'resources': [], 'tags': []}
-        package_dict['title'] = doc.findtext('/TitleText')
-        package_dict['notes'] = doc.findtext('/BodyText')
-        package_dict['author'] = doc.findtext('/ResourceOwnerGroupHandle/TitleText')
+        package_dict['title'] = doc.findtext(self.NS + 'TitleText')
+        package_dict['notes'] = doc.findtext(self.NS + 'BodyText')
+        package_dict['author'] = doc.findtext(self.NS + \
+                'ResourceOwnerGroupHandle/' + self.NS + 'TitleText')
         package_dict['extras']['harvest_dataset_url'] = harvest_object.content
 
-        package_dict['metadata_created'] = doc.findtext('/CreatedDateTime')
-        package_dict['metadata_modified'] = doc.find('/PublishedState').get('publishedDateTime')
+        package_dict['metadata_created'] = doc.findtext(self.NS + 'CreatedDateTime')
+        package_dict['metadata_modified'] = doc.find(self.NS + 'PublishedState').get('publishedDateTime')
         
-        responsible = doc.findtext('/ResponsibleReference')
+        responsible = doc.findtext(self.NS + 'ResponsibleReference')
         res_doc = etree.parse(responsible)
-        package_dict['maintainer'] = res_doc.findtext('/ns2:PersonGivenName') + \
-            " " + res_doc.findtext('/ns2:PersonSurnameName')
+        package_dict['maintainer'] = res_doc.findtext('//' + self.PSN + 'PersonGivenName') + \
+            " " + res_doc.findtext('//' + self.PSN + 'PersonSurnameName')
 
         package_dict['extras']['categories'] = []
-        for tax_handle in doc.findall('//TaxonomyNodeHandle'):
-            package_dict['extras']['categories'].append(tax_handle.findtext('TitleText'))
+        for tax_handle in doc.findall('//' + self.NS + 'TaxonomyNodeHandle'):
+            package_dict['extras']['categories'].append(tax_handle.findtext(self.NS + 'TitleText'))
         
-        for tag_handle in doc.findall('//TagHandle'):
-            package_dict['tags'].append(tag_handle.findtext('LabelText'))
+        for tag_handle in doc.findall('//' + self.NS + 'TagHandle'):
+            package_dict['tags'].append(tag_handle.findtext(self.NS + 'LabelText'))
         
-        ref_handle = doc.find('//ReferenceHandle')
+        ref_handle = doc.find('//' + self.NS + 'ReferenceHandle')
         if ref_handle: 
             ref_doc = etree.parse(ref_handle.get('handleReference'))
             package_dict['url'] = ref_doc.getroot().get('url')
 
-        for artefact in doc.find('//ArtefactHandle'):
+        for artefact in doc.findall('//' + self.NS + 'ArtefactHandle'):
             art_doc = etree.parse(artefact.get('handleReference'))
             package_dict['resources'].append({
                 'url': art_doc.getroot().get('url'),
                 'format': '',
-                'description': artefact.findtext('TitleText')
+                'description': artefact.findtext(self.NS + 'TitleText')
                 })
         
+        #from pprint import pprint
+        #pprint(package_dict)
         harvest_object.content = json.dumps(package_dict)
         harvest_object.save()
         return True

Repository URL: https://bitbucket.org/okfn/ckanext-pdeu/

--

This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled for this
repository and is configured to send to the recipient of this email.




More information about the ckan-changes mailing list