[ckan-changes] commit/ckanext-pdeu: 4 new changesets
Bitbucket
commits-noreply at bitbucket.org
Fri Jun 10 16:59:31 UTC 2011
4 new changesets in ckanext-pdeu:
http://bitbucket.org/okfn/ckanext-pdeu/changeset/d82865665d71/
changeset: d82865665d71
user: amercader
date: 2011-06-10 16:38:51
summary: Enhanced symbolization for the availability map. Colors and number of classes can be defined in the ini file. Popups show links to the country's packages
affected #: 7 files (10.5 KB)
--- a/ckanext/pdeu/controllers.py Thu Jun 09 22:44:44 2011 +0200
+++ b/ckanext/pdeu/controllers.py Fri Jun 10 15:38:51 2011 +0100
@@ -73,6 +73,10 @@
class MapController(BaseController):
def show(self):
+ c.startColor = config.get('pdeu.map.start_color','#F1EEF6')
+ c.endColor = config.get('pdeu.map.end_color','#045A8D')
+ c.groups = config.get('pdeu.map.groups',5)
+
template_file = os.path.join(get_root_dir(), 'ckanext', 'pdeu', 'theme', 'templates', 'home', 'map.html')
return render(template_file)
@@ -96,7 +100,7 @@
# Set the package count for each country
for ft in o['features']:
code = ft['properties']['NUTS']
- ft['properties']['datasets'] = values[code] if code in values else 0
+ ft['properties']['packages'] = values[code] if code in values else 0
response.content_type = 'application/json'
return json.dumps(o)
--- a/ckanext/pdeu/theme/public/css/map.css Thu Jun 09 22:44:44 2011 +0200
+++ b/ckanext/pdeu/theme/public/css/map.css Fri Jun 10 15:38:51 2011 +0100
@@ -2,6 +2,24 @@
background-color: #E9F4FF;
}
+.popup {
+ padding: 1em
+}
+
+.popup .name{
+ font-weight: bold;
+}
+
+.popup .local_name{
+ font-style: italic;
+ border-bottom: 1px solid #E3E3E3;
+}
+
+.popup .packages{
+ margin-top: 1em;
+}
+
+
.olPopupCloseBox {
background: url("/js/libs/openlayers/img/close.png") no-repeat scroll 0 0 transparent !important;
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ckanext/pdeu/theme/public/js/libs/jquery.color.js Fri Jun 10 15:38:51 2011 +0100
@@ -0,0 +1,243 @@
+/* Copyright (c) 2006 Mathias Bank (http://www.mathias-bank.de)
+ * Dual licensed under the MIT (http://www.opensource.org/licenses/mit-license.php)
+ * and GPL (http://www.opensource.org/licenses/gpl-license.php) licenses.
+ */
+
+
+/**
+ * Display a customized tooltip instead of the default one
+ * for every selected element. The tooltip behaviour mimics
+ * the default one, but lets you style the tooltip and
+ * specify the delay before displaying it.
+ *
+ * In addition, it displays the href value, if it is available.
+ *
+ * @example $('li').colorize('#ff1313', '#000000', [1,1,1],"color");
+ * @desc sets the css tag "color" for all li elements, changing the
+ color from #ff1313 to #000000 in alinear way
+ *
+ * @name ColorGradient
+ * @type jQuery
+ * @cat Plugins/ColorGradient
+ * @author Mathias Bank (http://www.mathias-bank.de)
+ */
+jQuery.extend({
+
+ hexDigits: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"],
+
+ /**
+ * generates a rgb value, using a hex value
+ */
+ hex2rgb: function(hex) {
+
+ var rgb = new Array();
+ try {
+ hex = this.checkHex(hex);
+ rgb[0]=parseInt(hex.substr(1, 2), 16);
+ rgb[1]=parseInt(hex.substr(3, 2), 16);
+ rgb[2]=parseInt(hex.substr(5, 2), 16);
+ return rgb;
+ } catch (e) {
+ throw e;
+ }
+ },
+
+ //generates the hex-digits for a color.
+ hex: function(x) {
+ return isNaN(x) ? "00" : this.hexDigits[(x - x % 16) / 16] + this.hexDigits[x % 16];
+ },
+
+
+ /**
+ * generates a hex value, using a rgb value
+ * @param array(red, greed, blue);
+ */
+ rgb2hex: function(rgb) {
+ try {
+ this.checkRGB(rgb);
+
+ return "#" + hex(rgb[0]) + hex(rgb[1]) + hex(rgb[2]);
+ } catch (e) {
+ throw e;
+ }
+ },
+
+
+ /**
+ * checks, if an array of three values is a valid rgb-array
+ */
+ checkRGB: function(rgb) {
+ if (rgb.length!=3) throw "this is not a valid rgb-array";
+
+ if (isNaN(rgb[0]) || isNaN(rgb[1]) || isNaN(rgb[2])) throw "this is not a valid rgb-array";
+
+ if (rgb[0]<0 || rgb[0]>255 || rgb[1]<0 || rgb[1]>255 || rgb[2]<0 || rgb[3]>255) throw "this is not a valid rgb-array";
+
+ return rgb;
+ },
+
+ /**
+ * checks, if a given number is a hexadezimal number
+ */
+ checkHex: function(hex) {
+ if (!hex || hex=="" || hex=="#") throw "No valid hexadecimal given.";
+
+ hex = hex.toUpperCase();
+
+ switch(hex.length) {
+ case 6:
+ hex = "#" + hex;
+ break;
+ case 7:
+ break;
+ case 3:
+ hex = "#" + hex;
+ break;
+ case 4:
+ hex = "#" + hex.substr(1, 1) + hex.substr(1, 1) + hex.substr(2, 1) + hex.substr(2, 1) + hex.substr(3, 1) + hex.substr(3, 1);
+ break;
+ }
+ if(hex.substr(0, 1) != "#" || !this.checkHexDigits(hex.substr(1))) {
+ throw "No valid hexadecimal given.";
+ }
+
+ return hex;
+ },
+
+ /**
+ * checks, if there is any unvalid digit for a hex number
+ */
+ checkHexDigits: function(s) {
+ var j, found;
+ for(var i = 0; i < s.length; i++) {
+ found = false;
+ for(j = 0; j < this.hexDigits.length; j++)
+ if(s.substr(i, 1) == this.hexDigits[j])
+ found = true;
+ if(!found)
+ return false;
+ }
+ return true;
+ },
+
+ /**
+ * calculates an array with hex values.
+ * @param startColor: starting color (hex-format or rgb)
+ * @param endColor: ending color (hex-format or rgb)
+ * @param options: defines, how the color should be generated. The options are defined
+ by an object with:
+ count: specifies, how many colors should be generated
+ type: array for each color. Speciefies, how the missing color should be calculated:
+ 1: linear
+ 2: trigonometrical
+ 3: accidentally
+ 4: ordered accident
+ */
+ calculateColor: function(startColor, endColor, options) {
+ if (!options || !options.type || !options.type[0] || !options.type[1] || !options.type[2] || !options.count)
+ options = this.colorGradientOptions;
+
+ var color = new Array();
+ try {
+ try {
+ var start = this.hex2rgb(startColor);
+ var end = this.hex2rgb(endColor);
+ } catch (e) {
+ //no hex-value => check if rgb
+ this.checkRGB(startColor);
+ var start = startColor;
+ this.checkRGB(endColor);
+ var end = endColor;
+ }
+
+ var rgb = new Array();
+ rgb[0] = this.calculateGradient(start[0], end[0], options.count, options.type[0]);
+ rgb[1] = this.calculateGradient(start[1], end[1], options.count, options.type[1]);
+ rgb[2] = this.calculateGradient(start[2], end[2], options.count, options.type[2]);
+
+ for(var i = 0; i < options.count; i++) {
+ color[i] = "#" + this.hex(rgb[0][i]) + this.hex(rgb[1][i]) + this.hex(rgb[2][i]);
+ }
+ } catch (e) {
+ throw e;
+ }
+ return color;
+ },
+
+ /**
+ * calculateGradient for a color
+ * @param startVal
+ * @param endVal
+ * @param count
+ * @param type: array for each color. Speciefies, how the missing color should be calculated:
+ 1: linear
+ 2: trigonometrical
+ 3: accidentally
+ 4: ordered accident
+ */
+ calculateGradient: function(startVal, endVal,count, type) {
+ var a = new Array();
+ if(!type || !count) {
+ return null;
+ } else if (1<count && count < 3) {
+ a[0] = startVal;
+ a[1] = endVal;
+ return a;
+ } else if (count==1) {
+ a[0] = endVal;
+ return a;
+ }
+
+ switch(type) {
+ case 1: //"linear"
+ var i;
+ for(i = 0; i < count; i++)
+ a[i] = Math.round(startVal + (endVal - startVal) * i / (count - 1));
+ break;
+
+ case 2: //trigonometrical
+ var i;
+ for(i = 0; i < count; i++)
+ a[i] = Math.round(startVal + (endVal - startVal) * ((Math.sin((-Math.PI / 2) + Math.PI * i / (count - 1)) + 1) / 2));
+ break;
+
+ case 3: //accident
+ var i;
+ for(i = 1; i < count - 1; i++)
+ a[i] = Math.round(startVal + (endVal - startVal) * Math.random());
+ a[0] = startVal;
+ a[count - 1] = endVal;
+ break;
+
+ case 4: //ordered accident
+ var i;
+ for(i = 1; i < count - 1; i++)
+ a[i] = Math.round(startVal + (endVal - startVal) * Math.random());
+ a[0] = startVal;
+ a[count - 1] = endVal;
+ if((typeof(a.sort) == "function") && (typeof(a.reverse) == "function"))
+ {
+ a.sort(this.cmp);
+ if(startVal > endVal)
+ a.reverse();
+ }
+ break;
+ }
+ return a;
+ },
+
+ //compares two values to sort
+ cmp: function(a, b) {
+ return a - b;
+ },
+
+ /**
+ *
+ */
+ colorGradientOptions: {
+ count: 5,
+ type: [1,1,1]
+
+ }
+});
+
--- a/ckanext/pdeu/theme/public/js/libs/openlayers/README.txt Thu Jun 09 22:44:44 2011 +0200
+++ b/ckanext/pdeu/theme/public/js/libs/openlayers/README.txt Fri Jun 10 15:38:51 2011 +0100
@@ -14,3 +14,7 @@
python build.py {path-to-ckan.cfg} {output-file}
+The theme used for the OpenLayers controls is the "dark" theme made available
+by Development Seed under the BSD License:
+
+https://github.com/developmentseed/openlayers_themes/blob/master/LICENSE.txt
--- a/ckanext/pdeu/theme/public/js/map.js Thu Jun 09 22:44:44 2011 +0200
+++ b/ckanext/pdeu/theme/public/js/map.js Fri Jun 10 15:38:51 2011 +0100
@@ -5,7 +5,7 @@
var selectControl = null;
var selectedFeature = null;
-
+
var guessBestAnchorPoint = function(geometry){
if (geometry.components.length == 1){
return geometry.getBounds().getCenterLonLat();
@@ -19,7 +19,7 @@
largest_component = geometry.components[i]
}
}
- return largest_component.getBounds().getCenterLonLat();
+ return largest_component.getBounds().getCenterLonLat();
}
}
@@ -30,7 +30,15 @@
var html = "<div class=\"popup\">";
html += "<div class=\"name\">" + feature.attributes.NAME +"</div>";
- html += "<div class=\"address\">" + feature.attributes.NAME_LOCAL+"</div>"
+ html += "<div class=\"local_name\">" + feature.attributes.NAME_LOCAL+"</div>"
+ html += "<div class=\"packages\">";
+ if (feature.attributes.packages){
+ html += "<a href=\"http://localhost:5000/package?extras_eu_country=" + feature.attributes.NUTS + "\">" +
+ feature.attributes.packages+" packages";
+ } else {
+ html += "No packages yet";
+ }
+ html += "</a></div>"
var popup = new OpenLayers.Popup.FramedCloud("Feature Info",
guessBestAnchorPoint(feature.geometry),
@@ -44,10 +52,10 @@
return false;
}
-
+
var onPopupClose = function(event){
avoidNextClick = true;
-
+
selectControl.unselect(selectedFeature);
selectedFeature = null;
}
@@ -57,59 +65,112 @@
event.feature.popup.destroy();
event.feature.popup = null;
}
-
- var getFeatureStyles = function(){
+ var getBreakPoints = function(groups){
+
+ var values = []
+ for (var i = 0; i < featuresLayer.features.length; i++){
+ ft = featuresLayer.features[i]
+ if (ft.attributes.packages != 0)
+ values.push(parseInt(ft.attributes.packages))
+ }
+ values.sort(function(a,b){return a -b})
+ var points = [];
+ var range = (values[values.length-1] - values[0]) / groups;
+ for (var i = 0; i < groups; i++){
+ if (i > 0)
+ points.push(values[0] + i*range)
+ }
+
+ return points;
+
+ }
+
+ var setupStyles = function(){
+ var config = CKAN.EuroMap.config;
+ var groups = 5;
+ var colors = $.calculateColor(config.startColor,config.endColor,config.groups,1);
+ var breakPoints = getBreakPoints(groups);
// Default properties for all rules
var defaultStyle = new OpenLayers.Style({
"cursor":"pointer",
- "fillColor":"#E6E6E6",
- "strokeColor":"#000000"
+ "strokeColor":"#000000",
+ "strokeWidth":"1"
});
var selectStyle = new OpenLayers.Style({
- "cursor":"pointer",
- "fillColor":"#CC0000",
- "strokeWidth":"1.5"
+ "fillColor":"#FFFFA3",
});
- defaultStyle.addRules([
+ // Create rules according to the actual values
+ var rules = []
+
+ // Countries with no packages
+ rules.push(
new OpenLayers.Rule({
filter: new OpenLayers.Filter.Comparison({
type: "==",
- property: "datasets",
+ property: "packages",
value: 0
}),
symbolizer: {
"fillColor":'#FFFFFF'
}
- }),
- new OpenLayers.Rule({
- elseFilter: true,
- symbolizer: {
- "fillColor": "#00FF00"
- }
- })
- ]);
+ }))
- styleMap = new OpenLayers.StyleMap({
+ var min, max;
+ for (var i = 0; i < breakPoints.length; i++) {
+ if (i < breakPoints.length -1){
+ min = (i == 0) ? 1 : breakPoints[i - 1];
+ max = breakPoints [i];
+
+ rules.push(
+ new OpenLayers.Rule({
+ filter: new OpenLayers.Filter.Logical({
+ type: "&&",
+ filters: [
+ new OpenLayers.Filter.Comparison({
+ type: "<=",
+ property: "packages",
+ value: max
+ }),
+ new OpenLayers.Filter.Comparison({
+ type: ">",
+ property: "packages",
+ value: min
+ })]
+ }),
+ symbolizer: {"fillColor": colors[i]}
+ }));
+ } else {
+ min = breakPoints[i]
+ rules.push(
+ new OpenLayers.Rule({
+ filter: new OpenLayers.Filter.Comparison({
+ type: ">",
+ property: "packages",
+ value: min
+ }),
+ symbolizer: {"fillColor": colors[i]}
+ }));
+ }
+ };
+
+
+ defaultStyle.addRules(rules);
+
+ styleMap = new OpenLayers.StyleMap({
"default":defaultStyle,
"select":selectStyle})
-
- return styleMap;
+
+ featuresLayer.styleMap = styleMap;
+ featuresLayer.redraw();
}
-
-
// Public
-
return {
map: null,
setup: function(){
- // Set element positions
- //$("#loading").css("left",$(window).width()/2 - $("#loading").width()/2);
-
-
// Set map div size
var w = $("#content").width()
$("#map").width((w < 600) ? w : 600);
@@ -119,25 +180,28 @@
// Create a new map
var map = new OpenLayers.Map("map" ,
{
- /*
+ /*
projection: new OpenLayers.Projection("EPSG:900913"),
-
displayProjection: new OpenLayers.Projection("EPSG:4326"),
units: "m",
- maxResolution: 156543.0339,
- maxExtent: new OpenLayers.Bounds(-20037508.34, -20037508.34,
- 20037508.34, 20037508.34),
-*/ maxExtent: new OpenLayers.Bounds(-33.32,26.72,47.02,72.23),
+ maxResolution: 156543.0339,
+ maxExtent: new OpenLayers.Bounds(-20037508.34, -20037508.34,
+ 20037508.34, 20037508.34),
+ */
+ maxExtent: new OpenLayers.Bounds(-33.32,26.72,47.02,72.23),
+ maxScale: 30000000,
+ minScale: 6000000,
+ numZoomLevels: 3,
fallThrough: true,
controls: [
- new OpenLayers.Control.Navigation()
+ new OpenLayers.Control.Navigation(),
+ new OpenLayers.Control.PanZoomBar()
],
theme:"/js/libs/openlayers/theme/default/style.css"
});
// Create layers to add
var layers = [
- //osm = new OpenLayers.Layer.OSM("Simple OSM Map"),
euro = new OpenLayers.Layer.Vector("Europa", {
strategies: [new OpenLayers.Strategy.Fixed()],
//projection: new OpenLayers.Projection("EPSG:900913"),
@@ -145,52 +209,37 @@
url: "/map/data.json",
format: new OpenLayers.Format.GeoJSON()
}),
- styleMap: getFeatureStyles(),
isBaseLayer: true
})
];
map.addLayers(layers);
-
+ featuresLayer = euro
+
// Create two selection controls,one for the hover/highlight and one
// for the click/popup
var hoverSelectControl = new OpenLayers.Control.SelectFeature(
[euro],
- {
- "hover": true,
- "multiple": false,
- "highlightOnly":true
- }
- );
+ {"hover": true,"multiple": false,"highlightOnly":true});
map.addControl(hoverSelectControl);
hoverSelectControl.activate();
+
selectControl = new OpenLayers.Control.SelectFeature(
[euro],
- {
- "hover": false,
- "multiple": false,
- }
- );
+ {"hover": false,"multiple": false});
map.addControl(selectControl);
selectControl.activate();
euro.events.register("featureselected",this,onFeatureSelect);
euro.events.register("featureunselected",this,onFeatureUnselect);
+ euro.events.register("featuresadded",this,setupStyles);
-
-
- map.setCenter(
- new OpenLayers.LonLat(8.98,48.74),4
- );
-
- //map.zoomToMaxExtent()
- //map.zoomToExtent(new OpenLayers.Bounds(-33.32,26.72,47.02,72.23),true);
-
+ map.setCenter(new OpenLayers.LonLat(8.98,48.74),3);
+
this.map = map;
}
}
-
}(jQuery);
OpenLayers.ImgPath = "/js/libs/openlayers/img/";
--- a/ckanext/pdeu/theme/templates/home/map.html Thu Jun 09 22:44:44 2011 +0200
+++ b/ckanext/pdeu/theme/templates/home/map.html Fri Jun 10 15:38:51 2011 +0100
@@ -8,17 +8,20 @@
<py:def function="body_class">hide-sidebar</py:def><py:def function="optional_head">
+ <link rel="stylesheet" href="${g.site_url}/css/map.css" />
+ </py:def>
+ <py:def function="optional_footer"><!-- Map -->
- <script type="text/javascript" src="${g.site_url}/js/libs/jquery-1.5.1.min.js"></script>
- <!--<script type="text/javascript" src="http://localhost/euro/lib/openlayers-2.10/OpenLayers.js"></script> -->
+ <script type="text/javascript" src="${g.site_url}/js/libs/jquery.color.js"></script><script type="text/javascript" src="${g.site_url}/js/libs/openlayers/OpenLayers_pdeu.js"></script><script type="text/javascript" src="${g.site_url}/js/map.js"></script>
- <link rel="stylesheet" href="${g.site_url}/css/map.css" />
+ <script type="text/javascript">
+ CKAN.EuroMap.config = {"startColor": "${c.startColor}","endColor": "${c.endColor}","groups": ${c.groups}}
+ </script></py:def><div py:match="content">
- <p i18n:msg="">Bla Bla</p><div id="map"></div>
--- a/ckanext/pdeu/theme/templates/layout_base.html Thu Jun 09 22:44:44 2011 +0200
+++ b/ckanext/pdeu/theme/templates/layout_base.html Fri Jun 10 15:38:51 2011 +0100
@@ -188,6 +188,10 @@
<!-- scripts concatenated and minified via ant build script--><script type="text/javascript" src="http://assets.okfn.org/ext/jquery.cookie/jquery.cookie.min.js"></script><script type="text/javascript" src="http://assets.okfn.org/ext/jquery.placeholder/jquery.placeholder.js"></script>
+ <py:if test="defined('optional_footer')">
+ ${optional_footer()}
+ </py:if>
+
<!--script type="text/javascript" src="${g.site_url}/scripts/application.js"></script--><!-- end scripts-->
http://bitbucket.org/okfn/ckanext-pdeu/changeset/3bc5bee4ffec/
changeset: 3bc5bee4ffec
user: amercader
date: 2011-06-10 17:45:20
summary: Clean up harvesters and make sure all use sha1 to generate the ids
affected #: 3 files (306 bytes)
--- a/ckanext/pdeu/harvesters/data_publica.py Fri Jun 10 15:38:51 2011 +0100
+++ b/ckanext/pdeu/harvesters/data_publica.py Fri Jun 10 16:45:20 2011 +0100
@@ -2,13 +2,10 @@
import urllib2, urllib
import string
from datetime import datetime
-
+from hashlib import sha1
import logging
-from ckan import model
-from ckan.model import Session
-from ckan.logic import ValidationError, NotFound
-
+from ckanext.harvest.model import HarvestObject
from ckanext.harvest.harvesters import HarvesterBase
from lxml import html
from cookielib import CookieJar
@@ -27,6 +24,8 @@
}
gathered_ids = []
+ object_ids = []
+ job = None
page = 1
def _gather_ids(self,url = None, jar= None):
@@ -45,10 +44,14 @@
id = href.split('/').pop()
if not id in self.gathered_ids:
log.debug('Got Id: %s' % id)
- #self.queue(DataPublicaDatasetCrawler, url=href)
+ obj = HarvestObject(guid=sha1(id).hexdigest(), job=self.job, content=id)
+ obj.save()
+
+ self.object_ids.append(obj.id)
+
new_ids.append(id)
- if len(new_ids) == 0: # or self.page == 2:
+ if len(new_ids) == 0 or self.page == 2:
return self.gathered_ids
else:
self.gathered_ids.extend(new_ids)
@@ -63,10 +66,9 @@
def gather_stage(self,harvest_job):
log.debug('In DataPublica gather_stage (%s)' % harvest_job.source.url)
-
+ self.job = harvest_job
remote_ids = self._gather_ids(self.INITIAL_INDEX)
- #remote_ids = ['20110524-36F426','20110524-10821AB','20110523-10DACE3']
-
+ return self.object_ids
return self._create_harvest_objects(remote_ids,harvest_job)
@@ -74,7 +76,7 @@
log.debug('In DataPublicaHarvester fetch_stage')
# Get URL
url = harvest_object.source.url.rstrip('/')
- url = url + '/en/data_set_module/' + harvest_object.guid
+ url = url + '/en/data_set_module/' + harvest_object.content
# Get contents
try:
--- a/ckanext/pdeu/harvesters/london.py Fri Jun 10 15:38:51 2011 +0100
+++ b/ckanext/pdeu/harvesters/london.py Fri Jun 10 16:45:20 2011 +0100
@@ -4,8 +4,8 @@
from datetime import datetime
from csv import DictReader
import logging
+from hashlib import sha1
-from ckan.logic.action.update import package_update_rest
from ckan.lib.helpers import json
from ckanext.harvest.model import HarvestObject
@@ -14,6 +14,7 @@
log = logging.getLogger(__name__)
class DataLondonGovUkHarvester(HarvesterBase):
+ CATALOGUE_URL = "http://data.london.gov.uk"
CATALOGUE_CSV_URL = "http://data.london.gov.uk/datafiles/datastore-catalogue.csv"
def info(self):
@@ -30,7 +31,7 @@
csv = DictReader(csvfh)
ids = []
for row in csv:
- id = row.get('DRUPAL_NODE')
+ id = sha1('%s/%s' % (self.CATALOGUE_URL,row.get('DRUPAL_NODE'))).hexdigest()
row = dict([(k, v.decode('latin-1')) for k, v in row.items()])
obj = HarvestObject(guid=id, job=harvest_job,
content=json.dumps(row))
--- a/ckanext/pdeu/harvesters/opengov_se.py Fri Jun 10 15:38:51 2011 +0100
+++ b/ckanext/pdeu/harvesters/opengov_se.py Fri Jun 10 16:45:20 2011 +0100
@@ -7,10 +7,12 @@
except ImportError:
from StringIO import StringIO
from lxml import html, etree
+from hashlib import sha1
from ckanext.rdf.consume import consume_one
from ckanext.rdf.vocab import Graph
from ckanext.harvest.harvesters import HarvesterBase
+from ckanext.harvest.model import HarvestObject
log = logging.getLogger(__name__)
@@ -29,17 +31,20 @@
log.debug('In OpenGovSeHarvester gahter_stage')
# Get feed contents
doc = etree.parse(self.INDEX_URL)
- remote_ids = []
+ ids = []
for id_element in doc.findall('//{%(ns)s}entry/{%(ns)s}id' % {'ns':self.ATOM_NS}):
- id = id_element.text.strip()
- log.debug('Got id: %s' % id)
- remote_ids.append(id)
+ link = id_element.text.strip()
+ log.debug('Got link: %s' % link)
+ id = sha1(link).hexdigest()
+ obj = HarvestObject(guid=id, job=harvest_job, content=link)
+ obj.save()
- return self._create_harvest_objects(remote_ids,harvest_job)
+ ids.append(obj.id)
+ return ids
def fetch_stage(self,harvest_object):
log.debug('In OpenGovSeHarvester fetch_stage')
- url = harvest_object.guid.strip('/') + '/rdf/'
+ url = harvest_object.content.strip('/') + '/rdf/'
try:
fh = urllib2.urlopen(url)
harvest_object.content = fh.read()
http://bitbucket.org/okfn/ckanext-pdeu/changeset/180d18cdb79a/
changeset: 180d18cdb79a
user: amercader
date: 2011-06-10 18:58:28
summary: Fix dataset_url in Data Publica harvester
affected #: 1 file (11 bytes)
--- a/ckanext/pdeu/harvesters/data_publica.py Fri Jun 10 16:45:20 2011 +0100
+++ b/ckanext/pdeu/harvesters/data_publica.py Fri Jun 10 17:58:28 2011 +0100
@@ -51,7 +51,7 @@
new_ids.append(id)
- if len(new_ids) == 0 or self.page == 2:
+ if len(new_ids) == 0: #or self.page == 2:
return self.gathered_ids
else:
self.gathered_ids.extend(new_ids)
@@ -103,7 +103,6 @@
package_dict = {}
extras_dict = {}
- #TODO: Avoid collisions?
package_dict['id'] = harvest_object.guid
doc = html.document_fromstring(harvest_object.content)
for field in doc.findall(".//div"):
@@ -179,11 +178,13 @@
'description':resource_descriptions[i]
})
+ base = doc.find('.//head/base')
+ dataset_url = base.get('href')
# Common extras
extras_dict['harvest_catalogue_name'] = u'Data Publica'
extras_dict['harvest_catalogue_url'] = u'http://www.data-publica.com'
- extras_dict['harvest_dataset_url'] = u'http://www.data-publica.com/en/data_set_module/%s' % harvest_object.guid
+ extras_dict['harvest_dataset_url'] = dataset_url
extras_dict['eu_country'] = u'FR'
package_dict['name'] = self._gen_new_name(package_dict['title'])
http://bitbucket.org/okfn/ckanext-pdeu/changeset/29eeb7c25152/
changeset: 29eeb7c25152
user: amercader
date: 2011-06-10 18:59:11
summary: Add harvester for Catalan catalogue. TODO: Languages get mixed
affected #: 3 files (3.6 KB)
--- a/ckanext/pdeu/harvesters/__init__.py Fri Jun 10 17:58:28 2011 +0100
+++ b/ckanext/pdeu/harvesters/__init__.py Fri Jun 10 17:59:11 2011 +0100
@@ -7,5 +7,6 @@
from paris import OpendataParisFrHarvester
from digitaliser_dk import DigitaliserDkHarvester
from piemonte import DatiPiemonteItHarvester
+from opendata_cat import OpenDataCatHarvester
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ckanext/pdeu/harvesters/opendata_cat.py Fri Jun 10 17:59:11 2011 +0100
@@ -0,0 +1,101 @@
+#coding: utf-8
+import urllib2
+import logging
+
+try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
+from lxml import html, etree
+from hashlib import sha1
+
+from ckanext.rdf.consume import consume_one
+from ckanext.rdf.vocab import Graph
+from ckanext.harvest.harvesters import HarvesterBase
+from ckanext.harvest.model import HarvestObject
+
+
+
+log = logging.getLogger(__name__)
+
+class OpenDataCatHarvester(HarvesterBase):
+ INDEX_URL = "http://dadesobertes.gencat.cat/recursos/datasets/cataleg_ca.xml"
+ RDF_URL = "http://dadesobertes.gencat.cat/recursos/datasets/%s.rdf"
+ DATASET_URL = "http://dadesobertes.gencat.cat/ca/dades-obertes/%s.html"
+
+ def info(self):
+ return {
+ 'name': 'opendata_cat',
+ 'title': 'Catalan Government catalogue',
+ 'description': 'Harvester for the Catalan Government Catalogue (http://opendata.gencat.cat)'
+ }
+
+ def gather_stage(self,harvest_job):
+ log.debug('In OpenDataCatHarvester gahter_stage')
+ # Get feed contents
+ doc = etree.parse(self.INDEX_URL)
+ ids = []
+ for link_element in doc.findall('//item/link'):
+ link = link_element.text.strip()
+ id = sha1(link).hexdigest()
+ obj = HarvestObject(guid=id, job=harvest_job, content=link)
+ obj.save()
+
+ ids.append(obj.id)
+ return ids
+
+ def fetch_stage(self,harvest_object):
+ log.debug('In OpenDataCatHarvester fetch_stage')
+
+ identifier = harvest_object.content.split('/').pop().split('.')[0]
+ url = self.RDF_URL % identifier
+ try:
+ fh = urllib2.urlopen(url)
+ harvest_object.content = fh.read().decode('iso-8859-1')
+ harvest_object.save()
+ fh.close()
+ return True
+ except Exception, e:
+
+ import pdb; pdb.set_trace()
+ log.exception(e)
+ self._save_object_error('Unable to get content for dataset: %s: %r' % \
+ (url, e), harvest_object)
+
+ def import_stage(self,harvest_object):
+ log.debug('In OpenDataCatHarvester import_stage')
+ if not harvest_object:
+ log.error('No harvest object received')
+ return False
+
+ if harvest_object.content is None:
+ self._save_object_error('Empty content for object %s' % harvest_object.id,harvest_object,'Import')
+ return False
+
+ try:
+ graph = Graph()
+ graph.parse(StringIO(harvest_object.content.encode('utf-8')))
+
+ url = harvest_object.guid
+ package_dict = consume_one(graph)
+ except Exception, e:
+ log.exception(e)
+ self._save_object_error('%r'%e,harvest_object,'Import')
+
+ package_dict['id'] = harvest_object.guid
+ package_dict['name'] = self._gen_new_name(package_dict['title'])
+
+ # Set the modification date
+ if 'date_modified' in package_dict['extras']:
+ package_dict['metadata_modified'] = package_dict['extras']['date_modified']
+
+ # Common extras
+ package_dict['extras']['harvest_catalogue_name'] = u'Dades Obertes Gencat'
+ package_dict['extras']['harvest_catalogue_url'] = u'http://dadesobertes.gencat.cat'
+ package_dict['extras']['harvest_dataset_url'] = self.DATASET_URL % package_dict['extras']['rdf_source_id'].strip('#')
+ package_dict['extras']['eu_country'] = u'ES'
+ package_dict['extras']['eu_nuts2'] = u'ES51'
+
+ return self._create_or_update_package(package_dict,harvest_object)
+
+
--- a/setup.py Fri Jun 10 17:58:28 2011 +0100
+++ b/setup.py Fri Jun 10 17:59:11 2011 +0100
@@ -35,5 +35,6 @@
opendata_paris_fr_harvester=ckanext.pdeu.harvesters:OpendataParisFrHarvester
digitaliser_dk_harvester=ckanext.pdeu.harvesters:DigitaliserDkHarvester
piemonte_harvester=ckanext.pdeu.harvesters:DatiPiemonteItHarvester
+ opendata_cat_harvester=ckanext.pdeu.harvesters:OpenDataCatHarvester
""",
)
Repository URL: https://bitbucket.org/okfn/ckanext-pdeu/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled and are listed as the
recipient of this email.
More information about the ckan-changes
mailing list