[ckan-changes] commit/ckanext-inspire: 2 new changesets

Bitbucket commits-noreply at bitbucket.org
Tue Sep 6 17:26:09 UTC 2011


2 new changesets in ckanext-inspire:

http://bitbucket.org/okfn/ckanext-inspire/changeset/14013dda871a/
changeset:   14013dda871a
user:        amercader
date:        2011-09-06 11:18:32
summary:     Avoid mailto links in WAF
affected #:  1 file (58 bytes)

--- a/ckanext/inspire/harvesters.py	Sat Jun 11 12:56:55 2011 +0100
+++ b/ckanext/inspire/harvesters.py	Tue Sep 06 10:18:32 2011 +0100
@@ -686,6 +686,8 @@
                 continue
             if '#' in url:
                 continue
+            if 'mailto:' in url:
+                continue
             urls.append(url)
         base_url = base_url.rstrip('/').split('/')
         if 'index' in base_url[-1]:


http://bitbucket.org/okfn/ckanext-inspire/changeset/216a7f72f9bf/
changeset:   216a7f72f9bf
user:        amercader
date:        2011-09-06 18:05:15
summary:     Better log messages
affected #:  1 file (142 bytes)

--- a/ckanext/inspire/harvesters.py	Tue Sep 06 10:18:32 2011 +0100
+++ b/ckanext/inspire/harvesters.py	Tue Sep 06 17:05:15 2011 +0100
@@ -136,7 +136,7 @@
             if self.validator is not None:
                 valid, messages = self.validator.isvalid(xml)
                 if not valid:
-                    self._save_object_error('Content is not a valid Gemini document %r'%messages,self.obj,'Import')
+                    self._save_object_error('Validation error %r'%messages,self.obj,'Import')
 
             unicode_gemini_string = etree.tostring(xml, encoding=unicode, pretty_print=True)
 
@@ -408,7 +408,7 @@
 
         return package
 
-    def get_gemini_string_and_guid(self,content):
+    def get_gemini_string_and_guid(self,content,url=None):
         try:
             xml = etree.fromstring(content)
 
@@ -425,7 +425,10 @@
             if self.validator is not None:
                 valid, messages = self.validator.isvalid(gemini_xml)
                 if not valid:
-                    self._save_gather_error('Content is not a valid Gemini document %r'%messages,self.harvest_job)
+                    if url:
+                        self._save_gather_error('Validation error for %s %r'% (url,messages),self.harvest_job)
+                    else:
+                        self._save_gather_error('Validation error for %r'%messages,self.harvest_job)
 
             gemini_string = etree.tostring(gemini_xml)
             gemini_document = GeminiDocument(gemini_string)
@@ -628,7 +631,7 @@
                 else:
                     # We need to extract the guid to pass it to the next stage
                     try:
-                        gemini_string, gemini_guid = self.get_gemini_string_and_guid(content)
+                        gemini_string, gemini_guid = self.get_gemini_string_and_guid(content,url)
                         if gemini_guid:
                             log.debug('Got GUID %s' % gemini_guid)
                             # Create a new HarvestObject for this identifier

Repository URL: https://bitbucket.org/okfn/ckanext-inspire/

--

This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled for this
repository and lists you as a recipient of this email.




More information about the ckan-changes mailing list