[ckan-changes] commit/ckanext-inspire: jame... at okfn.org: [harvesting] Added tracebacks to the error logs, tweaked error message if there is no error text available

Bitbucket commits-noreply at bitbucket.org
Thu Nov 10 17:14:45 UTC 2011


1 new commit in ckanext-inspire:


https://bitbucket.org/okfn/ckanext-inspire/changeset/40049a5b3236/
changeset:   40049a5b3236
user:        jame... at okfn.org
date:        2011-11-10 18:11:50
summary:     [harvesting] Added tracebacks to the error logs, tweaked error message if there is no error text available
affected #:  1 file

diff -r 824544e1d940f4491d9b9d5fe1b1fff11aec89ff -r 40049a5b3236de9fc92148cc8c4d4feb7b7fffd5 ckanext/inspire/harvesters.py
--- a/ckanext/inspire/harvesters.py
+++ b/ckanext/inspire/harvesters.py
@@ -14,7 +14,7 @@
 from datetime import datetime
 from string import Template
 from numbers import Number
-
+import sys
 import logging
 log = logging.getLogger(__name__)
 
@@ -48,6 +48,15 @@
     log.error('No CSW support installed -- install ckanext-csw')
     raise
 
+import cgitb
+import warnings
+def text_traceback():
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        res = 'the original traceback:'.join(
+            cgitb.text(sys.exc_info()).split('the original traceback:')[1:]
+        ).strip()
+    return res
 
 class InspireHarvester(object):
     csw=None
@@ -64,8 +73,8 @@
         try:
             s = wms.WebMapService(url)
             return isinstance(s.contents, dict) and s.contents != {}
-        except:
-            pass
+        except Exception, e:
+            log.error('WMS check for %s failed with exception: %s'%(url, str(e)))
         return False
 
     def _setup_csw_server(self,url):
@@ -103,12 +112,9 @@
             log.error(message)
 
     def _get_content(self, url):
-        try:
-            url = url.replace(' ','%20')
-            http_response = urllib2.urlopen(url)
-            return http_response.read()
-        except Exception, e:
-            raise e
+        url = url.replace(' ','%20')
+        http_response = urllib2.urlopen(url)
+        return http_response.read()
 
 
     # All three harvesters share the same import stage
@@ -125,34 +131,32 @@
             self._save_object_error('Empty content for object %s' % harvest_object.id,harvest_object,'Import')
             return False
         try:
-
             self.import_gemini_object(harvest_object.content)
             return True
         except Exception, e:
-            self._save_object_error('Error importing Gemini document: %s' % str(e),harvest_object,'Import')
+            log.error(text_traceback())
+            if not str(e).strip():
+                self._save_object_error('Error importing Gemini document.', harvest_object, 'Import')
+            else:
+                self._save_object_error('Error importing Gemini document: %s' % str(e), harvest_object, 'Import')
 
     def import_gemini_object(self, gemini_string):
-        try:
-            xml = etree.fromstring(gemini_string)
+        xml = etree.fromstring(gemini_string)
 
-            if not self.validator:
-                self._get_validator()
+        if not self.validator:
+            self._get_validator()
 
-            if self.validator is not None:
-                valid, messages = self.validator.isvalid(xml)
-                if not valid:
-                    log.error('Errors found for object with GUID %s:' % self.obj.guid)
-                    out = messages[0] + ':\n' + '\n'.join(messages[1:])
-                    self._save_object_error(out,self.obj,'Import')
+        if self.validator is not None:
+            valid, messages = self.validator.isvalid(xml)
+            if not valid:
+                log.error('Errors found for object with GUID %s:' % self.obj.guid)
+                out = messages[0] + ':\n' + '\n'.join(messages[1:])
+                self._save_object_error(out,self.obj,'Import')
 
-            unicode_gemini_string = etree.tostring(xml, encoding=unicode, pretty_print=True)
+        unicode_gemini_string = etree.tostring(xml, encoding=unicode, pretty_print=True)
 
-            package = self.write_package_from_gemini_string(unicode_gemini_string)
+        package = self.write_package_from_gemini_string(unicode_gemini_string)
 
-        except Exception, e:
-            raise
-        else:
-            pass
 
     def write_package_from_gemini_string(self, content):
         '''Create or update a Package based on some content that has
@@ -437,46 +441,41 @@
             package_dict = action_function(context, package_dict)
         except ValidationError,e:
             raise Exception('Validation Error: %s' % str(e.error_summary))
-        except Exception, e:
-            raise e
 
         # Return the actual package object
         return context['package']
 
     def get_gemini_string_and_guid(self,content,url=None):
-        try:
-            xml = etree.fromstring(content)
+        xml = etree.fromstring(content)
 
-            # The validator and GeminiDocument don't like the container
-            metadata_tag = '{http://www.isotc211.org/2005/gmd}MD_Metadata'
-            if xml.tag == metadata_tag:
-                gemini_xml = xml
-            else:
-                gemini_xml = xml.find(metadata_tag)
+        # The validator and GeminiDocument don't like the container
+        metadata_tag = '{http://www.isotc211.org/2005/gmd}MD_Metadata'
+        if xml.tag == metadata_tag:
+            gemini_xml = xml
+        else:
+            gemini_xml = xml.find(metadata_tag)
 
-            if not gemini_xml:
-                self._save_gather_error('Content is not a valid Gemini document',self.harvest_job)
+        if not gemini_xml:
+            self._save_gather_error('Content is not a valid Gemini document',self.harvest_job)
 
-            if not self.validator:
-                self._get_validator()
+        if not self.validator:
+            self._get_validator()
 
-            if self.validator is not None:
-                valid, messages = self.validator.isvalid(gemini_xml)
-                if not valid:
-                    out = messages[0] + ':\n' + '\n'.join(messages[1:])
-                    if url:
-                        self._save_gather_error('Validation error for %s %r'% (url,out),self.harvest_job)
-                    else:
-                        self._save_gather_error('Validation error. %r'%out,self.harvest_job)
+        if self.validator is not None:
+            valid, messages = self.validator.isvalid(gemini_xml)
+            if not valid:
+                out = messages[0] + ':\n' + '\n'.join(messages[1:])
+                if url:
+                    self._save_gather_error('Validation error for %s %r'% (url,out),self.harvest_job)
+                else:
+                    self._save_gather_error('Validation error. %r'%out,self.harvest_job)
 
-            gemini_string = etree.tostring(gemini_xml)
-            gemini_document = GeminiDocument(gemini_string)
-            gemini_values = gemini_document.read_values()
-            gemini_guid = gemini_values['guid']
+        gemini_string = etree.tostring(gemini_xml)
+        gemini_document = GeminiDocument(gemini_string)
+        gemini_values = gemini_document.read_values()
+        gemini_guid = gemini_values['guid']
 
-            return gemini_string, gemini_guid
-        except Exception,e:
-            raise e
+        return gemini_string, gemini_guid
 
 class GeminiHarvester(InspireHarvester,SingletonPlugin):
     '''

Repository URL: https://bitbucket.org/okfn/ckanext-inspire/

--

This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled and is
configured to send to the recipient address of this email.




More information about the ckan-changes mailing list