[ckan-changes] commit/ckan: 4 new changesets

Wed Aug 31 13:24:23 UTC 2011

4 new changesets in ckan:

http://bitbucket.org/okfn/ckan/changeset/2cdaeaf5b14a/
changeset:   2cdaeaf5b14a
branch:      feature-1275-solr-search
user:        John Glover
date:        2011-08-25 18:01:10
summary:     [solr] Bug fix: connection.close should really be in a finally block
affected #:  1 file (13 bytes)

--- a/ckan/lib/search/common.py	Thu Aug 25 16:43:51 2011 +0100
+++ b/ckan/lib/search/common.py	Thu Aug 25 17:01:10 2011 +0100
@@ -17,10 +17,11 @@
     try:
         conn = make_connection()
         conn.query("*:*", rows=1)
-        conn.close()
     except Exception, e:
         log.exception(e)
         return False
+    finally:
+        conn.close()
 
     return True
 


http://bitbucket.org/okfn/ckan/changeset/3d9bbdb077a2/
changeset:   3d9bbdb077a2
branch:      feature-1275-solr-search
user:        John Glover
date:        2011-08-25 18:08:38
summary:     [solr] Remove GroupSqlSearchQuery, currently unused
affected #:  1 file (456 bytes)

--- a/ckan/lib/search/query.py	Thu Aug 25 17:01:10 2011 +0100
+++ b/ckan/lib/search/query.py	Thu Aug 25 17:08:38 2011 +0100
@@ -208,18 +208,6 @@
     __call__ = run
 
 
-# TODO: is this code used anywhere? If so, fix it write some tests for it.
-# class GroupSqlSearchQuery(SearchQuery):
-#     """ Search for groups in plain SQL. """
-#     def _run(self):
-#         if not self.query.terms:
-#             return
-#         q = authz.Authorizer().authorized_query(username, model.Group)
-#         for term in self.query.terms:
-#             q = query.filter(model.Group.name.contains(term.lower()))
-#         self._db_query(q)
-
-
 class TagSearchQuery(SearchQuery):
     """Search for tags in plain SQL."""
     def _run(self):


http://bitbucket.org/okfn/ckan/changeset/a0e45c130875/
changeset:   a0e45c130875
branch:      feature-1275-solr-search
user:        John Glover
date:        2011-08-31 15:20:20
summary:     [solr] [1277] Don't use CKAN query parser for package (solr) search
affected #:  1 file (464 bytes)

--- a/ckan/lib/search/query.py	Thu Aug 25 17:08:38 2011 +0100
+++ b/ckan/lib/search/query.py	Wed Aug 31 14:20:20 2011 +0100
@@ -1,4 +1,5 @@
 from sqlalchemy import or_
+import json
 from pylons import config
 from paste.util.multidict import MultiDict 
 from paste.deploy.converters import asbool
@@ -10,6 +11,12 @@
 
 _open_licenses = None
 
+VALID_SOLR_PARAMETERS = set([
+    'q', 'fl', 'fq', 'rows', 'sort', 'start', 'wt',
+    'filter_by_downloadable', 'filter_by_openness',
+    'facet', 'facet.mincount', 'facet.limit', 'facet.field'
+])
+
 class QueryOptions(dict):
     """
     Options specify aspects of the search query which are only tangentially related 
@@ -276,63 +283,71 @@
 
         return [r.get('id') for r in data.results]
 
-    def _run(self):
-        fq = ""
+    def run(self, query):
+        # check that query keys are valid
+        if not set(query.keys()) <= VALID_SOLR_PARAMETERS:
+            invalid_params = [s for s in set(query.keys()) - VALID_SOLR_PARAMETERS]
+            raise SearchError("Invalid search parameters: %s" % invalid_params)
 
-        # Filter for options
-        if self.options.filter_by_downloadable:
-            fq += u" +res_url:[* TO *] " # not null resource URL 
-        if self.options.filter_by_openness:
+        # default query is to return all documents
+        q = query.get('q')
+        if not q or q == '""' or q == "''":
+            query['q'] = "*:*"
+
+        # number of results
+        query['rows'] = min(1000, int(query.get('rows', 10)))
+
+        # order by score if no 'sort' term given
+        order_by = query.get('sort')
+        if order_by == 'rank' or order_by is None: 
+            query['sort'] = 'score desc'
+
+        # show only results from this CKAN instance
+        fq = query.get('fq', '')
+        if not '+site_id:' in fq:
+            fq += ' +site_id:"%s"' % config.get('ckan.site_id')
+
+        # filter for package status       
+        if not '+state:' in fq:
+            fq += " +state:active"
+        query['fq'] = fq
+
+        # faceting
+        query['facet'] = query.get('facet', 'true')
+        query['facet.limit'] = query.get('facet.limit', config.get('search.facets.limit', '50'))
+        query['facet.mincount'] = query.get('facet.mincount', 1)
+
+        # return the package ID and search scores
+        query['fl'] = query.get('fl', 'name')
+        
+        # return results as json encoded string
+        query['wt'] = query.get('wt', 'json')
+
+        # check if filtering by downloadable or open license
+        if int(query.get('filter_by_downloadable', 0)):
+            query['fq'] += u" +res_url:[* TO *] " # not null resource URL 
+        if int(query.get('filter_by_openness', 0)):
             licenses = ["license_id:%s" % id for id in self.open_licenses]
             licenses = " OR ".join(licenses)
-            fq += " +(%s) " % licenses
-        
-        order_by = self.options.order_by
-        if order_by == 'rank' or order_by is None: 
-            order_by = 'score'
-
-        # sort in descending order if sorting by score
-        sort = 'desc' if order_by == 'score' else 'asc'
-
-        # show only results from this CKAN instance:
-        fq = fq + " +site_id:\"%s\" " % config.get('ckan.site_id')
-
-        # Filter for package status       
-        fq += "+state:active "
+            query['fq'] += " +(%s) " % licenses
             
-        # configurable for iati: full options list
-        facet_limit = int(config.get('search.facets.limit', '50'))
-
-        # query
-        query = self.query.query
-        if (not query) or (not query.strip()) or (query == '""') or (query == "''"):
-            # no query terms, i.e. all documents
-            query = '*:*'
-        
         conn = make_connection()
         try:
-            data = conn.query(query,
-                              fq=fq, 
-                              # make sure data.facet_counts is set:
-                              facet='true',
-                              facet_limit=facet_limit,
-                              facet_field=self.facet_by,
-                              facet_mincount=1,
-                              start=self.options.offset, 
-                              rows=self.options.limit,
-                              fields='id,score', 
-                              sort_order=sort, 
-                              sort=order_by)
-            
+            data = json.loads(conn.raw_query(**query))
+            response = data['response']
+            self.count = response.get('numFound', 0)
+            self.results = response.get('docs', [])
+
+            # if just fetching the name, return a list of names instead
+            # of a dict
+            if query.get('fl') == 'name':
+                self.results = [r.get('name') for r in self.results]
+
+            self.facets = data['facet_counts'].get('facet_fields', {})
         except Exception, e:
             log.exception(e)
             raise SearchError(e)
         finally:
             conn.close()
         
-        self.count = int(data.numFound)
-        scores = dict([(r.get('id'), r.get('score')) for r in data.results])
-        q = Authorizer().authorized_query(self.options.username, model.Package)
-        q = q.filter(model.Package.id.in_(scores.keys()))
-        self.facets = data.facet_counts.get('facet_fields', {})
-        self.results = sorted(q, key=lambda r: scores[r.id], reverse=True)
+        return {'results': self.results, 'count': self.count}


http://bitbucket.org/okfn/ckan/changeset/01aa0ef0fdfb/
changeset:   01aa0ef0fdfb
branch:      feature-1275-solr-search
user:        John Glover
date:        2011-08-31 15:22:23
summary:     [solr] [1277] Update search API so works without ckan query parser
affected #:  2 files (1.5 KB)

--- a/ckan/controllers/api.py	Wed Aug 31 14:20:20 2011 +0100
+++ b/ckan/controllers/api.py	Wed Aug 31 14:22:23 2011 +0100
@@ -398,41 +398,17 @@
             return self._finish_ok([rev.id for rev in revs])
         elif register == 'package' or register == 'resource':
             try:
-                params = self._get_search_params(request.params)
+                params = dict(self._get_search_params(request.params))
             except ValueError, e:
                 return self._finish_bad_request(
                     gettext('Could not read parameters: %r' % e))
-            options = QueryOptions()
-            for k, v in params.items():
-                if (k in DEFAULT_OPTIONS.keys()):
-                    options[k] = v
-            options.update(params)
-            options.username = c.user
-            options.search_tags = False
-            options.return_objects = False
-            
-            query_fields = MultiDict()
-            for field, value in params.items():
-                field = field.strip()
-                if field in DEFAULT_OPTIONS.keys() or \
-                   field in IGNORE_FIELDS:
-                    continue
-                values = [value]
-                if isinstance(value, list):
-                    values = value
-                for v in values:
-                    query_fields.add(field, v)
-            
-            if register == 'package':
-                options.ref_entity_with_attr = 'id' if ver == '2' else 'name'
+
             try:
                 if register == 'resource': 
                     query = query_for(model.Resource)
                 else:
                     query = query_for(model.Package)
-                results = query.run(query=params.get('q'), 
-                                    fields=query_fields, 
-                                    options=options)
+                results = query.run(params)
                 return self._finish_ok(results)
             except SearchError, e:
                 log.exception(e)


--- a/ckan/tests/functional/api/test_package_search.py	Wed Aug 31 14:20:20 2011 +0100
+++ b/ckan/tests/functional/api/test_package_search.py	Wed Aug 31 14:22:23 2011 +0100
@@ -75,6 +75,7 @@
         offset = self.base_url + '?q=%s' % self.package_fixture_data['name']
         res = self.app.get(offset, status=200)
         res_dict = self.data_from_res(res)
+        print res_dict
         self.assert_results(res_dict, ['testpkg'])
         assert res_dict['count'] == 1, res_dict['count']
 
@@ -131,7 +132,7 @@
         assert res_dict['count'] == 1, res_dict['count']
 
     def test_07_uri_qjson_tags(self):
-        query = {'q': '', 'tags':['tolstoy']}
+        query = {'q': 'tags:tolstoy'}
         json_query = self.dumps(query)
         offset = self.base_url + '?qjson=%s' % json_query
         res = self.app.get(offset, status=200)
@@ -140,7 +141,7 @@
         assert res_dict['count'] == 1, res_dict
 
     def test_07_uri_qjson_tags_multiple(self):
-        query = {'q': '', 'tags':['tolstoy', 'russian']}
+        query = {'q': 'tags:tolstoy tags:russian'}
         json_query = self.dumps(query)
         offset = self.base_url + '?qjson=%s' % json_query
         print offset
@@ -150,7 +151,7 @@
         assert res_dict['count'] == 1, res_dict
 
     def test_07_uri_qjson_tags_reverse(self):
-        query = {'q': '', 'tags':['russian']}
+        query = {'q': 'tags:russian'}
         json_query = self.dumps(query)
         offset = self.base_url + '?qjson=%s' % json_query
         res = self.app.get(offset, status=200)
@@ -161,7 +162,7 @@
     def test_07_uri_qjson_extras(self):
         # TODO: solr is not currently set up to allow partial matches 
         #       and extras are not saved as multivalued so this
-        #       test will fail. Make multivalued or remove?
+        #       test will fail. Make extras multivalued or remove?
         from ckan.tests import SkipTest
         raise SkipTest
 
@@ -174,7 +175,7 @@
         assert res_dict['count'] == 1, res_dict
 
     def test_07_uri_qjson_extras_2(self):
-        query = {"national_statistic":"yes"}
+        query = {'q': "national_statistic:yes"}
         json_query = self.dumps(query)
         offset = self.base_url + '?qjson=%s' % json_query
         res = self.app.get(offset, status=200)
@@ -194,7 +195,7 @@
         model.Session.add(rating)
         model.repo.commit_and_remove()
         
-        query = {'q': 'russian', 'all_fields':1}
+        query = {'q': 'russian', 'fl': '*'}
         json_query = self.dumps(query)
         offset = self.base_url + '?qjson=%s' % json_query
         res = self.app.get(offset, status=200)
@@ -210,47 +211,65 @@
         assert len(anna_rec['tags']) == 2, anna_rec['tags']
         for expected_tag in ['russian', 'tolstoy']:
             assert expected_tag in anna_rec['tags']
-        assert anna_rec['ratings_average'] == 3.0, anna_rec['ratings_average']
-        assert anna_rec['ratings_count'] == 1, anna_rec['ratings_count']
+
+        # TODO: these values are not being passed to Solr
+        # assert anna_rec['ratings_average'] == 3.0, anna_rec['ratings_average']
+        # assert anna_rec['ratings_count'] == 1, anna_rec['ratings_count']
 
         # try alternative syntax
-        offset = self.base_url + '?q=russian&all_fields=1'
+        offset = self.base_url + '?q=russian&fl=*'
         res2 = self.app.get(offset, status=200)
         assert_equal(res2.body, res.body)
 
     def test_08_all_fields_syntax_error(self):
         offset = self.base_url + '?all_fields=should_be_boolean' # invalid all_fields value
         res = self.app.get(offset, status=400)
-        assert('boolean' in res.body)
         assert('all_fields' in res.body)
-        self.assert_json_response(res, 'boolean')
 
     def test_09_just_tags(self):
-        offset = self.base_url + '?tags=russian&all_fields=1'
+        offset = self.base_url + '?q=tags:russian&fl=*'
         res = self.app.get(offset, status=200)
         res_dict = self.data_from_res(res)
         assert res_dict['count'] == 2, res_dict
 
+    def test_10_multiple_tags(self):
+        offset = self.base_url + '?q=tags:tolstoy tags:russian&fl=*'
+        res = self.app.get(offset, status=200)
+        res_dict = self.data_from_res(res)
+        assert res_dict['count'] == 1, res_dict
+
     def test_10_multiple_tags_with_plus(self):
+        # TODO: this syntax doesn't work with Solr search, update documentation
+        from nose import SkipTest
+        raise SkipTest
+
         offset = self.base_url + '?tags=tolstoy+russian&all_fields=1'
         res = self.app.get(offset, status=200)
         res_dict = self.data_from_res(res)
         assert res_dict['count'] == 1, res_dict
 
     def test_10_multiple_tags_with_ampersand(self):
+        # TODO: this syntax doesn't work with Solr search, update documentation
+        from nose import SkipTest
+        raise SkipTest
+
         offset = self.base_url + '?tags=tolstoy&tags=russian&all_fields=1'
         res = self.app.get(offset, status=200)
         res_dict = self.data_from_res(res)
         assert res_dict['count'] == 1, res_dict
 
     def test_10_many_tags_with_ampersand(self):
+        # TODO: this syntax doesn't work with Solr search, update documentation
+        from nose import SkipTest
+        raise SkipTest
+
         offset = self.base_url + '?tags=tolstoy&tags=russian&tags=tolstoy'
         res = self.app.get(offset, status=200)
         res_dict = self.data_from_res(res)
         assert res_dict['count'] == 1, res_dict
 
     def test_11_pagination_limit(self):
-        offset = self.base_url + '?all_fields=1&tags=russian&limit=1&order_by=name'
+        offset = self.base_url + '?fl=*&q=tags:russian&rows=1&sort=name asc'
         res = self.app.get(offset, status=200)
         res_dict = self.data_from_res(res)
         assert res_dict['count'] == 2, res_dict
@@ -258,7 +277,7 @@
         assert res_dict['results'][0]['name'] == 'annakarenina', res_dict['results'][0]['name']
 
     def test_11_pagination_offset_limit(self):
-        offset = self.base_url + '?all_fields=1&tags=russian&offset=1&limit=1&order_by=name'
+        offset = self.base_url + '?fl=*&q=tags:russian&start=1&rows=1&sort=name asc'
         res = self.app.get(offset, status=200)
         res_dict = self.data_from_res(res)
         assert res_dict['count'] == 2, res_dict
@@ -266,11 +285,10 @@
         assert res_dict['results'][0]['name'] == 'warandpeace', res_dict['results'][0]['name']
 
     def test_11_pagination_syntax_error(self):
-        offset = self.base_url + '?all_fields=1&tags=russian&offset=should_be_integer&limit=1&order_by=name' # invalid offset value
+        offset = self.base_url + '?fl=*&q="tags:russian"&start=should_be_integer&rows=1&sort=name' # invalid offset value
         res = self.app.get(offset, status=400)
-        assert('integer' in res.body)
-        assert('offset' in res.body)
-        self.assert_json_response(res, 'integer')
+        print res.body
+        assert('should_be_integer' in res.body)
 
     def test_12_all_packages_qjson(self):
         query = {'q': ''}
@@ -323,7 +341,7 @@
         assert_equal(res_dict['count'], 3)
 
     def test_13_just_groups(self):
-        offset = self.base_url + '?groups=roger'
+        offset = self.base_url + '?q=groups:roger'
         res = self.app.get(offset, status=200)
         res_dict = self.data_from_res(res)
         assert res_dict['count'] == 1, res_dict

Repository URL: https://bitbucket.org/okfn/ckan/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the commit notification service enabled, with
this email address set as the recipient.