[ckan-changes] commit/ckan: 2 new changesets

Wed Sep 21 11:32:32 UTC 2011

2 new changesets in ckan:

http://bitbucket.org/okfn/ckan/changeset/a1fb2c686ae5/
changeset:   a1fb2c686ae5
branch:      feature-1302-resource-tag-search
user:        John Glover
date:        2011-09-21 13:31:19
summary:     [search] closing branch
affected #:  0 files (-1 bytes)

http://bitbucket.org/okfn/ckan/changeset/56c79e3fc44c/
changeset:   56c79e3fc44c
user:        John Glover
date:        2011-09-21 13:32:09
summary:     merge with feature-1302-resource-tag-search
affected #:  5 files (-1 bytes)

--- a/ckan/lib/search/query.py	Tue Sep 20 19:32:12 2011 +0100
+++ b/ckan/lib/search/query.py	Wed Sep 21 12:32:09 2011 +0100
@@ -1,9 +1,8 @@
-from sqlalchemy import or_
 import json
 from pylons import config
-from paste.util.multidict import MultiDict 
 from paste.deploy.converters import asbool
 from ckan import model
+from ckan.logic import get_action
 from common import make_connection, SearchError
 import logging
 log = logging.getLogger(__name__)
@@ -60,91 +59,6 @@
         self[name] = value
 
 
-class QueryParser(object):
-    """
-    The query parser will take any incoming query specifications and turn 
-    them into field-specific and general query parts. 
-    """
-    
-    def __init__(self, query, terms, fields):
-        self._query = query
-        self._terms = terms
-        self._fields = MultiDict(fields)
-    
-    @property    
-    def query(self):
-        if not hasattr(self, '_combined_query'):
-            parts = [self._query if self._query is not None else '']
-            
-            for term in self._terms:
-                if term.find(u' ') != -1:
-                    term = u"\"%s\"" % term
-                parts.append(term.strip())
-                
-            for field, value in self._fields.items():
-                if field != 'tags' and value.find(' ') != -1:
-                    value = u"\"%s\"" % value
-                parts.append(u"%s:%s" % (field.strip(), value.strip()))
-                
-            self._combined_query = u' '.join(parts)
-        return self._combined_query
-    
-    def _query_tokens(self):
-        """ Split the query string, leaving quoted strings intact. """
-        if self._query:
-            inside_quote = False
-            buf = u''
-            for ch in self._query:
-                if ch == u' ' and not inside_quote:
-                    if len(buf):
-                        yield buf.strip()
-                    buf = u''
-                elif ch == inside_quote:
-                    inside_quote = False
-                elif ch in [u"\"", u"'"]:
-                    inside_quote = ch
-                else:
-                    buf += ch
-            if len(buf):
-                yield buf.strip()
-    
-    def _parse_query(self):
-        """ Decompose the query string into fields and terms. """
-        self._combined_fields = MultiDict(self._fields)
-        self._combined_terms = list(self._terms)
-        for token in self._query_tokens():
-            colon_pos = token.find(u':')
-            if colon_pos != -1:
-                field = token[:colon_pos]
-                value = token[colon_pos+1:]
-                value = value.strip('"').strip("'").strip()
-                self._combined_fields.add(field, value)
-            else:
-                self._combined_terms.append(token)
-    
-    @property
-    def fields(self):
-        if not hasattr(self, '_combined_fields'):
-            self._parse_query()
-        return self._combined_fields
-    
-    @property
-    def terms(self):
-        if not hasattr(self, '_combined_terms'):
-            self._parse_query()
-        return self._combined_terms
-    
-    def validate(self):
-        """ Check that this is a valid query. """
-        pass
-    
-    def __str__(self):
-        return self.query
-        
-    def __repr__(self):
-        return "Query(%r)" % self.query
-
-
 class SearchQuery(object):
     """
     A query is ... when you ask the search engine things. SearchQuery is intended 
@@ -168,14 +82,6 @@
                     _open_licenses.append(license.id)
         return _open_licenses
     
-    def _format_results(self):
-        if not self.options.return_objects and len(self.results):
-            if self.options.all_fields:
-                self.results = [r.as_dict() for r in self.results]
-            else:
-                attr_name = self.options.ref_entity_with_attr
-                self.results = [getattr(entity, attr_name) for entity in self.results]
-
     def get_all_entity_ids(self, max_results=1000):
         """
         Return a list of the IDs of all indexed packages.
@@ -183,90 +89,70 @@
         return []
     
     def run(self, query=None, terms=[], fields={}, facet_by=[], options=None, **kwargs):
+        raise SearchError("SearchQuery.run() not implemented!")
+        
+    # convenience, allows to query(..)
+    __call__ = run
+
+
+class TagSearchQuery(SearchQuery):
+    """Search for tags."""
+    def run(self, query=[], fields={}, options=None, **kwargs):
         if options is None:
             options = QueryOptions(**kwargs) 
         else:
             options.update(kwargs)
-        self.options = options
-        self.options.validate()
-        self.facet_by = facet_by
-        self.facets = dict()
-        self.query = QueryParser(query, terms, fields)
-        self.query.validate()
-        self._run()
-        self._format_results()
-        return {'results': self.results, 'count': self.count}
+
+        context = {'model': model, 'session': model.Session}
+        data_dict = {
+            'query': query, 
+            'fields': fields,
+            'offset': options.get('offset'),
+            'limit': options.get('limit')
+        }
+        results = get_action('tag_search')(context, data_dict)
+
+        if not options.return_objects:
+            # if options.all_fields is set, return a dict
+            # if not, return a list of resource IDs
+            if options.all_fields:
+                results['results'] = [r.as_dict() for r in results['results']]
+            else:
+                results['results'] = [r.name for r in results['results']]
         
-    def _run(self):
-        raise SearchError("SearchQuery._run() not implemented!")
-
-    def _db_query(self, q):
-        # Run the query
-        self.count = q.count()
-        q = q.offset(self.options.get('offset'))
-        q = q.limit(self.options.get('limit'))
-        
-        self.results = []
-        for result in q:
-            if isinstance(result, tuple) and isinstance(result[0], model.DomainObject):
-                # This is the case for order_by rank due to the add_column.
-                self.results.append(result[0])
-            else:
-                self.results.append(result)
-        
-    # convenience, allows to query(..)
-    __call__ = run
-
-
-class TagSearchQuery(SearchQuery):
-    """Search for tags in plain SQL."""
-    def _run(self):
-        q = model.Session.query(model.Tag)
-        q = q.distinct().join(model.Tag.package_tags)
-        terms = list(self.query.terms)
-        for field, value in self.query.fields.items():
-            if field in ('tag', 'tags'):
-                terms.append(value)
-        if not len(terms):
-            return
-        for term in terms:
-            q = q.filter(model.Tag.name.contains(term.lower()))
-        self._db_query(q)
+        self.count = results['count']
+        self.results = results['results']
+        return results
 
 
 class ResourceSearchQuery(SearchQuery):
-    """ Search for resources in plain SQL. """
-    def _run(self):
-        q = model.Session.query(model.Resource) # TODO authz
-        if self.query.terms:
-            raise SearchError('Only field specific terms allowed in resource search.')
-        self.options.ref_entity_with_attr = 'id' # has no name
-        resource_fields = model.Resource.get_columns()
-        for field, terms in self.query.fields.items():
-            if isinstance(terms, basestring):
-                terms = terms.split()
-            if field not in resource_fields:
-                raise SearchError('Field "%s" not recognised in Resource search.' % field)
-            for term in terms:
-                model_attr = getattr(model.Resource, field)
-                if field == 'hash':                
-                    q = q.filter(model_attr.ilike(unicode(term) + '%'))
-                elif field in model.Resource.get_extra_columns():
-                    model_attr = getattr(model.Resource, 'extras')
+    """Search for resources."""
+    def run(self, fields={}, options=None, **kwargs):
+        if options is None:
+            options = QueryOptions(**kwargs) 
+        else:
+            options.update(kwargs)
 
-                    like = or_(
-                        model_attr.ilike(u'''%%"%s": "%%%s%%",%%''' % (field, term)),
-                        model_attr.ilike(u'''%%"%s": "%%%s%%"}''' % (field, term))
-                    )
-                    q = q.filter(like)
-                else:
-                    q = q.filter(model_attr.ilike('%' + unicode(term) + '%'))
-        
-        order_by = self.options.order_by
-        if order_by is not None:
-            if hasattr(model.Resource, order_by):
-                q = q.order_by(getattr(model.Resource, order_by))
-        self._db_query(q)
+        context = {'model':model, 'session': model.Session}
+        data_dict = {
+            'fields': fields,
+            'offset': options.get('offset'),
+            'limit': options.get('limit'),
+            'order_by': options.get('order_by')
+        }
+        results = get_action('resource_search')(context, data_dict)
+
+        if not options.return_objects:
+            # if options.all_fields is set, return a dict
+            # if not, return a list of resource IDs
+            if options.all_fields:
+                results['results'] = [r.as_dict() for r in results['results']]
+            else:
+                results['results'] = [r.id for r in results['results']]
+
+        self.count = results['count']
+        self.results = results['results']
+        return results
 
 
 class PackageSearchQuery(SearchQuery):


--- a/ckan/logic/action/get.py	Tue Sep 20 19:32:12 2011 +0100
+++ b/ckan/logic/action/get.py	Wed Sep 21 12:32:09 2011 +0100
@@ -21,7 +21,7 @@
                                                 group_to_api2,
                                                 tag_to_api1,
                                                 tag_to_api2)
-from ckan.lib.search import query_for
+from ckan.lib.search import query_for, SearchError
 
 def site_read(context,data_dict=None):
     check_access('site_read',context,data_dict)
@@ -500,16 +500,14 @@
 
     check_access('tag_autocomplete', context, data_dict)
 
-    q = data_dict.get('q',None)
+    q = data_dict.get('q', None)
     if not q:
         return []
 
     limit = data_dict.get('limit',10)
 
-    like_q = u"%s%%" % q
-
     query = query_for('tag')
-    query.run(query=like_q,
+    query.run(query=q,
               return_objects=True,
               limit=10,
               username=user)
@@ -625,3 +623,84 @@
         package_dict['isopen'] = False
 
     return package_dict
+
+def resource_search(context, data_dict):
+    model = context['model']
+    session = context['session']
+
+    fields = data_dict['fields']
+    order_by = data_dict.get('order_by')
+    offset = data_dict.get('offset')
+    limit = data_dict.get('limit')
+
+    # TODO: should we check for user authentication first?
+    q = model.Session.query(model.Resource)
+    resource_fields = model.Resource.get_columns()
+
+    for field, terms in fields.items():
+        if isinstance(terms, basestring):
+            terms = terms.split()
+        if field not in resource_fields:
+            raise SearchError('Field "%s" not recognised in Resource search.' % field)
+        for term in terms:
+            model_attr = getattr(model.Resource, field)
+            if field == 'hash':                
+                q = q.filter(model_attr.ilike(unicode(term) + '%'))
+            elif field in model.Resource.get_extra_columns():
+                model_attr = getattr(model.Resource, 'extras')
+
+                like = or_(
+                    model_attr.ilike(u'''%%"%s": "%%%s%%",%%''' % (field, term)),
+                    model_attr.ilike(u'''%%"%s": "%%%s%%"}''' % (field, term))
+                )
+                q = q.filter(like)
+            else:
+                q = q.filter(model_attr.ilike('%' + unicode(term) + '%'))
+    
+    if order_by is not None:
+        if hasattr(model.Resource, order_by):
+            q = q.order_by(getattr(model.Resource, order_by))
+
+    count = q.count()
+    q = q.offset(offset)
+    q = q.limit(limit)
+    
+    results = []
+    for result in q:
+        if isinstance(result, tuple) and isinstance(result[0], model.DomainObject):
+            # This is the case for order_by rank due to the add_column.
+            results.append(result[0])
+        else:
+            results.append(result)
+
+    return {'count': count, 'results': results}
+
+def tag_search(context, data_dict):
+    model = context['model']
+    session = context['session']
+
+    query = data_dict.get('query')
+    terms = [query] if query else []
+
+    fields = data_dict.get('fields', {})
+    offset = data_dict.get('offset')
+    limit = data_dict.get('limit')
+
+    # TODO: should we check for user authentication first?
+    q = model.Session.query(model.Tag)
+    q = q.distinct().join(model.Tag.package_tags)
+    for field, value in fields.items():
+        if field in ('tag', 'tags'):
+            terms.append(value)
+
+    if not len(terms):
+        return
+
+    for term in terms:
+        q = q.filter(model.Tag.name.contains(term.lower()))
+
+    count = q.count()
+    q = q.offset(offset)
+    q = q.limit(limit)
+    results = [r for r in q]
+    return {'count': count, 'results': results}


--- a/ckan/tests/functional/api/test_action.py	Tue Sep 20 19:32:12 2011 +0100
+++ b/ckan/tests/functional/api/test_action.py	Wed Sep 21 12:32:09 2011 +0100
@@ -459,7 +459,6 @@
         postparams = '%s=1' % json.dumps({'q':'r'})
         res = self.app.post('/api/action/tag_autocomplete', params=postparams)
         res_obj = json.loads(res.body)
-        print res_obj
         assert res_obj == {
             'help': 'Returns tags containing the provided string', 
             'result': ['russian'], 


--- a/ckan/tests/lib/test_resource_search.py	Tue Sep 20 19:32:12 2011 +0100
+++ b/ckan/tests/lib/test_resource_search.py	Wed Sep 21 12:32:09 2011 +0100
@@ -148,7 +148,7 @@
         resources = result['results']
         count = result['count']
         assert len(resources) == 2, resources
-        assert count == all_resource_count
+        assert count == all_resource_count, (count, all_resource_count)
         assert resources == all_resources[:2], '%r, %r' % (resources, all_resources)
 
         # offset
@@ -182,5 +182,3 @@
         # can't be searched
         fields = {'size_extra':'100'}
         assert_raises(search.SearchError, search.query_for(model.Resource).run, fields=fields)
-
-


--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ckan/tests/lib/test_tag_search.py	Wed Sep 21 12:32:09 2011 +0100
@@ -0,0 +1,45 @@
+from nose.tools import assert_raises
+from ckan.tests import *
+from ckan.tests import is_search_supported
+import ckan.lib.search as search
+from ckan import model
+from ckan.lib.create_test_data import CreateTestData
+
+class TestTagSearch(object):
+    @classmethod
+    def setup_class(self):
+        if not is_search_supported():
+            raise SkipTest("Search not supported")
+        CreateTestData.create()
+
+    @classmethod
+    def teardown_class(self):
+        model.repo.rebuild_db()
+
+    def test_good_search_query(self):
+        result = search.query_for(model.Tag).run(query=u'ru')
+        assert result['count'] == 1, result
+        assert 'russian' in result['results'], result
+
+        result = search.query_for(model.Tag).run(query=u's')
+        assert result['count'] == 2, result
+        assert 'russian' in result['results'], result
+        assert 'tolstoy' in result['results'], result
+
+    def test_bad_search_query(self):
+        result = search.query_for(model.Tag).run(query=u'asdf')
+        assert result['count'] == 0, result
+
+    def test_good_search_fields(self):
+        result = search.query_for(model.Tag).run(fields={'tags': u'ru'})
+        assert result['count'] == 1, result
+        assert 'russian' in result['results'], result
+
+        result = search.query_for(model.Tag).run(fields={'tags': u's'})
+        assert result['count'] == 2, result
+        assert 'russian' in result['results'], result
+        assert 'tolstoy' in result['results'], result
+
+    def test_bad_search_fields(self):
+        result = search.query_for(model.Tag).run(fields={'tags': u'asdf'})
+        assert result['count'] == 0, result

Repository URL: https://bitbucket.org/okfn/ckan/

--

This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled and this
email address is listed as its recipient.