[ckan-changes] commit/ckan: 2 new changesets
Bitbucket
commits-noreply at bitbucket.org
Wed Sep 21 11:32:32 UTC 2011
2 new changesets in ckan:
http://bitbucket.org/okfn/ckan/changeset/a1fb2c686ae5/
changeset: a1fb2c686ae5
branch: feature-1302-resource-tag-search
user: John Glover
date: 2011-09-21 13:31:19
summary: [search] closing branch
affected #: 0 files (-1 bytes)
http://bitbucket.org/okfn/ckan/changeset/56c79e3fc44c/
changeset: 56c79e3fc44c
user: John Glover
date: 2011-09-21 13:32:09
summary: merge with feature-1302-resource-tag-search
affected #: 5 files (-1 bytes)
--- a/ckan/lib/search/query.py Tue Sep 20 19:32:12 2011 +0100
+++ b/ckan/lib/search/query.py Wed Sep 21 12:32:09 2011 +0100
@@ -1,9 +1,8 @@
-from sqlalchemy import or_
import json
from pylons import config
-from paste.util.multidict import MultiDict
from paste.deploy.converters import asbool
from ckan import model
+from ckan.logic import get_action
from common import make_connection, SearchError
import logging
log = logging.getLogger(__name__)
@@ -60,91 +59,6 @@
self[name] = value
-class QueryParser(object):
- """
- The query parser will take any incoming query specifications and turn
- them into field-specific and general query parts.
- """
-
- def __init__(self, query, terms, fields):
- self._query = query
- self._terms = terms
- self._fields = MultiDict(fields)
-
- @property
- def query(self):
- if not hasattr(self, '_combined_query'):
- parts = [self._query if self._query is not None else '']
-
- for term in self._terms:
- if term.find(u' ') != -1:
- term = u"\"%s\"" % term
- parts.append(term.strip())
-
- for field, value in self._fields.items():
- if field != 'tags' and value.find(' ') != -1:
- value = u"\"%s\"" % value
- parts.append(u"%s:%s" % (field.strip(), value.strip()))
-
- self._combined_query = u' '.join(parts)
- return self._combined_query
-
- def _query_tokens(self):
- """ Split the query string, leaving quoted strings intact. """
- if self._query:
- inside_quote = False
- buf = u''
- for ch in self._query:
- if ch == u' ' and not inside_quote:
- if len(buf):
- yield buf.strip()
- buf = u''
- elif ch == inside_quote:
- inside_quote = False
- elif ch in [u"\"", u"'"]:
- inside_quote = ch
- else:
- buf += ch
- if len(buf):
- yield buf.strip()
-
- def _parse_query(self):
- """ Decompose the query string into fields and terms. """
- self._combined_fields = MultiDict(self._fields)
- self._combined_terms = list(self._terms)
- for token in self._query_tokens():
- colon_pos = token.find(u':')
- if colon_pos != -1:
- field = token[:colon_pos]
- value = token[colon_pos+1:]
- value = value.strip('"').strip("'").strip()
- self._combined_fields.add(field, value)
- else:
- self._combined_terms.append(token)
-
- @property
- def fields(self):
- if not hasattr(self, '_combined_fields'):
- self._parse_query()
- return self._combined_fields
-
- @property
- def terms(self):
- if not hasattr(self, '_combined_terms'):
- self._parse_query()
- return self._combined_terms
-
- def validate(self):
- """ Check that this is a valid query. """
- pass
-
- def __str__(self):
- return self.query
-
- def __repr__(self):
- return "Query(%r)" % self.query
-
-
class SearchQuery(object):
"""
A query is ... when you ask the search engine things. SearchQuery is intended
@@ -168,14 +82,6 @@
_open_licenses.append(license.id)
return _open_licenses
- def _format_results(self):
- if not self.options.return_objects and len(self.results):
- if self.options.all_fields:
- self.results = [r.as_dict() for r in self.results]
- else:
- attr_name = self.options.ref_entity_with_attr
- self.results = [getattr(entity, attr_name) for entity in self.results]
-
def get_all_entity_ids(self, max_results=1000):
"""
Return a list of the IDs of all indexed packages.
@@ -183,90 +89,70 @@
return []
def run(self, query=None, terms=[], fields={}, facet_by=[], options=None, **kwargs):
+ raise SearchError("SearchQuery.run() not implemented!")
+
+ # convenience, allows to query(..)
+ __call__ = run
+
+
+class TagSearchQuery(SearchQuery):
+ """Search for tags."""
+ def run(self, query=[], fields={}, options=None, **kwargs):
if options is None:
options = QueryOptions(**kwargs)
else:
options.update(kwargs)
- self.options = options
- self.options.validate()
- self.facet_by = facet_by
- self.facets = dict()
- self.query = QueryParser(query, terms, fields)
- self.query.validate()
- self._run()
- self._format_results()
- return {'results': self.results, 'count': self.count}
+
+ context = {'model': model, 'session': model.Session}
+ data_dict = {
+ 'query': query,
+ 'fields': fields,
+ 'offset': options.get('offset'),
+ 'limit': options.get('limit')
+ }
+ results = get_action('tag_search')(context, data_dict)
+
+ if not options.return_objects:
+ # if options.all_fields is set, return a dict
+ # if not, return a list of resource IDs
+ if options.all_fields:
+ results['results'] = [r.as_dict() for r in results['results']]
+ else:
+ results['results'] = [r.name for r in results['results']]
- def _run(self):
- raise SearchError("SearchQuery._run() not implemented!")
-
- def _db_query(self, q):
- # Run the query
- self.count = q.count()
- q = q.offset(self.options.get('offset'))
- q = q.limit(self.options.get('limit'))
-
- self.results = []
- for result in q:
- if isinstance(result, tuple) and isinstance(result[0], model.DomainObject):
- # This is the case for order_by rank due to the add_column.
- self.results.append(result[0])
- else:
- self.results.append(result)
-
- # convenience, allows to query(..)
- __call__ = run
-
-
-class TagSearchQuery(SearchQuery):
- """Search for tags in plain SQL."""
- def _run(self):
- q = model.Session.query(model.Tag)
- q = q.distinct().join(model.Tag.package_tags)
- terms = list(self.query.terms)
- for field, value in self.query.fields.items():
- if field in ('tag', 'tags'):
- terms.append(value)
- if not len(terms):
- return
- for term in terms:
- q = q.filter(model.Tag.name.contains(term.lower()))
- self._db_query(q)
+ self.count = results['count']
+ self.results = results['results']
+ return results
class ResourceSearchQuery(SearchQuery):
- """ Search for resources in plain SQL. """
- def _run(self):
- q = model.Session.query(model.Resource) # TODO authz
- if self.query.terms:
- raise SearchError('Only field specific terms allowed in resource search.')
- self.options.ref_entity_with_attr = 'id' # has no name
- resource_fields = model.Resource.get_columns()
- for field, terms in self.query.fields.items():
- if isinstance(terms, basestring):
- terms = terms.split()
- if field not in resource_fields:
- raise SearchError('Field "%s" not recognised in Resource search.' % field)
- for term in terms:
- model_attr = getattr(model.Resource, field)
- if field == 'hash':
- q = q.filter(model_attr.ilike(unicode(term) + '%'))
- elif field in model.Resource.get_extra_columns():
- model_attr = getattr(model.Resource, 'extras')
+ """Search for resources."""
+ def run(self, fields={}, options=None, **kwargs):
+ if options is None:
+ options = QueryOptions(**kwargs)
+ else:
+ options.update(kwargs)
- like = or_(
- model_attr.ilike(u'''%%"%s": "%%%s%%",%%''' % (field, term)),
- model_attr.ilike(u'''%%"%s": "%%%s%%"}''' % (field, term))
- )
- q = q.filter(like)
- else:
- q = q.filter(model_attr.ilike('%' + unicode(term) + '%'))
-
- order_by = self.options.order_by
- if order_by is not None:
- if hasattr(model.Resource, order_by):
- q = q.order_by(getattr(model.Resource, order_by))
- self._db_query(q)
+ context = {'model':model, 'session': model.Session}
+ data_dict = {
+ 'fields': fields,
+ 'offset': options.get('offset'),
+ 'limit': options.get('limit'),
+ 'order_by': options.get('order_by')
+ }
+ results = get_action('resource_search')(context, data_dict)
+
+ if not options.return_objects:
+ # if options.all_fields is set, return a dict
+ # if not, return a list of resource IDs
+ if options.all_fields:
+ results['results'] = [r.as_dict() for r in results['results']]
+ else:
+ results['results'] = [r.id for r in results['results']]
+
+ self.count = results['count']
+ self.results = results['results']
+ return results
class PackageSearchQuery(SearchQuery):
--- a/ckan/logic/action/get.py Tue Sep 20 19:32:12 2011 +0100
+++ b/ckan/logic/action/get.py Wed Sep 21 12:32:09 2011 +0100
@@ -21,7 +21,7 @@
group_to_api2,
tag_to_api1,
tag_to_api2)
-from ckan.lib.search import query_for
+from ckan.lib.search import query_for, SearchError
def site_read(context,data_dict=None):
check_access('site_read',context,data_dict)
@@ -500,16 +500,14 @@
check_access('tag_autocomplete', context, data_dict)
- q = data_dict.get('q',None)
+ q = data_dict.get('q', None)
if not q:
return []
limit = data_dict.get('limit',10)
- like_q = u"%s%%" % q
-
query = query_for('tag')
- query.run(query=like_q,
+ query.run(query=q,
return_objects=True,
limit=10,
username=user)
@@ -625,3 +623,84 @@
package_dict['isopen'] = False
return package_dict
+
+def resource_search(context, data_dict):
+ model = context['model']
+ session = context['session']
+
+ fields = data_dict['fields']
+ order_by = data_dict.get('order_by')
+ offset = data_dict.get('offset')
+ limit = data_dict.get('limit')
+
+ # TODO: should we check for user authentication first?
+ q = model.Session.query(model.Resource)
+ resource_fields = model.Resource.get_columns()
+
+ for field, terms in fields.items():
+ if isinstance(terms, basestring):
+ terms = terms.split()
+ if field not in resource_fields:
+ raise SearchError('Field "%s" not recognised in Resource search.' % field)
+ for term in terms:
+ model_attr = getattr(model.Resource, field)
+ if field == 'hash':
+ q = q.filter(model_attr.ilike(unicode(term) + '%'))
+ elif field in model.Resource.get_extra_columns():
+ model_attr = getattr(model.Resource, 'extras')
+
+ like = or_(
+ model_attr.ilike(u'''%%"%s": "%%%s%%",%%''' % (field, term)),
+ model_attr.ilike(u'''%%"%s": "%%%s%%"}''' % (field, term))
+ )
+ q = q.filter(like)
+ else:
+ q = q.filter(model_attr.ilike('%' + unicode(term) + '%'))
+
+ if order_by is not None:
+ if hasattr(model.Resource, order_by):
+ q = q.order_by(getattr(model.Resource, order_by))
+
+ count = q.count()
+ q = q.offset(offset)
+ q = q.limit(limit)
+
+ results = []
+ for result in q:
+ if isinstance(result, tuple) and isinstance(result[0], model.DomainObject):
+ # This is the case for order_by rank due to the add_column.
+ results.append(result[0])
+ else:
+ results.append(result)
+
+ return {'count': count, 'results': results}
+
+def tag_search(context, data_dict):
+ model = context['model']
+ session = context['session']
+
+ query = data_dict.get('query')
+ terms = [query] if query else []
+
+ fields = data_dict.get('fields', {})
+ offset = data_dict.get('offset')
+ limit = data_dict.get('limit')
+
+ # TODO: should we check for user authentication first?
+ q = model.Session.query(model.Tag)
+ q = q.distinct().join(model.Tag.package_tags)
+ for field, value in fields.items():
+ if field in ('tag', 'tags'):
+ terms.append(value)
+
+ if not len(terms):
+ return
+
+ for term in terms:
+ q = q.filter(model.Tag.name.contains(term.lower()))
+
+ count = q.count()
+ q = q.offset(offset)
+ q = q.limit(limit)
+ results = [r for r in q]
+ return {'count': count, 'results': results}
--- a/ckan/tests/functional/api/test_action.py Tue Sep 20 19:32:12 2011 +0100
+++ b/ckan/tests/functional/api/test_action.py Wed Sep 21 12:32:09 2011 +0100
@@ -459,7 +459,6 @@
postparams = '%s=1' % json.dumps({'q':'r'})
res = self.app.post('/api/action/tag_autocomplete', params=postparams)
res_obj = json.loads(res.body)
- print res_obj
assert res_obj == {
'help': 'Returns tags containing the provided string',
'result': ['russian'],
--- a/ckan/tests/lib/test_resource_search.py Tue Sep 20 19:32:12 2011 +0100
+++ b/ckan/tests/lib/test_resource_search.py Wed Sep 21 12:32:09 2011 +0100
@@ -148,7 +148,7 @@
resources = result['results']
count = result['count']
assert len(resources) == 2, resources
- assert count == all_resource_count
+ assert count == all_resource_count, (count, all_resource_count)
assert resources == all_resources[:2], '%r, %r' % (resources, all_resources)
# offset
@@ -182,5 +182,3 @@
# can't be searched
fields = {'size_extra':'100'}
assert_raises(search.SearchError, search.query_for(model.Resource).run, fields=fields)
-
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ckan/tests/lib/test_tag_search.py Wed Sep 21 12:32:09 2011 +0100
@@ -0,0 +1,45 @@
+from nose.tools import assert_raises
+from ckan.tests import *
+from ckan.tests import is_search_supported
+import ckan.lib.search as search
+from ckan import model
+from ckan.lib.create_test_data import CreateTestData
+
+class TestTagSearch(object):
+ @classmethod
+ def setup_class(self):
+ if not is_search_supported():
+ raise SkipTest("Search not supported")
+ CreateTestData.create()
+
+ @classmethod
+ def teardown_class(self):
+ model.repo.rebuild_db()
+
+ def test_good_search_query(self):
+ result = search.query_for(model.Tag).run(query=u'ru')
+ assert result['count'] == 1, result
+ assert 'russian' in result['results'], result
+
+ result = search.query_for(model.Tag).run(query=u's')
+ assert result['count'] == 2, result
+ assert 'russian' in result['results'], result
+ assert 'tolstoy' in result['results'], result
+
+ def test_bad_search_query(self):
+ result = search.query_for(model.Tag).run(query=u'asdf')
+ assert result['count'] == 0, result
+
+ def test_good_search_fields(self):
+ result = search.query_for(model.Tag).run(fields={'tags': u'ru'})
+ assert result['count'] == 1, result
+ assert 'russian' in result['results'], result
+
+ result = search.query_for(model.Tag).run(fields={'tags': u's'})
+ assert result['count'] == 2, result
+ assert 'russian' in result['results'], result
+ assert 'tolstoy' in result['results'], result
+
+ def test_bad_search_fields(self):
+ result = search.query_for(model.Tag).run(fields={'tags': u'asdf'})
+ assert result['count'] == 0, result
Repository URL: https://bitbucket.org/okfn/ckan/
--
This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled and this
email address is listed as a recipient.
More information about the ckan-changes
mailing list