[ckan-changes] commit/ckan: 5 new changesets

Bitbucket commits-noreply at bitbucket.org
Thu Aug 18 13:03:35 UTC 2011


5 new changesets in ckan:

http://bitbucket.org/okfn/ckan/changeset/13d4c5b081d3/
changeset:   13d4c5b081d3
branch:      feature-1275-solr-search
user:        John Glover
date:        2011-08-18 12:07:57
summary:     [solr] Move search indexing code to index.py
affected #:  3 files (7.1 KB)

--- a/ckan/lib/search/__init__.py	Wed Aug 17 15:45:52 2011 +0100
+++ b/ckan/lib/search/__init__.py	Thu Aug 18 11:07:57 2011 +0100
@@ -1,9 +1,13 @@
 import logging
 import pkg_resources
 from pylons import config
-from common import QueryOptions, SearchError, SearchQuery, SearchBackend, SearchIndex
-from solr_backend import SolrSearchBackend
-from worker import dispatch_by_operation
+from ckan import model
+from ckan.model import DomainObjectOperation
+from ckan.plugins import SingletonPlugin, implements, IDomainObjectModification
+from ckan.lib.dictization.model_dictize import package_to_api1
+from common import QueryOptions, SearchQuery, SearchBackend, SearchIndex
+# from solr_backend import SolrSearchBackend
+from index import PackageSearchIndex, NoopSearchIndex
 
 log = logging.getLogger(__name__)
 
@@ -21,29 +25,68 @@
     'callback': None, # simply passed through
     }
 
-# TODO make sure all backends are thread-safe! 
-INSTANCE_CACHE = {}
+_INDICES = {
+    'package': PackageSearchIndex
+}
 
-def get_backend(backend=None):
-    if backend is None:
-        backend = config.get('search_backend', 'sql')
-    klass = None
-    for ep in pkg_resources.iter_entry_points("ckan.search", backend.strip().lower()):
-        klass = ep.load()
-    if klass is None:
-        raise KeyError("No search backend called %s" % (backend,))
-    if not klass in INSTANCE_CACHE.keys():
-        log.debug("Creating search backend: %s" % klass.__name__)
-        INSTANCE_CACHE[klass] = klass()
-    return INSTANCE_CACHE.get(klass)
+def _normalize_type(_type):
+    if isinstance(_type, model.DomainObject):
+        _type = _type.__class__
+    if isinstance(_type, type):
+        _type = _type.__name__
+    return _type.strip().lower()
+
+def index_for(_type):
+    """ Get a SearchIndex instance sub-class suitable for the specified type. """
+    try:
+        _type_n = _normalize_type(_type)
+        return _INDICES[_type_n]()
+    except KeyError, ke:
+        log.warn("Unknown search type: %s" % _type)
+        return NoopSearchIndex()
+
+def query_for(_type):
+    """ Query for entities of a specified type (name, class, instance). """
+    raise Exception("NotYetImplemented")
+
+def dispatch_by_operation(entity_type, entity, operation):
+    """Call the appropriate index method for a given notification."""
+    try:
+        index = index_for(entity_type)
+        if operation == DomainObjectOperation.new:
+            index.insert_dict(entity)
+        elif operation == DomainObjectOperation.changed:
+            index.update_dict(entity)
+        elif operation == DomainObjectOperation.deleted:
+            index.remove_dict(entity)
+        else:
+            log.warn("Unknown operation: %s" % operation)
+    except Exception, ex:
+        log.exception(ex)
+
+class SearchError(Exception): pass
+
+class SynchronousSearchPlugin(SingletonPlugin):
+    """Update the search index automatically."""
+    implements(IDomainObjectModification, inherit=True)
+
+    def notify(self, entity, operation):
+        if operation != DomainObjectOperation.deleted:
+            dispatch_by_operation(entity.__class__.__name__, 
+                                  package_to_api1(entity, {'model': model}),
+                                  operation)
+        elif operation == DomainObjectOperation.deleted:
+            dispatch_by_operation(entity.__class__.__name__, 
+                                  {'id': entity.id}, operation)
+        else:
+            log.warn("Discarded Sync. indexing for: %s" % entity)
 
 def rebuild():
     from ckan import model
-    backend = get_backend()
     log.debug("Rebuilding search index...")
     
     # Packages
-    package_index = backend.index_for(model.Package)
+    package_index = index_for(model.Package)
     package_index.clear()
     for pkg in model.Session.query(model.Package).all():
         package_index.insert_entity(pkg)
@@ -51,8 +94,7 @@
 
 def check():
     from ckan import model
-    backend = get_backend()
-    package_index = backend.index_for(model.Package)
+    package_index = index_for(model.Package)
 
     log.debug("Checking packages search index...")
     pkgs_q = model.Session.query(model.Package).filter_by(state=model.State.ACTIVE)
@@ -66,18 +108,11 @@
 
 def show(package_reference):
     from ckan import model
-    backend = get_backend()
-    package_index = backend.index_for(model.Package)
+    package_index = index_for(model.Package)
     print package_index.get_index(package_reference)
 
 def clear():
     from ckan import model
-    backend = get_backend()
     log.debug("Clearing search index...")
-    package_index = backend.index_for(model.Package)
+    package_index = index_for(model.Package)
     package_index.clear()
-
-def query_for(_type, backend=None):
-    """ Query for entities of a specified type (name, class, instance). """
-    return get_backend(backend=backend).query_for(_type)
-


--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ckan/lib/search/index.py	Thu Aug 18 11:07:57 2011 +0100
@@ -0,0 +1,161 @@
+from pylons import config
+import itertools
+import string
+from solr import SolrConnection # == solrpy
+import logging
+log = logging.getLogger(__name__)
+
+TYPE_FIELD = "entity_type"
+PACKAGE_TYPE = "package"
+KEY_CHARS = string.digits + string.letters + "_-"
+SOLR_FIELDS = [TYPE_FIELD, "res_url", "text", "urls", "indexed_ts", "site_id"]
+RESERVED_FIELDS = SOLR_FIELDS + ["tags", "groups", "res_description", 
+                                 "res_format", "res_url"]
+# HACK: this is copied over from model.PackageRelationship 
+RELATIONSHIP_TYPES = [
+    (u'depends_on', u'dependency_of'),
+    (u'derives_from', u'has_derivation'),
+    (u'links_to', u'linked_from'),
+    (u'child_of', u'parent_of'),
+]
+
+def make_connection(config):
+    url = config.get('solr_url', 'http://localhost:8983/solr')
+    user = config.get('solr_user')
+    password = config.get('solr_password')
+
+    if user is not None and password is not None:
+        return SolrConnection(url, http_user=user, http_pass=password)
+    else:
+        return SolrConnection(url)
+    
+def clear_index(config):
+    conn = make_connection(config)
+    query = "+site_id:\"%s\"" % (config.get('ckan.site_id'))
+    try:
+        conn.delete_query(query)
+        conn.commit()
+    finally:
+        conn.close()
+
+class SearchIndex(object):
+    """ 
+    A search index handles the management of documents of a specific type in the 
+    index, but no queries. 
+    The default implementation maps many of the methods, so most subclasses will 
+    only have to implement ``update_dict`` and ``remove_dict``. 
+    """    
+    
+    def __init__(self):
+        pass
+    
+    def insert_dict(self, data):
+        """ Insert new data from a dictionary. """
+        return self.update_dict(data)
+        
+    def insert_entity(self, entity):
+        """ Insert new data from a domain object. """
+        return self.insert_dict(entity.as_dict())
+    
+    def update_dict(self, data):
+        """ Update data from a dictionary. """
+        log.debug("NOOP Index: %s" % ",".join(data.keys()))
+    
+    def update_entity(self, entity):
+        """ Update data from a domain object. """
+        # in convention we trust:
+        return self.update_dict(entity.as_dict())
+    
+    def remove_dict(self, data):
+        """ Delete an index entry uniquely identified by ``data``. """
+        log.debug("NOOP Delete: %s" % ",".join(data.keys()))
+        
+    def remove_entity(self, entity):
+        """ Delete ``entity``. """
+        return self.remove_dict(entity.as_dict())
+        
+    def clear(self):
+        """ Delete the complete index. """
+        clear_index(config)
+
+    def get_all_entity_ids(self):
+        """ Return a list of entity IDs in the index. """
+        raise NotImplemented
+        
+class NoopSearchIndex(SearchIndex): pass
+
+class PackageSearchIndex(SearchIndex):
+    def remove_dict(self, pkg_dict):
+        self.delete_package(pkg_dict, config)
+    
+    def update_dict(self, pkg_dict):
+        self.index_package(pkg_dict, config)
+
+    def index_package(self, pkg_dict, config):
+        if pkg_dict is None:  
+            return 
+        if (not pkg_dict.get('state')) or ('active' not in pkg_dict.get('state')):
+            return self.delete_package(pkg_dict, config)
+        conn = make_connection(config)
+        index_fields = RESERVED_FIELDS + pkg_dict.keys()
+            
+        # include the extras in the main namespace
+        extras = pkg_dict.get('extras', {})
+        for (key, value) in extras.items():
+            if isinstance(value, (tuple, list)):
+                value = " ".join(map(unicode, value))
+            key = ''.join([c for c in key if c in KEY_CHARS])
+            pkg_dict['extras_' + key] = value
+            if key not in index_fields:
+                pkg_dict[key] = value
+        if 'extras' in pkg_dict:
+            del pkg_dict['extras']
+
+        # flatten the structure for indexing: 
+        for resource in pkg_dict.get('resources', []):
+            for (okey, nkey) in [('description', 'res_description'),
+                                 ('format', 'res_format'),
+                                 ('url', 'res_url')]:
+                pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u'')]
+        if 'resources' in pkg_dict:
+            del pkg_dict['resources']
+        
+        # index relationships as <type>:<object>
+        rel_dict = {}
+        rel_types = list(itertools.chain(RELATIONSHIP_TYPES))
+        for rel in pkg_dict.get('relationships', []):
+            _type = rel.get('type', 'rel')
+            if (_type in pkg_dict.keys()) or (_type not in rel_types): 
+                continue
+            rel_dict[_type] = rel_dict.get(_type, []) + [rel.get('object')]
+        
+        pkg_dict.update(rel_dict)
+        
+        if 'relationships' in pkg_dict:
+            del pkg_dict['relationships']
+
+        pkg_dict[TYPE_FIELD] = PACKAGE_TYPE
+        pkg_dict = dict([(k.encode('ascii', 'ignore'), v) for (k, v) in pkg_dict.items()])
+        
+        # mark this CKAN instance as data source:
+        pkg_dict['site_id'] = config.get('ckan.site_id')
+        
+        # send to solr:  
+        try:
+            conn.add_many([pkg_dict])
+            conn.commit(wait_flush=False, wait_searcher=False)
+        finally:
+            conn.close()  
+        
+        log.debug("Updated index for %s" % pkg_dict.get('name'))
+
+    def delete_package(self, pkg_dict, config):
+        conn = make_connection(config)
+        query = "+%s:%s +id:\"%s\" +site_id:\"%s\"" % (TYPE_FIELD, PACKAGE_TYPE,
+                                                       pkg_dict.get('id'),
+                                                       config.get('ckan.site_id'))
+        try:
+            conn.delete_query(query)
+            conn.commit()
+        finally:
+            conn.close()


--- a/ckan/tests/lib/test_solr_search_index.py	Wed Aug 17 15:45:52 2011 +0100
+++ b/ckan/tests/lib/test_solr_search_index.py	Thu Aug 18 11:07:57 2011 +0100
@@ -12,17 +12,13 @@
     def setup_class(cls):
         config['search_backend'] = 'solr'
 
-    def test_solr_backend_returned(self):
-        assert isinstance(search.get_backend(), search.SolrSearchBackend),\
-            search.get_backend()
-
     def test_solr_url_exists(self):
         assert config.get('solr_url')
         # solr.SolrConnection will throw an exception if it can't connect
         solr.SolrConnection(config.get('solr_url'))
 
 
-class TestSearchIndex(TestController):
+class TestSolrSearchIndex(TestController):
     """
     Tests that a package is indexed when the packagenotification is
     received by the indexer.
@@ -41,7 +37,7 @@
 
     def teardown(self):
         # clear the search index after every test
-        search.get_backend().index_for('Package').clear()
+        search.index_for('Package').clear()
 
     def test_index(self):
         pkg_dict = {
@@ -71,7 +67,7 @@
         search.dispatch_by_operation('Package', pkg_dict, 'new')
         response = self.solr.query('title:penguin', fq=self.fq)
         assert len(response) == 1, len(response)
-        search.get_backend().index_for('Package').clear()
+        search.index_for('Package').clear()
         response = self.solr.query('title:penguin', fq=self.fq)
         assert len(response) == 0
 
@@ -88,7 +84,7 @@
     def teardown_class(cls):
         model.repo.rebuild_db()
         cls.solr.close()
-        search.get_backend().index_for('Package').clear()
+        search.index_for('Package').clear()
 
     def test_0_indexing(self):
         """


http://bitbucket.org/okfn/ckan/changeset/bbc7508b7047/
changeset:   bbc7508b7047
branch:      feature-1275-solr-search
user:        John Glover
date:        2011-08-18 14:41:00
summary:     [solr] move solr connection function to common.py
affected #:  2 files (10.0 KB)

--- a/ckan/lib/search/common.py	Thu Aug 18 11:07:57 2011 +0100
+++ b/ckan/lib/search/common.py	Thu Aug 18 13:41:00 2011 +0100
@@ -1,296 +1,15 @@
+from solr import SolrConnection
 import logging
-
-from paste.util.multidict import MultiDict 
-from paste.deploy.converters import asbool
-from ckan import model
-
 log = logging.getLogger(__name__)
 
 class SearchError(Exception): pass
 
-class SearchBackend(object):
-    """
-    A search backend describes the engine used to actually maintain data. This can be 
-    something like Solr, Xapian, or just a mapping onto SQL queries. 
-    
-    The backend stores a mapping of ``SearchIndex``, ``SearchQuery`` pairs for all 
-    entity types that are supposed to be queried using this engine. 
-    
-    Entity types can be given as classes, objects or strings that uniquely identify a 
-    ``DomainObject`` type used in CKAN.
-    """
-    
-    def __init__(self):
-        self._typed_queries = {}
-        self._typed_indices = {}
-        self._setup()
-        
-    def _setup(self):
-        """ This method is overridden by subclasses to actually register handlers """
-        pass
-    
-    def _normalize_type(self, _type):
-        if isinstance(_type, model.DomainObject):
-            _type = _type.__class__
-        if isinstance(_type, type):
-            _type = _type.__name__
-        return _type.strip().lower()
-    
-    def register(self, _type, index_class, query_class):
-        """ Register a type by setting both query and index classes. """
-        _type = self._normalize_type(_type)
-        self._typed_queries[_type] = query_class
-        self._typed_indices[_type] = index_class
-        
-    def unregister(self, _type):
-        """ TODO: Find out what would possibly use this. """
-        _type = self._normalize_type(_type)
-        if _type in self._typed_queries:
-            del self._typed_queries[_type]
-        if _type in self._typed_indices:
-            del self._typed_indices[_type]
-    
-    def query_for(self, _type):
-        """ Get a SearchQuery instance sub-class suitable for the specified type. """
-        try:
-            _type_n = self._normalize_type(_type)
-            return self._typed_queries[_type_n](self)
-        except KeyError, ke:
-            raise SearchError("Unknown search type: %s" % _type)
-            
-    def index_for(self, _type):
-        """ Get a SearchIndex instance sub-class suitable for the specified type. """
-        try:
-            _type_n = self._normalize_type(_type)
-            return self._typed_indices[_type_n](self)
-        except KeyError, ke:
-            log.warn("Unknown search type: %s" % _type)
-            return NoopSearchIndex(self)
-            
-    def types(self):
-        return self._typed_queries.keys()
-            
+def make_connection(config):
+    url = config.get('solr_url', 'http://localhost:8983/solr')
+    user = config.get('solr_user')
+    password = config.get('solr_password')
 
-class SearchQuery(object):
-    """
-    A query is ... when you ask the search engine things. SearchQuery is intended 
-    to be used for only one query, i.e. it sets state. Definitely not thread-safe.
-    """
-    
-    def __init__(self, backend):
-        self.backend = backend
-        self.results = []
-        self.count = 0
-    
-    @property
-    def open_licenses(self):
-        # backend isn't exactly the very best place to put these, but they stay
-        # there persistently. 
-        # TODO: figure out if they change during run-time. 
-        if not hasattr(self.backend, '_open_licenses'):
-            self.backend._open_licenses = []
-            for license in model.Package.get_license_register().values():
-                if license and license.isopen():
-                    self.backend._open_licenses.append(license.id)
-        return self.backend._open_licenses
-    
-    def _format_results(self):
-        if not self.options.return_objects and len(self.results):
-            if self.options.all_fields:
-                self.results = [r.as_dict() for r in self.results]
-            else:
-                attr_name = self.options.ref_entity_with_attr
-                self.results = [getattr(entity, attr_name) for entity in self.results]
-    
-    def run(self, query=None, terms=[], fields={}, facet_by=[], options=None, **kwargs):
-        if options is None:
-            options = QueryOptions(**kwargs) 
-        else:
-            options.update(kwargs)
-        self.options = options
-        self.options.validate()
-        self.facet_by = facet_by
-        self.facets = dict()
-        self.query = QueryParser(query, terms, fields)
-        self.query.validate()
-        self._run()
-        self._format_results()
-        return {'results': self.results, 'count': self.count}
-        
-    def _run(self):
-        raise SearchError("SearchQuery._run() not implemented!")
-        
-    # convenience, allows to query(..)
-    __call__ = run
-
-
-class QueryOptions(dict):
-    """
-    Options specify aspects of the search query which are only tangentially related 
-    to the query terms (such as limits, etc.).
-    """
-    
-    BOOLEAN_OPTIONS = ['filter_by_downloadable', 'filter_by_openness', 'all_fields']
-    INTEGER_OPTIONS = ['offset', 'limit']
-
-    def __init__(self, **kwargs):
-        from ckan.lib.search import DEFAULT_OPTIONS
-        
-        # set values according to the defaults
-        for option_name, default_value in DEFAULT_OPTIONS.items():
-            if not option_name in self:
-                self[option_name] = default_value
-        
-        super(QueryOptions, self).__init__(**kwargs)
-    
-    def validate(self):
-        for key, value in self.items():
-            if key in self.BOOLEAN_OPTIONS:
-                try:
-                    value = asbool(value)
-                except ValueError:
-                    raise SearchError('Value for search option %r must be True or False (1 or 0) but received %r' % (key, value))
-            elif key in self.INTEGER_OPTIONS:
-                try:
-                    value = int(value)
-                except ValueError:
-                    raise SearchError('Value for search option %r must be an integer but received %r' % (key, value))
-            self[key] = value    
-    
-    def __getattr__(self, name):
-        return self.get(name)
-        
-    def __setattr__(self, name, value):
-        self[name] = value
-
-
-class QueryParser(object):
-    """
-    The query parser will take any incoming query specifications and turn 
-    them into field-specific and general query parts. 
-    """
-    
-    def __init__(self, query, terms, fields):
-        self._query = query
-        self._terms = terms
-        self._fields = MultiDict(fields)
-    
-    @property    
-    def query(self):
-        if not hasattr(self, '_combined_query'):
-            parts = [self._query if self._query is not None else '']
-            
-            for term in self._terms:
-                if term.find(u' ') != -1:
-                    term = u"\"%s\"" % term
-                parts.append(term.strip())
-                
-            for field, value in self._fields.items():
-                if value.find(' ') != -1:
-                    value = u"\"%s\"" % value
-                parts.append(u"%s:%s" % (field.strip(), value.strip()))
-                
-            self._combined_query = u' '.join(parts)
-        return self._combined_query
-    
-    def _query_tokens(self):
-        """ Split the query string, leaving quoted strings intact. """
-        if self._query:
-            inside_quote = False
-            buf = u''
-            for ch in self._query:
-                if ch == u' ' and not inside_quote:
-                    if len(buf):
-                        yield buf.strip()
-                    buf = u''
-                elif ch == inside_quote:
-                    inside_quote = False
-                elif ch in [u"\"", u"'"]:
-                    inside_quote = ch
-                else:
-                    buf += ch
-            if len(buf):
-                yield buf.strip()
-    
-    def _parse_query(self):
-        """ Decompose the query string into fields and terms. """
-        self._combined_fields = MultiDict(self._fields)
-        self._combined_terms = list(self._terms)
-        for token in self._query_tokens():
-            colon_pos = token.find(u':')
-            if colon_pos != -1:
-                field = token[:colon_pos]
-                value = token[colon_pos+1:]
-                value = value.strip('"').strip("'").strip()
-                self._combined_fields.add(field, value)
-            else:
-                self._combined_terms.append(token)
-    
-    @property
-    def fields(self):
-        if not hasattr(self, '_combined_fields'):
-            self._parse_query()
-        return self._combined_fields
-    
-    @property
-    def terms(self):
-        if not hasattr(self, '_combined_terms'):
-            self._parse_query()
-        return self._combined_terms
-    
-    def validate(self):
-        """ Check that this is a valid query. """
-        pass
-    
-    def __str__(self):
-        return self.query
-        
-    def __repr__(self):
-        return "Query(%r)" % self.query
-
-
-class SearchIndex(object):
-    """ 
-    A search index handles the management of documents of a specific type in the 
-    index, but no queries. 
-    The default implementation maps many of the methods, so most subclasses will 
-    only have to implement ``update_dict`` and ``remove_dict``. 
-    """    
-    
-    def __init__(self, backend):
-        self.backend = backend
-    
-    def insert_dict(self, data):
-        """ Insert new data from a dictionary. """
-        return self.update_dict(data)
-        
-    def insert_entity(self, entity):
-        """ Insert new data from a domain object. """
-        return self.insert_dict(entity.as_dict())
-    
-    def update_dict(self, data):
-        """ Update data from a dictionary. """
-        log.debug("NOOP Index: %s" % ",".join(data.keys()))
-    
-    def update_entity(self, entity):
-        """ Update data from a domain object. """
-        # in convention we trust:
-        return self.update_dict(entity.as_dict())
-    
-    def remove_dict(self, data):
-        """ Delete an index entry uniquely identified by ``data``. """
-        log.debug("NOOP Delete: %s" % ",".join(data.keys()))
-        
-    def remove_entity(self, entity):
-        """ Delete ``entity``. """
-        return self.remove_dict(entity.as_dict())
-        
-    def clear(self):
-        """ Delete the complete index. """
-        log.debug("NOOP Index reset")
-
-    def get_all_entity_ids(self):
-        """ Return a list of entity IDs in the index. """
-        raise NotImplemented
-        
-class NoopSearchIndex(SearchIndex): pass
+    if user is not None and password is not None:
+        return SolrConnection(url, http_user=user, http_pass=password)
+    else:
+        return SolrConnection(url)


--- a/ckan/lib/search/index.py	Thu Aug 18 11:07:57 2011 +0100
+++ b/ckan/lib/search/index.py	Thu Aug 18 13:41:00 2011 +0100
@@ -1,7 +1,7 @@
 from pylons import config
 import itertools
 import string
-from solr import SolrConnection # == solrpy
+from common import make_connection
 import logging
 log = logging.getLogger(__name__)
 
@@ -19,16 +19,6 @@
     (u'child_of', u'parent_of'),
 ]
 
-def make_connection(config):
-    url = config.get('solr_url', 'http://localhost:8983/solr')
-    user = config.get('solr_user')
-    password = config.get('solr_password')
-
-    if user is not None and password is not None:
-        return SolrConnection(url, http_user=user, http_pass=password)
-    else:
-        return SolrConnection(url)
-    
 def clear_index(config):
     conn = make_connection(config)
     query = "+site_id:\"%s\"" % (config.get('ckan.site_id'))


http://bitbucket.org/okfn/ckan/changeset/bb0c85b88b44/
changeset:   bb0c85b88b44
branch:      feature-1275-solr-search
user:        John Glover
date:        2011-08-18 14:42:11
summary:     [solr] update synchronous search plugin entry point
affected #:  1 file (7 bytes)

--- a/setup.py	Thu Aug 18 13:41:00 2011 +0100
+++ b/setup.py	Thu Aug 18 13:42:11 2011 +0100
@@ -84,7 +84,7 @@
     solr = ckan.lib.search.solr_backend:SolrSearchBackend
 
     [ckan.plugins]
-    synchronous_search = ckan.lib.search.worker:SynchronousSearchPlugin
+    synchronous_search = ckan.lib.search:SynchronousSearchPlugin
 
     [ckan.system_plugins]
     domain_object_mods = ckan.model.modification:DomainObjectModificationExtension


http://bitbucket.org/okfn/ckan/changeset/a522091287c7/
changeset:   a522091287c7
branch:      feature-1275-solr-search
user:        John Glover
date:        2011-08-18 14:42:59
summary:     [solr] move all search query code to query.py, remove references to get_backend()
affected #:  7 files (9.2 KB)

--- a/ckan/lib/search/__init__.py	Thu Aug 18 13:42:11 2011 +0100
+++ b/ckan/lib/search/__init__.py	Thu Aug 18 13:42:59 2011 +0100
@@ -1,13 +1,11 @@
 import logging
-import pkg_resources
-from pylons import config
 from ckan import model
 from ckan.model import DomainObjectOperation
 from ckan.plugins import SingletonPlugin, implements, IDomainObjectModification
 from ckan.lib.dictization.model_dictize import package_to_api1
-from common import QueryOptions, SearchQuery, SearchBackend, SearchIndex
-# from solr_backend import SolrSearchBackend
+from common import SearchError
 from index import PackageSearchIndex, NoopSearchIndex
+from query import PackageSearchQuery, QueryOptions
 
 log = logging.getLogger(__name__)
 
@@ -29,6 +27,10 @@
     'package': PackageSearchIndex
 }
 
+_QUERIES = {
+    'package': PackageSearchQuery
+}
+
 def _normalize_type(_type):
     if isinstance(_type, model.DomainObject):
         _type = _type.__class__
@@ -45,9 +47,13 @@
         log.warn("Unknown search type: %s" % _type)
         return NoopSearchIndex()
 
-def query_for(_type):
-    """ Query for entities of a specified type (name, class, instance). """
-    raise Exception("NotYetImplemented")
+def query_for( _type):
+    """ Get a SearchQuery instance sub-class suitable for the specified type. """
+    try:
+        _type_n = _normalize_type(_type)
+        return _QUERIES[_type_n]()
+    except KeyError, ke:
+        raise SearchError("Unknown search type: %s" % _type)
 
 def dispatch_by_operation(entity_type, entity, operation):
     """Call the appropriate index method for a given notification."""
@@ -64,8 +70,6 @@
     except Exception, ex:
         log.exception(ex)
 
-class SearchError(Exception): pass
-
 class SynchronousSearchPlugin(SingletonPlugin):
     """Update the search index automatically."""
     implements(IDomainObjectModification, inherit=True)


--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ckan/lib/search/query.py	Thu Aug 18 13:42:59 2011 +0100
@@ -0,0 +1,248 @@
+from pylons import config
+from paste.util.multidict import MultiDict 
+from paste.deploy.converters import asbool
+from ckan import model
+from ckan.authz import Authorizer
+from common import make_connection, SearchError
+import logging
+log = logging.getLogger(__name__)
+
+_open_licenses = None
+
+class QueryOptions(dict):
+    """
+    Options specify aspects of the search query which are only tangentially related 
+    to the query terms (such as limits, etc.).
+    """
+    
+    BOOLEAN_OPTIONS = ['filter_by_downloadable', 'filter_by_openness', 'all_fields']
+    INTEGER_OPTIONS = ['offset', 'limit']
+
+    def __init__(self, **kwargs):
+        from ckan.lib.search import DEFAULT_OPTIONS
+        
+        # set values according to the defaults
+        for option_name, default_value in DEFAULT_OPTIONS.items():
+            if not option_name in self:
+                self[option_name] = default_value
+        
+        super(QueryOptions, self).__init__(**kwargs)
+    
+    def validate(self):
+        for key, value in self.items():
+            if key in self.BOOLEAN_OPTIONS:
+                try:
+                    value = asbool(value)
+                except ValueError:
+                    raise SearchError('Value for search option %r must be True or False (1 or 0) but received %r' % (key, value))
+            elif key in self.INTEGER_OPTIONS:
+                try:
+                    value = int(value)
+                except ValueError:
+                    raise SearchError('Value for search option %r must be an integer but received %r' % (key, value))
+            self[key] = value    
+    
+    def __getattr__(self, name):
+        return self.get(name)
+        
+    def __setattr__(self, name, value):
+        self[name] = value
+
+
+class QueryParser(object):
+    """
+    The query parser will take any incoming query specifications and turn 
+    them into field-specific and general query parts. 
+    """
+    
+    def __init__(self, query, terms, fields):
+        self._query = query
+        self._terms = terms
+        self._fields = MultiDict(fields)
+    
+    @property    
+    def query(self):
+        if not hasattr(self, '_combined_query'):
+            parts = [self._query if self._query is not None else '']
+            
+            for term in self._terms:
+                if term.find(u' ') != -1:
+                    term = u"\"%s\"" % term
+                parts.append(term.strip())
+                
+            for field, value in self._fields.items():
+                if value.find(' ') != -1:
+                    value = u"\"%s\"" % value
+                parts.append(u"%s:%s" % (field.strip(), value.strip()))
+                
+            self._combined_query = u' '.join(parts)
+        return self._combined_query
+    
+    def _query_tokens(self):
+        """ Split the query string, leaving quoted strings intact. """
+        if self._query:
+            inside_quote = False
+            buf = u''
+            for ch in self._query:
+                if ch == u' ' and not inside_quote:
+                    if len(buf):
+                        yield buf.strip()
+                    buf = u''
+                elif ch == inside_quote:
+                    inside_quote = False
+                elif ch in [u"\"", u"'"]:
+                    inside_quote = ch
+                else:
+                    buf += ch
+            if len(buf):
+                yield buf.strip()
+    
+    def _parse_query(self):
+        """ Decompose the query string into fields and terms. """
+        self._combined_fields = MultiDict(self._fields)
+        self._combined_terms = list(self._terms)
+        for token in self._query_tokens():
+            colon_pos = token.find(u':')
+            if colon_pos != -1:
+                field = token[:colon_pos]
+                value = token[colon_pos+1:]
+                value = value.strip('"').strip("'").strip()
+                self._combined_fields.add(field, value)
+            else:
+                self._combined_terms.append(token)
+    
+    @property
+    def fields(self):
+        if not hasattr(self, '_combined_fields'):
+            self._parse_query()
+        return self._combined_fields
+    
+    @property
+    def terms(self):
+        if not hasattr(self, '_combined_terms'):
+            self._parse_query()
+        return self._combined_terms
+    
+    def validate(self):
+        """ Check that this is a valid query. """
+        pass
+    
+    def __str__(self):
+        return self.query
+        
+    def __repr__(self):
+        return "Query(%r)" % self.query
+
+
+class SearchQuery(object):
+    """
+    A query is ... when you ask the search engine things. SearchQuery is intended 
+    to be used for only one query, i.e. it sets state. Definitely not thread-safe.
+    """
+    
+    def __init__(self):
+        self.results = []
+        self.count = 0
+    
+    @property
+    def open_licenses(self):
+        # this isn't exactly the very best place to put these, but they stay
+        # there persistently. 
+        # TODO: figure out if they change during run-time. 
+        global _open_licenses
+        if not isinstance(_open_licenses, list):
+            _open_licenses = []
+            for license in model.Package.get_license_register().values():
+                if license and license.isopen():
+                    _open_licenses.append(license.id)
+        return _open_licenses
+    
+    def _format_results(self):
+        if not self.options.return_objects and len(self.results):
+            if self.options.all_fields:
+                self.results = [r.as_dict() for r in self.results]
+            else:
+                attr_name = self.options.ref_entity_with_attr
+                self.results = [getattr(entity, attr_name) for entity in self.results]
+    
+    def run(self, query=None, terms=[], fields={}, facet_by=[], options=None, **kwargs):
+        if options is None:
+            options = QueryOptions(**kwargs) 
+        else:
+            options.update(kwargs)
+        self.options = options
+        self.options.validate()
+        self.facet_by = facet_by
+        self.facets = dict()
+        self.query = QueryParser(query, terms, fields)
+        self.query.validate()
+        self._run()
+        self._format_results()
+        return {'results': self.results, 'count': self.count}
+        
+    def _run(self):
+        raise SearchError("SearchQuery._run() not implemented!")
+        
+    # convenience, allows to query(..)
+    __call__ = run
+
+
+class PackageSearchQuery(SearchQuery):
+    def _run(self):
+        fq = ""
+
+        # Filter for options
+        if self.options.filter_by_downloadable:
+            fq += u" +res_url:[* TO *] " # not null resource URL 
+        if self.options.filter_by_openness:
+            licenses = ["license_id:%s" % id for id in self.open_licenses]
+            licenses = " OR ".join(licenses)
+            fq += " +(%s) " % licenses
+        
+        order_by = self.options.order_by
+        if order_by == 'rank' or order_by is None: 
+            order_by = 'score'
+
+        # show only results from this CKAN instance:
+        fq = fq + " +site_id:\"%s\" " % config.get('ckan.site_id')
+
+        # Filter for package status       
+        fq += "+state:active "
+            
+        # configurable for iati: full options list
+        facet_limit = int(config.get('search.facets.limit', '50'))
+
+        # query
+        query = self.query.query
+        if (not query) or (not query.strip()):
+            # no query terms, i.e. all documents
+            query = '*:*'
+        
+        conn = make_connection(config)
+        try:
+            data = conn.query(query,
+                              fq=fq, 
+                              # make sure data.facet_counts is set:
+                              facet='true',
+                              facet_limit=facet_limit,
+                              facet_field=self.facet_by,
+                              facet_mincount=1,
+                              start=self.options.offset, 
+                              rows=self.options.limit,
+                              fields='id,score', 
+                              sort_order='desc', 
+                              sort=order_by)
+            
+        except Exception, e:
+            # this wrapping will be caught further up in the WUI.
+            log.exception(e)
+            raise SearchError(e)
+        finally:
+            conn.close()
+        
+        self.count = int(data.numFound)
+        scores = dict([(r.get('id'), r.get('score')) for r in data.results])
+        q = Authorizer().authorized_query(self.options.username, model.Package)
+        q = q.filter(model.Package.id.in_(scores.keys()))
+        self.facets = data.facet_counts.get('facet_fields', {})
+        self.results = sorted(q, key=lambda r: scores[r.id], reverse=True)


--- a/ckan/lib/search/solr_backend.py	Thu Aug 18 13:42:11 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-from pylons import config
-from ckan.lib.search import SearchBackend, SearchQuery, SearchIndex, \
-    SearchError
-from ckan.authz import Authorizer
-from ckan import model
-from solr_indexing import make_connection, index_package, delete_package, \
-    clear_index
-import logging
-log = logging.getLogger(__name__)
-
-
-class SolrSearchBackend(SearchBackend):
-    
-    def _setup(self):
-        self.register(model.Package.__name__, PackageSolrSearchIndex, PackageSolrSearchQuery)    
-
-class PackageSolrSearchQuery(SearchQuery):
-    
-    def _run(self):
-        fq = ""
-
-        # Filter for options
-        if self.options.filter_by_downloadable:
-            fq += u" +res_url:[* TO *] " # not null resource URL 
-        if self.options.filter_by_openness:
-            licenses = ["license_id:%s" % id for id in self.open_licenses]
-            licenses = " OR ".join(licenses)
-            fq += " +(%s) " % licenses
-        
-        order_by = self.options.order_by
-        if order_by == 'rank' or order_by is None: 
-            order_by = 'score'
-
-        # show only results from this CKAN instance:
-        fq = fq + " +site_id:\"%s\" " % config.get('ckan.site_id')
-
-        # Filter for package status       
-        fq += "+state:active "
-            
-        # configurable for iati: full options list
-        facet_limit = int(config.get('search.facets.limit', '50'))
-
-        # query
-        query = self.query.query
-        if (not query) or (not query.strip()):
-            # no query terms, i.e. all documents
-            query = '*:*'
-        
-        conn = make_connection(config)
-        try:
-            data = conn.query(query,
-                              fq=fq, 
-                              # make sure data.facet_counts is set:
-                              facet='true',
-                              facet_limit=facet_limit,
-                              facet_field=self.facet_by,
-                              facet_mincount=1,
-                              start=self.options.offset, 
-                              rows=self.options.limit,
-                              fields='id,score', 
-                              sort_order='desc', 
-                              sort=order_by)
-            
-        except Exception, e:
-            # this wrapping will be caught further up in the WUI.
-            log.exception(e)
-            raise SearchError(e)
-        finally:
-            conn.close()
-        
-        self.count = int(data.numFound)
-        scores = dict([(r.get('id'), r.get('score')) for r in data.results])
-        q = Authorizer().authorized_query(self.options.username, model.Package)
-        q = q.filter(model.Package.id.in_(scores.keys()))
-        self.facets = data.facet_counts.get('facet_fields', {})
-        self.results = sorted(q, key=lambda r: scores[r.id], reverse=True)
-
-    
-class SolrSearchIndex(SearchIndex):
-    
-    def clear(self):
-        clear_index(config)
-
-class PackageSolrSearchIndex(SolrSearchIndex):
-    
-    def remove_dict(self, pkg_dict):
-        delete_package(pkg_dict, config)
-    
-    def update_dict(self, pkg_dict):
-        index_package(pkg_dict, config)


--- a/ckan/lib/search/solr_indexing.py	Thu Aug 18 13:42:11 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,113 +0,0 @@
-import itertools
-import string
-from solr import SolrConnection # == solrpy
-import logging
-log = logging.getLogger(__name__)
-
-TYPE_FIELD = "entity_type"
-PACKAGE_TYPE = "package"
-KEY_CHARS = string.digits + string.letters + "_-"
-
-SOLR_FIELDS = [TYPE_FIELD, "res_url", "text", "urls", "indexed_ts", "site_id"]
-
-RESERVED_FIELDS = SOLR_FIELDS + ["tags", "groups", "res_description", 
-                                 "res_format", "res_url"]
-                                 
-# HACK: this is copied over from model.PackageRelationship 
-RELATIONSHIP_TYPES = [(u'depends_on', u'dependency_of'),
-                      (u'derives_from', u'has_derivation'),
-                      (u'links_to', u'linked_from'),
-                      (u'child_of', u'parent_of'),
-                     ]
-                     
-def make_connection(config):
-    url = config.get('solr_url', 'http://localhost:8983/solr')
-    user = config.get('solr_user')
-    password = config.get('solr_password')
-
-    if user is not None and password is not None:
-        return SolrConnection(url, http_user=user, http_pass=password)
-    else:
-        return SolrConnection(url)
-
-
-def index_package(pkg_dict, config):
-    if pkg_dict is None:  
-        return 
-    if (not pkg_dict.get('state')) or ('active' not in pkg_dict.get('state')):
-        return delete_package(pkg_dict, config)
-    conn = make_connection(config)
-    index_fields = RESERVED_FIELDS + pkg_dict.keys()
-        
-    # include the extras in the main namespace
-    extras = pkg_dict.get('extras', {})
-    for (key, value) in extras.items():
-        if isinstance(value, (tuple, list)):
-            value = " ".join(map(unicode, value))
-        key = ''.join([c for c in key if c in KEY_CHARS])
-        pkg_dict['extras_' + key] = value
-        if key not in index_fields:
-            pkg_dict[key] = value
-    if 'extras' in pkg_dict:
-        del pkg_dict['extras']
-
-    # flatten the structure for indexing: 
-    for resource in pkg_dict.get('resources', []):
-        for (okey, nkey) in [('description', 'res_description'),
-                             ('format', 'res_format'),
-                             ('url', 'res_url')]:
-            pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u'')]
-    if 'resources' in pkg_dict:
-        del pkg_dict['resources']
-    
-    # index relationships as <type>:<object>
-    rel_dict = {}
-    rel_types = list(itertools.chain(RELATIONSHIP_TYPES))
-    for rel in pkg_dict.get('relationships', []):
-        _type = rel.get('type', 'rel')
-        if (_type in pkg_dict.keys()) or (_type not in rel_types): 
-            continue
-        rel_dict[_type] = rel_dict.get(_type, []) + [rel.get('object')]
-    
-    pkg_dict.update(rel_dict)
-    
-    if 'relationships' in pkg_dict:
-        del pkg_dict['relationships']
-
-    pkg_dict[TYPE_FIELD] = PACKAGE_TYPE
-    pkg_dict = dict([(k.encode('ascii', 'ignore'), v) for (k, v) in pkg_dict.items()])
-    
-    # mark this CKAN instance as data source:
-    pkg_dict['site_id'] = config.get('ckan.site_id')
-    
-    # send to solr:  
-    try:
-        conn.add_many([pkg_dict])
-        conn.commit(wait_flush=False, wait_searcher=False)
-    finally:
-        conn.close()  
-    
-    log.debug("Updated index for %s" % pkg_dict.get('name'))
-
-
-def delete_package(pkg_dict, config):
-    conn = make_connection(config)
-    query = "+%s:%s +id:\"%s\" +site_id:\"%s\"" % (TYPE_FIELD, PACKAGE_TYPE,
-                                                   pkg_dict.get('id'),
-                                                   config.get('ckan.site_id'))
-    try:
-        conn.delete_query(query)
-        conn.commit()
-    finally:
-        conn.close()
-
-    
-def clear_index(config):
-    conn = make_connection(config)
-    query = "+site_id:\"%s\"" % (config.get('ckan.site_id'))
-    try:
-        conn.delete_query(query)
-        conn.commit()
-    finally:
-        conn.close()
-    


--- a/ckan/lib/search/worker.py	Thu Aug 18 13:42:11 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-import logging
-
-import ckan.model as model
-from ckan.model import DomainObjectOperation
-from ckan.plugins import SingletonPlugin, implements, IDomainObjectModification
-from ckan.lib.dictization.model_dictize import package_to_api1
-# Needed for SolrIndexingWorker:
-# from ckanext.queue.worker import Worker 
-# from indexing import index_package, delete_package
-
-log = logging.getLogger(__name__)
-
-         
-def dispatch_by_operation(entity_type, entity, operation, backend=None):
-    """ Call the appropriate index method for a given notification. """
-    if backend is None: 
-        from ckan.lib.search import get_backend
-        backend = get_backend()
-    try:
-        index = backend.index_for(entity_type)
-        if operation == DomainObjectOperation.new:
-            index.insert_dict(entity)
-        elif operation == DomainObjectOperation.changed:
-            index.update_dict(entity)
-        elif operation == DomainObjectOperation.deleted:
-            index.remove_dict(entity)
-        else:
-            log.warn("Unknown operation: %s" % operation)
-    except Exception, ex:
-        log.exception(ex)
-
-
-class SynchronousSearchPlugin(SingletonPlugin):
-
-    implements(IDomainObjectModification, inherit=True)
-
-    def notify(self, entity, operation):
-
-        if operation != DomainObjectOperation.deleted:
-            dispatch_by_operation(entity.__class__.__name__, 
-                                  package_to_api1(entity, {'model': model}),
-                                  operation)
-        elif operation == DomainObjectOperation.deleted:
-            dispatch_by_operation(entity.__class__.__name__, 
-                                  {'id': entity.id}, operation)
-        else:
-            log.warn("Discarded Sync. indexing for: %s" % entity)
-            
-
-# class SolrIndexingWorker(Worker):
-#     """
-#     SolrIndexingWorker. Requires ckanext-queue >= 0.1.
-#     """
-    
-#     def consume(self, routing_key, operation, payload):
-#         assert 'solr_url' in self.config
-#         assert 'ckan.site_id' in self.config
-        
-#         if routing_key == 'Package':
-#             if operation in ['new', 'changed']:
-#                 index_package(payload, self.config) 
-#             elif operation == 'deleted':
-#                 delete_package(payload, self.config) 


--- a/ckan/tests/lib/test_solr_package_search.py	Thu Aug 18 13:42:11 2011 +0100
+++ b/ckan/tests/lib/test_solr_package_search.py	Thu Aug 18 13:42:59 2011 +0100
@@ -1,4 +1,3 @@
-from pylons import config
 from ckan.tests import TestController, CreateTestData
 from ckan import model
 import ckan.lib.search as search
@@ -20,15 +19,12 @@
         idx = [t.name for t in gils.tags].index(cls.tagname)
         del gils.tags[idx]
         model.repo.commit_and_remove()
-        # solr
-        config['search_backend'] = 'solr'
         search.rebuild()
-        cls.backend = search.get_backend()
 
     @classmethod
     def teardown_class(cls):
         model.repo.rebuild_db()
-        search.get_backend().index_for('Package').clear()
+        search.index_for('Package').clear()
 
     def _pkg_names(self, result):
         return ' '.join(result['results'])
@@ -41,91 +37,91 @@
         return True
 
     def test_1_all_records(self):
-        result = self.backend.query_for(model.Package).run(query=self.q_all)
+        result = search.query_for(model.Package).run(query=self.q_all)
         assert 'gils' in result['results'], result['results']
         assert result['count'] == 6, result['count']
 
     def test_1_name(self):
         # exact name
-        result = self.backend.query_for(model.Package).run(query=u'gils')
+        result = search.query_for(model.Package).run(query=u'gils')
         assert result['count'] == 1, result
         assert self._pkg_names(result) == 'gils', result
 
     def test_1_name_multiple_results(self):
-        result = self.backend.query_for(model.Package).run(query=u'gov')
+        result = search.query_for(model.Package).run(query=u'gov')
         assert self._check_entity_names(result, ('us-gov-images', 'usa-courts-gov')), self._pkg_names(result)
         assert result['count'] == 4, self._pkg_names(result)
 
     def test_1_name_token(self):
-        result = self.backend.query_for(model.Package).run(query=u'name:gils')
+        result = search.query_for(model.Package).run(query=u'name:gils')
         assert self._pkg_names(result) == 'gils', self._pkg_names(result)
-        result = self.backend.query_for(model.Package).run(query=u'title:gils')
+        result = search.query_for(model.Package).run(query=u'title:gils')
         assert not self._check_entity_names(result, ('gils')), self._pkg_names(result)
 
     def test_2_title(self):
         # exact title, one word
-        result = self.backend.query_for(model.Package).run(query=u'Opengov.se')
+        result = search.query_for(model.Package).run(query=u'Opengov.se')
         assert self._pkg_names(result) == 'se-opengov', self._pkg_names(result)
         # multiple words
-        result = self.backend.query_for(model.Package).run(query=u'Government Expenditure')
+        result = search.query_for(model.Package).run(query=u'Government Expenditure')
         assert self._pkg_names(result) == 'uk-government-expenditure', self._pkg_names(result)
         # multiple words wrong order
-        result = self.backend.query_for(model.Package).run(query=u'Expenditure Government')
+        result = search.query_for(model.Package).run(query=u'Expenditure Government')
         assert self._pkg_names(result) == 'uk-government-expenditure', self._pkg_names(result)
         # multiple words, one doesn't match
-        result = self.backend.query_for(model.Package).run(query=u'Expenditure Government China')
+        result = search.query_for(model.Package).run(query=u'Expenditure Government China')
         assert len(result['results']) == 0, self._pkg_names(result)
 
     def test_3_licence(self):
         # this should result, but it is here to check that at least it does not error
-        result = self.backend.query_for(model.Package).run(query=u'license:"OKD::Other (PublicsDomain)"')
+        result = search.query_for(model.Package).run(query=u'license:"OKD::Other (PublicsDomain)"')
         assert result['count'] == 0, result
 
     def test_quotation(self):
         # multiple words quoted
-        result = self.backend.query_for(model.Package).run(query=u'"Government Expenditure"')
+        result = search.query_for(model.Package).run(query=u'"Government Expenditure"')
         assert self._pkg_names(result) == 'uk-government-expenditure', self._pkg_names(result)
         # multiple words quoted wrong order
-        result = self.backend.query_for(model.Package).run(query=u'"Expenditure Government"')
+        result = search.query_for(model.Package).run(query=u'"Expenditure Government"')
         assert self._pkg_names(result) == '', self._pkg_names(result)
 
     def test_string_not_found(self):
-        result = self.backend.query_for(model.Package).run(query=u'randomthing')
+        result = search.query_for(model.Package).run(query=u'randomthing')
         assert self._pkg_names(result) == '', self._pkg_names(result)
 
     def test_tags_field(self):
-        result = self.backend.query_for(model.Package).run(query=u'country-sweden')
+        result = search.query_for(model.Package).run(query=u'country-sweden')
         assert self._check_entity_names(result, ['se-publications', 'se-opengov']), self._pkg_names(result)
 
     def test_tags_token_simple(self):
-        result = self.backend.query_for(model.Package).run(query=u'tags:country-sweden')
+        result = search.query_for(model.Package).run(query=u'tags:country-sweden')
         assert self._check_entity_names(result, ['se-publications', 'se-opengov']), self._pkg_names(result)
-        result = self.backend.query_for(model.Package).run(query=u'tags:wildlife')
+        result = search.query_for(model.Package).run(query=u'tags:wildlife')
         assert self._pkg_names(result) == 'us-gov-images', self._pkg_names(result)
 
     def test_tags_token_simple_with_deleted_tag(self):
         # registry has been deleted
-        result = self.backend.query_for(model.Package).run(query=u'tags:registry')
+        result = search.query_for(model.Package).run(query=u'tags:registry')
         assert self._pkg_names(result) == '', self._pkg_names(result)
 
     def test_tags_token_multiple(self):
-        result = self.backend.query_for(model.Package).run(query=u'tags:country-sweden tags:format-pdf')
+        result = search.query_for(model.Package).run(query=u'tags:country-sweden tags:format-pdf')
         assert self._pkg_names(result) == 'se-publications', self._pkg_names(result)
 
     def test_tags_token_complicated(self):
-        result = self.backend.query_for(model.Package).run(query=u'tags:country-sweden tags:somethingrandom')
+        result = search.query_for(model.Package).run(query=u'tags:country-sweden tags:somethingrandom')
         assert self._pkg_names(result) == '', self._pkg_names(result)
 
     def test_pagination(self):
         # large search
-        all_results = self.backend.query_for(model.Package).run(query=self.q_all)
+        all_results = search.query_for(model.Package).run(query=self.q_all)
         all_pkgs = all_results['results']
         all_pkg_count = all_results['count']
 
         # limit
         options = search.QueryOptions()
         options.limit = 2
-        result = self.backend.query_for(model.Package).run(query=self.q_all, options=options)
+        result = search.query_for(model.Package).run(query=self.q_all, options=options)
         pkgs = result['results']
         count = result['count']
         assert len(pkgs) == 2, pkgs
@@ -136,7 +132,7 @@
         options = search.QueryOptions()
         options.limit = 2
         options.offset = 2
-        result = self.backend.query_for(model.Package).run(query=self.q_all, options=options)
+        result = search.query_for(model.Package).run(query=self.q_all, options=options)
         pkgs = result['results']
         assert len(pkgs) == 2, pkgs
         assert pkgs == all_pkgs[2:4]
@@ -145,14 +141,14 @@
         options = search.QueryOptions()
         options.limit = 2
         options.offset = 4
-        result = self.backend.query_for(model.Package).run(query=self.q_all, options=options)
+        result = search.query_for(model.Package).run(query=self.q_all, options=options)
         pkgs = result['results']
         assert len(pkgs) == 2, pkgs
         assert pkgs == all_pkgs[4:6]
 
     def test_order_by(self):
         # large search
-        all_results = self.backend.query_for(model.Package).run(query=self.q_all)
+        all_results = search.query_for(model.Package).run(query=self.q_all)
         all_pkgs = all_results['results']
         all_pkg_count = all_results['count']
 
@@ -160,7 +156,7 @@
         # TODO: fix this test
         # options = search.QueryOptions()
         # options.order_by = 'rank'
-        # result = self.backend.query_for(model.Package).run(query='penguin', options=options)
+        # result = search.query_for(model.Package).run(query='penguin', options=options)
         # pkgs = result['results']
         # fields = [model.Package.by_name(pkg_name).name for pkg_name in pkgs]
         # assert fields[0] == 'usa-courts-gov', fields # has penguin three times
@@ -169,7 +165,7 @@
         # name
         options = search.QueryOptions()
         options.order_by = 'name'
-        result = self.backend.query_for(model.Package).run(query=self.q_all, options=options)
+        result = search.query_for(model.Package).run(query=self.q_all, options=options)
         pkgs = result['results']
         fields = [model.Package.by_name(pkg_name).name for pkg_name in pkgs]
         sorted_fields = fields; sorted_fields.sort()
@@ -178,7 +174,7 @@
         # title
         options = search.QueryOptions()
         options.order_by = 'title'
-        result = self.backend.query_for(model.Package).run(query=self.q_all, options=options)
+        result = search.query_for(model.Package).run(query=self.q_all, options=options)
         pkgs = result['results']
         fields = [model.Package.by_name(pkg_name).title for pkg_name in pkgs]
         sorted_fields = fields; sorted_fields.sort()
@@ -187,7 +183,7 @@
         # notes
         options = search.QueryOptions()
         options.order_by = 'notes'
-        result = self.backend.query_for(model.Package).run(query=self.q_all, options=options)
+        result = search.query_for(model.Package).run(query=self.q_all, options=options)
         pkgs = result['results']
         fields = [model.Package.by_name(pkg_name).notes for pkg_name in pkgs]
         sorted_fields = fields; sorted_fields.sort()
@@ -196,7 +192,7 @@
         # extra field
         options = search.QueryOptions()
         options.order_by = 'date_released'
-        result = self.backend.query_for(model.Package).run(query=self.q_all, options=options)
+        result = search.query_for(model.Package).run(query=self.q_all, options=options)
         pkgs = result['results']
         fields = [model.Package.by_name(pkg_name) for pkg_name in pkgs]
         fields = [field.extras.get('date_released') for field in fields]
@@ -204,45 +200,43 @@
         assert fields == sorted_fields, repr(fields) + repr(sorted_fields)
 
     def test_search_notes_on(self):
-        result = self.backend.query_for(model.Package).run(query=u'restrictions')
+        result = search.query_for(model.Package).run(query=u'restrictions')
         pkgs = result['results']
         count = result['count']
         assert len(pkgs) == 2, pkgs
         
     def test_search_foreign_chars(self):
-        result = self.backend.query_for(model.Package).run(query='umlaut')
+        result = search.query_for(model.Package).run(query='umlaut')
         assert result['results'] == ['gils'], result['results']
-        result = self.backend.query_for(model.Package).run(query=u'thumb')
+        result = search.query_for(model.Package).run(query=u'thumb')
         assert result['count'] == 0, result['results']
-        result = self.backend.query_for(model.Package).run(query=u'th\xfcmb')
+        result = search.query_for(model.Package).run(query=u'th\xfcmb')
         assert result['results'] == ['gils'], result['results']
 
     def test_groups(self):
-        result = self.backend.query_for(model.Package).run(query=u'groups:random')
+        result = search.query_for(model.Package).run(query=u'groups:random')
         assert self._pkg_names(result) == '', self._pkg_names(result)
-        result = self.backend.query_for(model.Package).run(query=u'groups:ukgov')
+        result = search.query_for(model.Package).run(query=u'groups:ukgov')
         assert result['count'] == 4, self._pkg_names(result)
-        result = self.backend.query_for(model.Package).run(query=u'groups:ukgov tags:us')
+        result = search.query_for(model.Package).run(query=u'groups:ukgov tags:us')
         assert result['count'] == 2, self._pkg_names(result)
 
 class TestSearchOverall(TestController):
     @classmethod
     def setup_class(cls):
         CreateTestData.create()
-        config['search_backend'] = 'solr'
         search.rebuild()
-        cls.backend = search.get_backend()
 
     @classmethod
     def teardown_class(cls):
         model.repo.rebuild_db()
-        search.get_backend().index_for('Package').clear()
+        search.index_for('Package').clear()
 
     def _check_search_results(self, terms, expected_count, expected_packages=[], only_open=False, only_downloadable=False):
         options = search.QueryOptions()
         options.filter_by_openness = only_open
         options.filter_by_downloadable = only_downloadable
-        result = self.backend.query_for(model.Package).run(query=unicode(terms))
+        result = search.query_for(model.Package).run(query=unicode(terms))
         pkgs = result['results']
         count = result['count']
         assert count == expected_count, (count, expected_count)
@@ -281,19 +275,17 @@
              'extras':{'geographic_coverage':'000000:'},},
         ]
         CreateTestData.create_arbitrary(init_data)
-        config['search_backend'] = 'solr'
         search.rebuild()
-        cls.backend = search.get_backend()
 
     @classmethod
     def teardown_class(self):
         model.repo.rebuild_db()
-        search.get_backend().index_for('Package').clear()
+        search.index_for('Package').clear()
     
     def _do_search(self, q, expected_pkgs, count=None):
         options = search.QueryOptions()
         options.order_by = 'rank'
-        result = self.backend.query_for(model.Package).run(query=q, options=options)
+        result = search.query_for(model.Package).run(query=q, options=options)
         pkgs = result['results']
         fields = [model.Package.by_name(pkg_name).name for pkg_name in pkgs]
         if not (count is None):
@@ -304,7 +296,7 @@
     def _filtered_search(self, value, expected_pkgs, count=None):
         options = search.QueryOptions()
         options.order_by = 'rank'
-        result = self.backend.query_for(model.Package).run(fields={'geographic_coverage':value}, options=options)
+        result = search.query_for(model.Package).run(fields={'geographic_coverage':value}, options=options)
         pkgs = result['results']
         fields = [model.Package.by_name(pkg_name).name for pkg_name in pkgs]
         if not (count is None):
@@ -318,11 +310,14 @@
         self._do_search(u'united kingdom', ['uk'], 1)
         self._do_search(u'great britain', ['gb'], 1)
 
-    # TODO: solr is not currently set up to allow partial matches 
-    #       and extras are not saved as multivalued so this
-    #       test will fail. Make multivalued or remove?
-    # def test_1_filtered(self):
-    #     self._filtered_search(u'england', ['eng', 'eng_ni', 'uk', 'gb'], 4)
+    def test_1_filtered(self):
+        # TODO: solr is not currently set up to allow partial matches 
+        #       and extras are not saved as multivalued so this
+        #       test will fail. Make multivalued or remove?
+        from ckan.tests import SkipTest
+        raise SkipTest
+
+        self._filtered_search(u'england', ['eng', 'eng_ni', 'uk', 'gb'], 4)
 
 class TestExtraFields(TestController):
     @classmethod
@@ -340,17 +335,15 @@
              'extras':{'department':''},},
             ]
         CreateTestData.create_arbitrary(init_data)
-        config['search_backend'] = 'solr'
         search.rebuild()
-        cls.backend = search.get_backend()
 
     @classmethod
     def teardown_class(self):
         model.repo.rebuild_db()
-        search.get_backend().index_for('Package').clear()
+        search.index_for('Package').clear()
     
     def _do_search(self, department, expected_pkgs, count=None):
-        result = self.backend.query_for(model.Package).run(fields={'department': department})
+        result = search.query_for(model.Package).run(fields={'department': department})
         pkgs = result['results']
         fields = [model.Package.by_name(pkg_name).name for pkg_name in pkgs]
         if not (count is None):
@@ -361,12 +354,17 @@
     def test_0_basic(self):
         self._do_search(u'bcd', 'b', 1)
         self._do_search(u'cde abc', 'c', 1)
+
+    def test_1_partial_matches(self):
         # TODO: solr is not currently set up to allow partial matches 
         #       and extras are not saved as multivalued so these
         #       tests will fail. Make multivalued or remove these?
-        # self._do_search(u'abc', ['a', 'c'], 2)
-        # self._do_search(u'cde', 'c', 1)
-        # self._do_search(u'abc cde', 'c', 1)
+        from ckan.tests import SkipTest
+        raise SkipTest
+
+        self._do_search(u'abc', ['a', 'c'], 2)
+        self._do_search(u'cde', 'c', 1)
+        self._do_search(u'abc cde', 'c', 1)
 
 class TestRank(TestController):
     @classmethod
@@ -381,19 +379,17 @@
             u'test1-penguin-canary',
             u'test2-squirrel-squirrel-canary-goose'
         ]
-        config['search_backend'] = 'solr'
         search.rebuild()
-        cls.backend = search.get_backend()
 
     @classmethod
     def teardown_class(self):
         model.repo.rebuild_db()
-        search.get_backend().index_for('Package').clear()
+        search.index_for('Package').clear()
     
     def _do_search(self, q, wanted_results):
         options = search.QueryOptions()
         options.order_by = 'rank'
-        result = self.backend.query_for(model.Package).run(query=q, options=options)
+        result = search.query_for(model.Package).run(query=q, options=options)
         results = result['results']
         err = 'Wanted %r, got %r' % (wanted_results, results)
         assert wanted_results[0] == results[0], err
@@ -404,7 +400,10 @@
         self._do_search(u'squirrel', self.pkg_names[::-1])
         self._do_search(u'canary', self.pkg_names)
 
-    # TODO: fix this test
-    # def test_1_weighting(self):
-    #     self._do_search(u'penguin', self.pkg_names)
-    #     self._do_search(u'goose', self.pkg_names[::-1])
+    def test_1_weighting(self):
+        # TODO: fix this test
+        from ckan.tests import SkipTest
+        raise SkipTest
+
+        self._do_search(u'penguin', self.pkg_names)
+        self._do_search(u'goose', self.pkg_names[::-1])


--- a/ckan/tests/lib/test_solr_package_search_synchronous_update.py	Thu Aug 18 13:42:11 2011 +0100
+++ b/ckan/tests/lib/test_solr_package_search_synchronous_update.py	Thu Aug 18 13:42:59 2011 +0100
@@ -15,10 +15,8 @@
         gc.collect()
 
         CreateTestData.create()
-        config['search_backend'] = 'solr'
         search.rebuild()
         plugins.load('synchronous_search')
-        cls.backend = search.get_backend()
 
         cls.new_pkg_dict = {
             "name": "council-owned-litter-bins",
@@ -53,7 +51,7 @@
     @classmethod
     def teardown_class(cls):
         model.repo.rebuild_db()
-        search.get_backend().index_for('Package').clear()
+        search.index_for('Package').clear()
 
     def _create_package(self, package=None):
         rev = model.repo.new_revision()


http://bitbucket.org/okfn/ckan/changeset/e3b5137ef6b5/
changeset:   e3b5137ef6b5
branch:      feature-1275-solr-search
user:        John Glover
date:        2011-08-18 15:02:12
summary:     [solr] Remove postgres search
affected #:  5 files (60 bytes)

--- a/ckan/lib/search/sql.py	Thu Aug 18 13:42:59 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,304 +0,0 @@
-import logging
-
-import sqlalchemy
-from sqlalchemy.sql import or_
-from sqlalchemy.exceptions import UnboundExecutionError
-
-from common import SearchBackend, SearchQuery, SearchError
-from common import SearchIndex, NoopSearchIndex
-from ckan import model
-from ckan.model import meta
-from ckan import authz
-
-log = logging.getLogger(__name__)
-
-
-class SqlSearchBackend(SearchBackend):
-    
-    @property
-    def connection(self):
-        return meta.Session.connection()
-       
-    def _setup(self):
-        self.register(model.Package, PackageSqlSearchIndex, PackageSqlSearchQuery)
-        self.register(model.Group, NoopSearchIndex, GroupSqlSearchQuery)
-        self.register(model.Tag, NoopSearchIndex, TagSqlSearchQuery)
-        self.register(model.Resource, NoopSearchIndex, ResourceSqlSearchQuery)
-        
-        
-class SqlSearchQuery(SearchQuery):
-    """ Common functions for queries against the DB. """
-    
-    def _db_query(self, q):
-        # Run the query
-        self.count = q.count()
-        q = q.offset(self.options.get('offset'))
-        q = q.limit(self.options.get('limit'))
-        
-        #print q
-        
-        self.results = []
-        for result in q:
-            if isinstance(result, tuple) and isinstance(result[0], model.DomainObject):
-                # This is the case for order_by rank due to the add_column.
-                self.results.append(result[0])
-            else:
-                self.results.append(result)
-
-
-class GroupSqlSearchQuery(SqlSearchQuery):
-    """ Search for groups in plain SQL. """
-    
-    def _run(self):
-        if not self.query.terms:
-            return
-        q = authz.Authorizer().authorized_query(username, model.Group)
-        for term in self.query.terms:
-            q = query.filter(model.Group.name.contains(term.lower()))
-        self._db_query(q)
-
-
-class TagSqlSearchQuery(SqlSearchQuery):
-    """ Search for tags in plain SQL. """
-
-    def _run(self):
-        q = model.Session.query(model.Tag)
-        q = q.distinct().join(model.Tag.package_tags)
-        terms = list(self.query.terms)
-        for field, value in self.query.fields.items():
-            if field in ('tag', 'tags'):
-                terms.append(value)
-        if not len(terms):
-            return
-        for term in terms:
-            q = q.filter(model.Tag.name.contains(term.lower()))
-        self._db_query(q)
-
-
-class ResourceSqlSearchQuery(SqlSearchQuery):
-    """ Search for resources in plain SQL. """
-
-    def _run(self):
-        q = model.Session.query(model.Resource) # TODO authz
-        if self.query.terms:
-            raise SearchError('Only field specific terms allowed in resource search.')
-        #self._check_options_specified_are_allowed('resource search', ['all_fields', 'offset', 'limit'])
-        self.options.ref_entity_with_attr = 'id' # has no name
-        resource_fields = model.Resource.get_columns()
-        for field, terms in self.query.fields.items():
-            if isinstance(terms, basestring):
-                terms = terms.split()
-            if field not in resource_fields:
-                raise SearchError('Field "%s" not recognised in Resource search.' % field)
-            for term in terms:
-                model_attr = getattr(model.Resource, field)
-                if field == 'hash':                
-                    q = q.filter(model_attr.ilike(unicode(term) + '%'))
-                elif field in model.Resource.get_extra_columns():
-                    model_attr = getattr(model.Resource, 'extras')
-
-                    like = or_(model_attr.ilike(u'''%%"%s": "%%%s%%",%%''' % (field, term)),
-                               model_attr.ilike(u'''%%"%s": "%%%s%%"}''' % (field, term))
-                              )
-                    q = q.filter(like)
-                else:
-                    q = q.filter(model_attr.ilike('%' + unicode(term) + '%'))
-        
-        order_by = self.options.order_by
-        if order_by is not None:
-            if hasattr(model.Resource, order_by):
-                q = q.order_by(getattr(model.Resource, order_by))
-        self._db_query(q)
-
-
-class PackageSqlSearchQuery(SqlSearchQuery):
-    """ Search for packages using SQL and Postgres' TS full-text search. """
-
-    def _run(self):
-        q = authz.Authorizer().authorized_query(self.options.get('username'), model.Package)
-        make_like = lambda x,y: x.ilike(u'%' + unicode(y) + u'%')
-        q = q.filter(model.package_search_table.c.package_id==model.Package.id)
-        
-        all_terms = ''
-        if self.query.query != '*:*': 
-            # Full search by general terms (and field specific terms but not by field)
-            terms_set = set(self.query.terms)
-            terms_set.update(self.query.fields.values())
-            all_terms = u' '.join(map(unicode, terms_set))
-            
-            if len(all_terms.strip()): 
-                q = q.filter(u'package_search.search_vector @@ plainto_tsquery(:terms)')
-                q = q.params(terms=all_terms)
-            
-            # Filter by field specific terms
-            for field, terms in self.query.fields.items():
-                if field == 'tags':
-                    q = self._filter_by_tag(q, terms)
-                    continue
-                elif field == 'groups':
-                    q = self._filter_by_group(q, terms)
-                    continue
-                
-                if isinstance(terms, basestring):
-                    terms = terms.split()
-                   
-                if field in model.package_table.c:
-                    model_attr = getattr(model.Package, field)
-                    for term in terms:
-                        q = q.filter(make_like(model_attr, term))
-                else:
-                    q = self._filter_by_extra(q, field, terms)
-            
-        # Filter for options
-        if self.options.filter_by_downloadable:
-            q = q.join('resource_groups_all', 'resources_all', aliased=True)
-            q = q.filter(sqlalchemy.and_(
-                model.Resource.state==model.State.ACTIVE,
-                model.ResourceGroup.package_id==model.Package.id))
-        if self.options.filter_by_openness:
-            q = q.filter(model.Package.license_id.in_(self.open_licenses))
-        
-        order_by = self.options.order_by
-        if order_by is not None:
-            if order_by == 'rank':
-                q = q.add_column(sqlalchemy.func.ts_rank_cd(sqlalchemy.text('package_search.search_vector'), 
-                                                            sqlalchemy.func.plainto_tsquery(all_terms)))
-                q = q.order_by(sqlalchemy.text('ts_rank_cd_1 DESC'))
-            elif hasattr(model.Package, order_by):
-                q = q.order_by(getattr(model.Package, order_by))
-            else:
-                # TODO extras
-                raise NotImplemented
-
-        q = q.distinct()
-        self._db_query(q)
-    
-    def _filter_by_tag(self, q, term):
-        if not self.options.search_tags:
-            return q
-        tag = model.Tag.by_name(unicode(term), autoflush=False)
-        if tag:
-            # need to keep joining for each filter
-            # tag should be active hence state_id requirement
-            q = q.join('package_tags', aliased=True).filter(sqlalchemy.and_(
-                model.PackageTag.state==model.State.ACTIVE,
-                model.PackageTag.tag_id==tag.id))
-        else:
-            # unknown tag, so torpedo search
-            q = q.filter(model.PackageTag.tag_id==u'\x130')
-        return q
-        
-    def _filter_by_group(self, q, term):
-        group = model.Group.by_name(unicode(term), autoflush=False)
-        if group:
-            # need to keep joining for each filter
-            q = q.join('package_group_all', 'group', aliased=True).filter(
-                model.Group.id==group.id)
-        else:
-            # unknown group, so torpedo search
-            q = q.filter(model.Group.id==u'-1')
-        return q
-
-    def _filter_by_extra(self, q, field, terms):
-        make_like = lambda x,y: x.ilike(u'%' + unicode(y) + u'%')
-        for term in terms:
-            q = q.join('_extras', aliased=True)
-            q = q.filter(model.PackageExtra.state==model.State.ACTIVE)
-            q = q.filter(model.PackageExtra.key==unicode(field))
-            q = q.filter(make_like(model.PackageExtra.value, term))
-        return q
-        
-
-class SqlSearchIndex(SearchIndex): pass
-
-
-class PackageSqlSearchIndex(SqlSearchIndex):
-    
-    def _make_vector(self, pkg_dict):
-        if isinstance(pkg_dict.get('tags'), (list, tuple)):
-            pkg_dict['tags'] = ' '.join(pkg_dict.get('tags', []))
-        if isinstance(pkg_dict.get('groups'), (list, tuple)):
-            pkg_dict['groups'] = ' '.join(pkg_dict.get('groups', []))
-
-        document_a = u' '.join((pkg_dict.get('name') or u'', pkg_dict.get('title') or u''))
-        document_b_items = []
-        for field_name in ['notes', 'tags', 'groups', 'author', 'maintainer', 'url']:
-            val = pkg_dict.get(field_name)
-            if val:
-                document_b_items.append(val)
-        extras = pkg_dict.get('extras', {})
-        for key, value in extras.items():
-            if value is not None:
-                document_b_items.append(unicode(value))
-        document_b = u' '.join(document_b_items)
-
-        # Create weighted vector
-        vector_sql = 'setweight(to_tsvector(%s), \'A\') || setweight(to_tsvector(%s), \'D\')'
-        params = [document_a.encode('utf8'), document_b.encode('utf8')]
-        return vector_sql, params
-    
-    def _print_lexemes(self, pkg_dict):
-        sql = "SELECT package_id, search_vector FROM package_search WHERE package_id = %s"
-        res = self.backend.connection.execute(sql, pkg_dict['id'])
-        print res.fetchall()
-        res.close()
-    
-    def _run_sql(self, sql, params):
-        conn = self.backend.connection
-        tx = conn.begin_nested()    
-        try:
-            res = conn.execute(sql, params)
-            results = res.fetchall() if not res.closed else None
-            res.close()
-            tx.commit()
-        except Exception, e:
-            tx.rollback()
-            raise
-        return results
-
-    def insert_dict(self, pkg_dict):
-        if not 'id' in pkg_dict or not 'name' in pkg_dict:
-            return
-        vector_sql, params = self._make_vector(pkg_dict)
-        sql = "INSERT INTO package_search VALUES (%%s, %s)" % vector_sql
-        params = [pkg_dict.get('id')] + params
-        self._run_sql(sql, params)
-        log.debug("Indexed %s" % pkg_dict.get('name'))
-    
-    def update_dict(self, pkg_dict):
-        if not 'id' in pkg_dict or not 'name' in pkg_dict:
-            return 
-        vector_sql, params = self._make_vector(pkg_dict)
-        sql = "UPDATE package_search SET search_vector=%s WHERE package_id=%%s" % vector_sql
-        params.append(pkg_dict['id'])
-        self._run_sql(sql, params)
-        log.debug("Updated index for %s" % pkg_dict.get('name'))
-        
-    def remove_dict(self, pkg_dict):
-        if not 'id' in pkg_dict or not 'name' in pkg_dict:
-            return 
-        sql = "DELETE FROM package_search WHERE package_id=%s"
-        self._run_sql(sql, [pkg_dict.get('id')])
-        log.debug("Delete entry %s from index" % pkg_dict.get('id'))
-        
-
-        # This is currently handled by the foreign key constraint on package_id. 
-        # Once we remove that constraint, manual removal will become necessary.
-        pass
-        
-    def clear(self):
-        self._run_sql("DELETE FROM package_search WHERE 1=1", {})
-
-    def get_all_entity_ids(self):
-        sql = 'SELECT package_id FROM package_search'
-        results = self._run_sql(sql, [])
-        return [res[0] for res in results]
-        
-    def get_index(self, pkg_ref):
-        pkg = model.Package.get(pkg_ref)
-        assert pkg
-        sql = "SELECT package_id, search_vector FROM package_search WHERE package_id = %s"
-        res = self.backend.connection.execute(sql, pkg.id)
-        search_vector = res.fetchall()
-        res.close()
-        return search_vector


--- a/ckan/tests/lib/test_package_search.py	Thu Aug 18 13:42:59 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,451 +0,0 @@
-import time
-
-from ckan.model import Package
-import ckan.lib.search as search
-from ckan.lib.search import get_backend, query_for, QueryOptions
-import ckan.model as model
-from ckan.tests import *
-from ckan.tests import is_search_supported
-from ckan.lib.create_test_data import CreateTestData
-
-class TestSearch(TestController):
-    q_all = u'penguin'
-
-    @classmethod
-    def setup_class(self):
-        if not is_search_supported():
-            raise SkipTest("Search not supported")
-
-        indexer = TestSearchIndexer()
-        model.Session.remove()
-        CreateTestData.create_search_test_data()
-
-        # now remove a tag so we can test search with deleted tags
-        model.repo.new_revision()
-        gils = model.Package.by_name(u'gils')
-        # an existing tag used only by gils
-        self.tagname = u'registry'
-        # we aren't guaranteed it is last ...
-        idx = [ t.name for t in gils.tags].index(self.tagname)
-        del gils.tags[idx]
-        model.repo.commit_and_remove()
-        indexer.index()
-
-        self.gils = model.Package.by_name(u'gils')
-        self.war = model.Package.by_name(u'warandpeace')
-        self.russian = model.Tag.by_name(u'russian')
-        self.tolstoy = model.Tag.by_name(u'tolstoy')
-        
-        self.backend = get_backend(backend='sql')
-
-    @classmethod
-    def teardown_class(self):
-        model.repo.rebuild_db()
-
-    def _pkg_names(self, result):
-        return ' '.join(result['results'])
-
-    def _check_entity_names(self, result, names_in_result):
-        names = result['results']
-        for name in names_in_result:
-            if name not in names:
-                return False
-        return True
-
-    # Can't search for all records in postgres, so search for 'penguin' which
-    # we have put in all the records.
-    def test_1_all_records(self):
-        # all records
-        result = self.backend.query_for(model.Package).run(query=self.q_all)
-        assert 'gils' in result['results'], result['results']
-        assert result['count'] > 5, result['count']
-
-    def test_1_name(self):
-        # exact name
-        result = self.backend.query_for(model.Package).run(query=u'gils')
-        assert self._pkg_names(result) == 'gils', result
-        assert result['count'] == 1, result
-
-    def test_1_name_multiple_results(self):
-        result = self.backend.query_for(model.Package).run(query=u'gov')
-        assert self._check_entity_names(result, ('us-gov-images', 'usa-courts-gov')), self._pkg_names(result)
-        assert result['count'] == 4, self._pkg_names(result)
-
-    def test_1_name_token(self):
-        result = self.backend.query_for(model.Package).run(query=u'name:gils')
-        assert self._pkg_names(result) == 'gils', self._pkg_names(result)
-
-        result = self.backend.query_for(model.Package).run(query=u'title:gils')
-        assert not self._check_entity_names(result, ('gils')), self._pkg_names(result)
-
-    def test_2_title(self):
-        # exact title, one word
-        result = self.backend.query_for(model.Package).run(query=u'Opengov.se')
-        assert self._pkg_names(result) == 'se-opengov', self._pkg_names(result)
-
-##        # part word
-##        result = Search().search(u'gov.se')
-##        assert self._pkg_names(result) == 'se-opengov', self._pkg_names(result)
-
-        # multiple words
-        result = self.backend.query_for(model.Package).run(query=u'Government Expenditure')
-        assert self._pkg_names(result) == 'uk-government-expenditure', self._pkg_names(result)
-
-        # multiple words wrong order
-        result = self.backend.query_for(model.Package).run(query=u'Expenditure Government')
-        assert self._pkg_names(result) == 'uk-government-expenditure', self._pkg_names(result)
-
-        # multiple words, one doesn't match
-        result = self.backend.query_for(model.Package).run(query=u'Expenditure Government China')
-        assert len(result['results']) == 0, self._pkg_names(result)
-
-    def test_3_licence(self):
-        ## this should result, but it is here to check that at least it does not error
-        result = self.backend.query_for(model.Package).run(query=u'license:"OKD::Other (PublicsDomain)"')
-        assert result['count'] == 0, result
-
-# Quotation not supported now
-##        # multiple words quoted
-##        result = Search().search(u'"Government Expenditure"')
-##        assert self._pkg_names(result) == 'uk-government-expenditure', self._pkg_names(result)
-
-##        # multiple words quoted wrong order
-##        result = Search().search(u'Expenditure Government')
-##        assert self._pkg_names(result) == '', self._pkg_names(result)
-
-        # token
-        result = self.backend.query_for(model.Package).run(query=u'title:Opengov.se')
-        assert self._pkg_names(result) == 'se-opengov', self._pkg_names(result)
-
-        # token
-        result = self.backend.query_for(model.Package).run(query=u'name:gils')
-        assert self._pkg_names(result) == 'gils', self._pkg_names(result)
-
-        # token
-        result = self.backend.query_for(model.Package).run(query=u'randomthing')
-        assert self._pkg_names(result) == '', self._pkg_names(result)
-
-    def test_tags_field(self):
-        result = self.backend.query_for(model.Package).run(query=u'country-sweden')
-        assert self._check_entity_names(result, ['se-publications', 'se-opengov']), self._pkg_names(result)
-
-    def test_tags_token_simple(self):
-        result = self.backend.query_for(model.Package).run(query=u'tags:country-sweden')
-        assert self._check_entity_names(result, ['se-publications', 'se-opengov']), self._pkg_names(result)
-
-        result = self.backend.query_for(model.Package).run(query=u'tags:wildlife')
-        assert self._pkg_names(result) == 'us-gov-images', self._pkg_names(result)
-
-    def test_tags_token_simple_with_deleted_tag(self):
-        # registry has been deleted
-        result = self.backend.query_for(model.Package).run(query=u'tags:registry')
-        assert self._pkg_names(result) == '', self._pkg_names(result)
-
-    def test_tags_token_multiple(self):
-        result = self.backend.query_for(model.Package).run(query=u'tags:country-sweden tags:format-pdf')
-        assert self._pkg_names(result) == 'se-publications', self._pkg_names(result)
-
-    def test_tags_token_complicated(self):
-        result = self.backend.query_for(model.Package).run(query=u'tags:country-sweden tags:somethingrandom')
-        assert self._pkg_names(result) == '', self._pkg_names(result)
-
-    def test_tag_basic(self):
-        result = self.backend.query_for('tag').run(query=u'gov')
-        assert result['count'] == 2, result
-        assert self._check_entity_names(result, ('gov', 'government')), self._pkg_names(result)
-
-    def test_tag_basic_2(self):
-        result = self.backend.query_for('tag').run(query=u'wildlife')
-        assert self._pkg_names(result) == 'wildlife', self._pkg_names(result)
-
-    def test_tag_with_tags_option(self):
-        result = self.backend.query_for('tag').run(query=u'tags:wildlife')
-        assert self._pkg_names(result) == 'wildlife', self._pkg_names(result)
-
-    def test_tag_with_blank_tags(self):
-        result = self.backend.query_for('tag').run(query=u'tags: wildlife')
-        assert self._pkg_names(result) == 'wildlife', self._pkg_names(result)
-
-    def test_pagination(self):
-        # large search
-        all_results = self.backend.query_for(model.Package).run(query=self.q_all)
-        all_pkgs = all_results['results']
-        all_pkg_count = all_results['count']
-
-        # limit
-        options = QueryOptions()
-        options.limit = 2
-        result = self.backend.query_for(model.Package).run(query=self.q_all, options=options)
-        pkgs = result['results']
-        count = result['count']
-        assert len(pkgs) == 2, pkgs
-        assert count == all_pkg_count
-        assert pkgs == all_pkgs[:2]
-
-        # offset
-        options = QueryOptions()
-        options.limit = 2
-        options.offset = 2
-        result = self.backend.query_for(model.Package).run(query=self.q_all, options=options)
-        pkgs = result['results']
-        assert len(pkgs) == 2, pkgs
-        assert pkgs == all_pkgs[2:4]
-
-        # larger offset
-        options = QueryOptions()
-        options.limit = 2
-        options.offset = 4
-        result = self.backend.query_for(model.Package).run(query=self.q_all, options=options)
-        pkgs = result['results']
-        assert len(pkgs) == 2, pkgs
-        assert pkgs == all_pkgs[4:6]
-
-    def test_order_by(self):
-        # large search
-        all_results = self.backend.query_for(model.Package).run(query=self.q_all)
-        all_pkgs = all_results['results']
-        all_pkg_count = all_results['count']
-
-        # rank
-        options = QueryOptions()
-        options.order_by = 'rank'
-        result = self.backend.query_for(model.Package).run(query='penguin', options=options)
-        pkgs = result['results']
-        fields = [model.Package.by_name(pkg_name).name for pkg_name in pkgs]
-        assert fields[0] == 'usa-courts-gov', fields # has penguin three times
-        assert pkgs == all_pkgs, pkgs #default ordering        
-
-        # name
-        options = QueryOptions()
-        options.order_by = 'name'
-        result = self.backend.query_for(model.Package).run(query=self.q_all, options=options)
-        pkgs = result['results']
-        fields = [model.Package.by_name(pkg_name).name for pkg_name in pkgs]
-        sorted_fields = fields; sorted_fields.sort()
-        assert fields == sorted_fields, repr(fields) + repr(sorted_fields)
-
-        # title
-        options = QueryOptions()
-        options.order_by = 'title'
-        result = self.backend.query_for(model.Package).run(query=self.q_all, options=options)
-        pkgs = result['results']
-        fields = [model.Package.by_name(pkg_name).title for pkg_name in pkgs]
-        sorted_fields = fields; sorted_fields.sort()
-        assert fields == sorted_fields, repr(fields) + repr(sorted_fields)
-
-        # notes
-        options = QueryOptions()
-        options.order_by = 'notes'
-        result = self.backend.query_for(model.Package).run(query=self.q_all, options=options)
-        pkgs = result['results']
-        fields = [model.Package.by_name(pkg_name).notes for pkg_name in pkgs]
-        sorted_fields = fields; sorted_fields.sort()
-        assert fields == sorted_fields, repr(fields) + repr(sorted_fields)
-
-        # extra field
-## TODO: Get this working
-##        options = SearchOptions({'q':self.q_all})
-##        options.order_by = 'date_released'
-##        result = Search().run(options)
-##        pkgs = result['results']
-##        fields = [model.Package.by_name(pkg_name).extras.get('date_released') for pkg_name in pkgs]
-##        sorted_fields = fields; sorted_fields.sort()
-##        assert fields == sorted_fields, repr(fields) + repr(sorted_fields)
-
-    def test_search_notes_on(self):
-        result = self.backend.query_for(model.Package).run(query=u'restrictions')
-        pkgs = result['results']
-        count = result['count']
-        assert len(pkgs) == 2, pkgs
-        
-    def test_search_foreign_chars(self):
-        result = self.backend.query_for(model.Package).run(query='umlaut')
-        assert result['results'] == ['gils'], result['results']
-        result = self.backend.query_for(model.Package).run(query=u'thumb')
-        assert result['count'] == 0, result['results']
-        result = self.backend.query_for(model.Package).run(query=u'th\xfcmb')
-        assert result['results'] == ['gils'], result['results']
-
-    # Groups searching deprecated for now
-    def _test_groups(self):
-        result = self.backend.query_for(model.Package).run(query=u'groups:random')
-        assert self._pkg_names(result) == '', self._pkg_names(result)
-        
-        result = self.backend.query_for(model.Package).run(query=u'groups:ukgov')
-        assert result['count'] == 4, self._pkg_names(result)
-
-        result = self.backend.query_for(model.Package).run(query=u'groups:ukgov tags:us')
-        assert result['count'] == 2, self._pkg_names(result)
-
-class TestSearchOverall(TestController):
-    @classmethod
-    def setup_class(self):
-        indexer = TestSearchIndexer()
-        CreateTestData.create()
-        indexer.index()
-        self.backend = get_backend(backend='sql')
-
-    @classmethod
-    def teardown_class(self):
-        model.repo.rebuild_db()
-
-    def _check_search_results(self, terms, expected_count, expected_packages=[], only_open=False, only_downloadable=False):
-        options = QueryOptions()
-        options.filter_by_openness = only_open
-        options.filter_by_downloadable = only_downloadable
-        result = self.backend.query_for(model.Package).run(query=unicode(terms))
-        pkgs = result['results']
-        count = result['count']
-        assert count == expected_count, (count, expected_count)
-        for expected_pkg in expected_packages:
-            assert expected_pkg in pkgs, '%s : %s' % (expected_pkg, result)
-
-    def test_overall(self):
-        self._check_search_results('annakarenina', 1, ['annakarenina'] )
-        self._check_search_results('warandpeace', 1, ['warandpeace'] )
-        #self._check_search_results('', 0 )
-        self._check_search_results('A Novel By Tolstoy', 1, ['annakarenina'] )
-        self._check_search_results('title:Novel', 1, ['annakarenina'] )
-        self._check_search_results('title:peace', 0 )
-        self._check_search_results('name:warandpeace', 1 )
-        self._check_search_results('groups:david', 2 )
-        self._check_search_results('groups:roger', 1 )
-        self._check_search_results('groups:lenny', 0 )
-        self._check_search_results('annakarenina', 1, ['annakarenina'], True, False )
-        self._check_search_results('annakarenina', 1, ['annakarenina'], False, True )
-        self._check_search_results('annakarenina', 1, ['annakarenina'], True, True )
-        
-
-class TestGeographicCoverage(TestController):
-    @classmethod
-    def setup_class(self):
-        indexer = TestSearchIndexer()
-        init_data = [
-            {'name':'eng',
-             'extras':{'geographic_coverage':'100000: England'},},
-            {'name':'eng_ni',
-             'extras':{'geographic_coverage':'100100: England, Northern Ireland'},},
-            {'name':'uk',
-             'extras':{'geographic_coverage':'111100: United Kingdom (England, Scotland, Wales, Northern Ireland'},},
-            {'name':'gb',
-             'extras':{'geographic_coverage':'111000: Great Britain (England, Scotland, Wales)'},},
-            {'name':'none',
-             'extras':{'geographic_coverage':'000000:'},},
-            ]
-        CreateTestData.create_arbitrary(init_data)
-        indexer.index()
-        self.backend = get_backend(backend='sql')
-
-
-    @classmethod
-    def teardown_class(self):
-        model.repo.rebuild_db()
-    
-    def _do_search(self, q, expected_pkgs, count=None):
-        options = QueryOptions()
-        options.order_by = 'rank'
-        result = self.backend.query_for(model.Package).run(query=q, options=options)
-        pkgs = result['results']
-        fields = [model.Package.by_name(pkg_name).name for pkg_name in pkgs]
-        if not (count is None):
-            assert result['count'] == count, result['count']
-        for expected_pkg in expected_pkgs:
-            assert expected_pkg in fields, expected_pkg
-
-    def _filtered_search(self, value, expected_pkgs, count=None):
-        options = QueryOptions()
-        options.order_by = 'rank'
-        result = self.backend.query_for(model.Package).run(fields={'geographic_coverage':value}, options=options)
-        pkgs = result['results']
-        fields = [model.Package.by_name(pkg_name).name for pkg_name in pkgs]
-        if not (count is None):
-            assert result['count'] == count, result['count']
-        for expected_pkg in expected_pkgs:
-            assert expected_pkg in fields, expected_pkg
-
-    def test_0_basic(self):
-        self._do_search(u'england', ['eng', 'eng_ni', 'uk', 'gb'], 4)
-        self._do_search(u'northern ireland', ['eng_ni', 'uk'], 2)
-        self._do_search(u'united kingdom', ['uk'], 1)
-        self._do_search(u'great britain', ['gb'], 1)
-
-    def test_1_filtered(self):
-        self._filtered_search(u'england', ['eng', 'eng_ni', 'uk', 'gb'], 4)
-
-class TestExtraFields(TestController):
-    @classmethod
-    def setup_class(self):
-        indexer = TestSearchIndexer()
-        init_data = [
-            {'name':'a',
-             'extras':{'department':'abc',
-                       'agency':'ag-a'},},
-            {'name':'b',
-             'extras':{'department':'bcd',
-                       'agency':'ag-b'},},
-            {'name':'c',
-             'extras':{'department':'cde abc'},},
-            {'name':'none',
-             'extras':{'department':''},},
-            ]
-        CreateTestData.create_arbitrary(init_data)
-        indexer.index()
-        self.backend = get_backend(backend='sql')
-
-    @classmethod
-    def teardown_class(self):
-        model.repo.rebuild_db()
-    
-    def _do_search(self, department, expected_pkgs, count=None):
-        result = self.backend.query_for(model.Package).run(fields={'department':department})
-        pkgs = result['results']
-        fields = [model.Package.by_name(pkg_name).name for pkg_name in pkgs]
-        if not (count is None):
-            assert result['count'] == count, result['count']
-        for expected_pkg in expected_pkgs:
-            assert expected_pkg in fields, expected_pkg
-
-    def test_0_basic(self):
-        self._do_search(u'bcd', 'b', 1)
-        self._do_search(u'abc', ['a', 'c'], 2)
-        self._do_search(u'cde', 'c', 1)
-        self._do_search(u'abc cde', 'c', 1)
-        self._do_search(u'cde abc', 'c', 1)
-
-class TestRank(TestController):
-    @classmethod
-    def setup_class(self):
-        indexer = TestSearchIndexer()
-        init_data = [{'name':u'test1-penguin-canary',
-                      'tags':u'canary goose squirrel wombat wombat'},
-                     {'name':u'test2-squirrel-squirrel-canary-goose',
-                      'tags':u'penguin wombat'},
-                     ]
-        CreateTestData.create_arbitrary(init_data)
-        self.pkg_names = [u'test1-penguin-canary',
-                     u'test2-squirrel-squirrel-canary-goose']
-        indexer.index()
-        self.backend = get_backend(backend='sql')
-
-    @classmethod
-    def teardown_class(self):
-        model.repo.rebuild_db()
-    
-    def _do_search(self, q, wanted_results):
-        options = QueryOptions()
-        options.order_by = 'rank'
-        result = self.backend.query_for(model.Package).run(query=q, options=options)
-        results = result['results']
-        err = 'Wanted %r, got %r' % (wanted_results, results)
-        assert wanted_results[0] == results[0], err
-        assert wanted_results[1] == results[1], err
-
-    def test_0_basic(self):
-        self._do_search(u'wombat', self.pkg_names)
-        self._do_search(u'squirrel', self.pkg_names[::-1])
-        self._do_search(u'canary', self.pkg_names)
-
-    def test_1_weighting(self):
-        self._do_search(u'penguin', self.pkg_names)
-        self._do_search(u'goose', self.pkg_names[::-1])
-


--- a/ckan/tests/lib/test_package_search_synchronous_update.py	Thu Aug 18 13:42:59 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,137 +0,0 @@
-import json
-
-from ckan.tests import *
-from ckan.tests import is_search_supported
-import ckan.lib.search as search
-from ckan import plugins
-from test_package_search import TestSearchOverall
-from ckan import model
-
-class TestSearchOverallWithSynchronousIndexing(TestSearchOverall):
-    '''Repeat test from test_package_search with synchronous indexing
-    '''
-
-    @classmethod
-    def setup_class(self):
-        if not is_search_supported():
-            raise SkipTest("Search not supported")
-
-        import gc
-        from pylons import config
-
-        # Force a garbage collection to trigger issue #695
-        gc.collect()
-
-        config['search_backend'] = 'sql'
-        self.backend = search.get_backend()
-        plugins.load('synchronous_search')
-        CreateTestData.create()
-
-    def test_01_search_table_count(self):
-
-        assert model.Session.query(model.PackageSearch).count() == 2 
-
-    def test_02_add_package_from_dict(self):
-
-        print self.create_package_from_data.__doc__
-        self.package = self.create_package_from_data(json.loads(str(self.create_package_from_data.__doc__)))
-
-        assert model.Session.query(model.PackageSearch).count() == 3 
-
-        self._check_search_results('wee', 1, ['council-owned-litter-bins'])
-
-    def test_03_update_package_from_dict(self):
-
-        package = model.Package.by_name('council-owned-litter-bins')
-
-
-        update_dict = json.loads(str(self.create_package_from_data.__doc__))
-        update_dict['name'] = 'new_name'
-        update_dict['extras']['published_by'] = 'meeeee'
-
-        self.create_package_from_data(update_dict, package)
-        assert model.Session.query(model.PackageSearch).count() == 3 
-
-        self._check_search_results('meeeee', 1, ['new_name'])
-
-    def test_04_delete_package_from_dict(self):
-
-        package = model.Package.by_name('new_name')
-
-        model.Session.delete(package)
-        model.Session.commit()
-
-        assert model.Session.query(model.PackageSearch).count() == 2 
-
-    def create_package_from_data(self, package_data, package = None):
-        ''' {"extras": {"INSPIRE": "True",
-                    "bbox-east-long": "-3.12442",
-                    "bbox-north-lat": "54.218407",
-                    "bbox-south-lat": "54.039634",
-                    "bbox-west-long": "-3.32485",
-                    "constraint": "conditions unknown; (e) intellectual property rights;",
-                    "dataset-reference-date": [{"type": "creation",
-                                                "value": "2008-10-10"},
-                                               {"type": "revision",
-                                                "value": "2009-10-08"}],
-                    "guid": "00a743bf-cca4-4c19-a8e5-e64f7edbcadd",
-                    "metadata-date": "2009-10-16",
-                    "metadata-language": "eng",
-                    "published_by": 0,
-                    "resource-type": "dataset",
-                    "spatial-reference-system": "wee",
-                    "temporal_coverage-from": "1977-03-10T11:45:30",
-                    "temporal_coverage-to": "2005-01-15T09:10:00"},
-         "name": "council-owned-litter-bins",
-         "notes": "Location of Council owned litter bins within Borough.",
-         "resources": [{"description": "Resource locator",
-                        "format": "Unverified",
-                        "url": "http://www.barrowbc.gov.uk"}],
-         "tags": ["Utility and governmental services"],
-         "title": "Council Owned Litter Bins"}
-        '''
-
-        if not package:
-            package = model.Package()
-
-        rev = model.repo.new_revision()
-        
-        relationship_attr = ['extras', 'resources', 'tags']
-
-        package_properties = {}
-        for key, value in package_data.iteritems():
-            if key not in relationship_attr:
-                setattr(package, key, value)
-
-        tags = package_data.get('tags', [])
-
-        for tag in tags:
-            package.add_tag_by_name(tag, autoflush=False)
-        
-        for resource_dict in package_data.get("resources", []):
-            resource = model.Resource(**resource_dict)
-            package.resources[:] = []
-            package.resources.append(resource)
-
-        for key, value in package_data.get("extras", {}).iteritems():
-            extra = model.PackageExtra(key=key, value=value)
-            package._extras[key] = extra
-
-        model.Session.add(package)
-        model.Session.flush()
-
-        model.setup_default_user_roles(package, [])
-
-
-        model.Session.add(rev)
-        model.Session.commit()
-
-        return package
-
-
-    @classmethod
-    def teardown_class(self):
-        model.repo.rebuild_db()
-
-# Stop parent class tests from running
-#TestSearchOverall = None


--- a/ckan/tests/lib/test_resource_search.py	Thu Aug 18 13:42:59 2011 +0100
+++ b/ckan/tests/lib/test_resource_search.py	Thu Aug 18 14:02:12 2011 +0100
@@ -3,7 +3,7 @@
 
 from ckan.tests import *
 from ckan.tests import is_search_supported
-from ckan.lib.search import get_backend, QueryOptions
+from ckan.lib.search import QueryOptions
 from ckan import model
 from ckan.lib.create_test_data import CreateTestData
 from ckan.lib.search.common import SearchError
@@ -11,6 +11,8 @@
 class TestSearch(object):
     @classmethod
     def setup_class(self):
+        raise SkipTest("Resource search not yet implemented with solr")
+
         if not is_search_supported():
             raise SkipTest("Search not supported")
 


--- a/ckan/tests/lib/test_search_index.py	Thu Aug 18 13:42:59 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-import time
-
-import sqlalchemy as sa
-
-from ckan.tests import *
-from ckan.tests import is_search_supported
-from ckan import model
-import ckan.lib.search as search
-
-class TestSearchIndex(TestController):
-    '''Tests that a package is indexed when the packagenotification is
-    received by the indexer.'''
-    worker = None
-    
-    @classmethod
-    def setup_class(cls):
-        if not is_search_supported():
-            raise SkipTest("Search not supported")
-        CreateTestData.create()
-
-    @classmethod
-    def teardown_class(cls):
-        model.repo.rebuild_db()
-
-    def test_index(self):
-        search.dispatch_by_operation('Package', {'title': 'penguin'}, 'new', 
-            backend=search.get_backend(backend='sql'))
-
-        sql = "select search_vector from package_search where package_id='%s'" % self.anna.id
-        vector = model.Session.execute(sql).fetchone()[0]
-        assert 'annakarenina' in vector, vector
-        assert not 'penguin' in vector, vector
-
-
-class PostgresSearch(object):
-    '''Demo of how postgres search works.'''
-    def filter_by(self, query, terms):
-        q = query
-        q = q.filter(model.package_search_table.c.package_id==model.Package.id)
-        q = q.filter('package_search.search_vector '\
-                                       '@@ plainto_tsquery(:terms)')
-        q = q.params(terms=terms)
-        q = q.add_column(sa.func.ts_rank_cd('package_search.search_vector', sa.func.plainto_tsquery(terms)))
-        return q
-
-    def order_by(self, query):
-        return query.order_by('ts_rank_cd_1')
-        
-    def search(self, terms):
-        import ckan.model as model
-        q = self.filter_by(model.Session.query(model.Package), terms)
-        q = self.order_by(q)
-        q = q.distinct()
-        results = [pkg_tuple[0].name for pkg_tuple in q.all()]
-        return {'results':results, 'count':q.count()}
-
-
-def allow_time_to_create_search_index():
-    time.sleep(0.5)
-
-class TestPostgresSearch:
-    @classmethod
-    def setup_class(self):
-        tsi = TestSearchIndexer()
-        CreateTestData.create_search_test_data()
-        tsi.index()
-
-        self.gils = model.Package.by_name(u'gils')
-        self.war = model.Package.by_name(u'warandpeace')
-        self.russian = model.Tag.by_name(u'russian')
-        self.tolstoy = model.Tag.by_name(u'tolstoy')
-
-    @classmethod
-    def teardown_class(self):
-        model.repo.rebuild_db()
-
-    def test_0_indexing(self):
-        searches = model.metadata.bind.execute('SELECT package_id, search_vector FROM package_search').fetchall()
-        assert searches[0][1], searches
-        q = model.Session.query(model.Package).filter(model.package_search_table.c.package_id==model.Package.id)
-        assert q.count() == 6, q.count()
-        
-    def test_1_basic(self):
-        result = PostgresSearch().search(u'sweden')
-        assert 'se-publications' in result['results'], result['results']
-        assert result['count'] == 2, result['count']
-

Repository URL: https://bitbucket.org/okfn/ckan/

--

This is a commit notification from bitbucket.org. You are receiving
this email because you have the commit notification service enabled
for this repository.




More information about the ckan-changes mailing list