[ckan-changes] [okfn/ckan] 521a1a: [#2327] change solr schema and related tests

GitHub noreply at github.com
Wed Apr 25 18:06:59 UTC 2012


  Branch: refs/heads/master
  Home:   https://github.com/okfn/ckan
  Commit: 521a1a0bef4376f900b719adbbfbe2f29464e329
      https://github.com/okfn/ckan/commit/521a1a0bef4376f900b719adbbfbe2f29464e329
  Author: kindly <kindly at gmail.com>
  Date:   2012-04-25 (Wed, 25 Apr 2012)

  Changed paths:
    M ckan/config/solr/CHANGELOG.txt
    M ckan/config/solr/schema-1.4.xml
    M ckan/lib/search/__init__.py
    M ckan/lib/search/index.py
    M ckan/tests/functional/test_search.py
    M ckan/tests/lib/test_solr_package_search.py
    M ckanext/multilingual/solr/schema.xml

  Log Message:
  -----------
  [#2327] change solr schema and related tests


diff --git a/ckan/config/solr/CHANGELOG.txt b/ckan/config/solr/CHANGELOG.txt
index 5fe664f..1e4e67f 100644
--- a/ckan/config/solr/CHANGELOG.txt
+++ b/ckan/config/solr/CHANGELOG.txt
@@ -1,6 +1,14 @@
 CKAN SOLR schemas changelog
 ===========================
 
+v1.4 - (ckan>=1.7)
+--------------------
+* Add ASCII folding filter to text fields.
+* Add capacity field for public, private access.
+* Add title_string so you can sort alphabetically on title.
+* Fields related to analytics, access and view counts.
+* Add data_dict field for the whole package_dict.
+
 v1.3 - (ckan>=1.5.1)
 --------------------
 * Use the index_id (hash of dataset id + site_id) as uniqueKey (#1430)
diff --git a/ckan/config/solr/schema-1.4.xml b/ckan/config/solr/schema-1.4.xml
index 29cb473..0409e71 100644
--- a/ckan/config/solr/schema-1.4.xml
+++ b/ckan/config/solr/schema-1.4.xml
@@ -51,6 +51,7 @@
             <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
             <filter class="solr.LowerCaseFilterFactory"/>
             <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+            <filter class="solr.ASCIIFoldingFilterFactory"/>
         </analyzer>
         <analyzer type="query">
             <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -63,6 +64,7 @@
             <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
             <filter class="solr.LowerCaseFilterFactory"/>
             <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+            <filter class="solr.ASCIIFoldingFilterFactory"/>
         </analyzer>
     </fieldType>
 
@@ -115,6 +117,8 @@
     <field name="tags" type="string" indexed="true" stored="true" multiValued="true"/>
     <field name="groups" type="string" indexed="true" stored="true" multiValued="true"/>
 
+    <field name="capacity" type="string" indexed="true" stored="true" multiValued="false"/>
+
     <field name="res_description" type="textgen" indexed="true" stored="true" multiValued="true"/>
     <field name="res_format" type="string" indexed="true" stored="true" multiValued="true"/>
     <field name="res_url" type="string" indexed="true" stored="true" multiValued="true"/>
@@ -134,8 +138,8 @@
     <field name="parent_of" type="text" indexed="true" stored="false" multiValued="true"/>
     <field name="views_total" type="int" indexed="true" stored="false"/>
     <field name="views_recent" type="int" indexed="true" stored="false"/>
-    <field name="recources_accessed_total" type="int" indexed="true" stored="false"/>
-    <field name="recources_accessed_recent" type="int" indexed="true" stored="false"/>
+    <field name="resources_accessed_total" type="int" indexed="true" stored="false"/>
+    <field name="resources_accessed_recent" type="int" indexed="true" stored="false"/>
 
     <field name="metadata_created" type="date" indexed="true" stored="true" multiValued="false"/>
     <field name="metadata_modified" type="date" indexed="true" stored="true" multiValued="false"/>
@@ -144,8 +148,9 @@
 
     <!-- Copy the title field into titleString, and treat as a string
          (rather than text type).  This allows us to sort on the titleString -->
-    <field name="titleString" type="string" indexed="true" stored="false" />
-    <copyField source="title" dest="titleString"/>
+    <field name="title_string" type="string" indexed="true" stored="false" />
+
+    <field name="data_dict" type="string" indexed="false" stored="true" />
 
     <dynamicField name="extras_*" type="text" indexed="true" stored="true" multiValued="false"/>
     <dynamicField name="*" type="string" indexed="true"  stored="false"/>
diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py
index fbb924a..b2774fc 100644
--- a/ckan/lib/search/__init__.py
+++ b/ckan/lib/search/__init__.py
@@ -26,7 +26,7 @@ def text_traceback():
 
 SIMPLE_SEARCH = config.get('ckan.simple_search', False)
 
-SUPPORTED_SCHEMA_VERSIONS = ['1.3']
+SUPPORTED_SCHEMA_VERSIONS = ['1.4']
 
 DEFAULT_OPTIONS = {
     'limit': 20,
diff --git a/ckan/lib/search/index.py b/ckan/lib/search/index.py
index 086a39e..992721f 100644
--- a/ckan/lib/search/index.py
+++ b/ckan/lib/search/index.py
@@ -99,6 +99,11 @@ def index_package(self, pkg_dict):
         if pkg_dict is None:
             return
 
+        # add to string field for sorting
+        title = pkg_dict.get('title')
+        if title:
+            pkg_dict['title_string'] = title
+
         if (not pkg_dict.get('state')) or ('active' not in pkg_dict.get('state')):
             return self.delete_package(pkg_dict)
 
@@ -163,7 +168,7 @@ def index_package(self, pkg_dict):
 
         pkg_dict = dict([(k.encode('ascii', 'ignore'), v) for (k, v) in pkg_dict.items()])
 
-        for k in ('title','notes'):
+        for k in ('title', 'notes', 'title_string'):
             if k in pkg_dict and pkg_dict[k]:
                 pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])
 
diff --git a/ckan/tests/functional/test_search.py b/ckan/tests/functional/test_search.py
index fe1802c..a9a9339 100644
--- a/ckan/tests/functional/test_search.py
+++ b/ckan/tests/functional/test_search.py
@@ -108,7 +108,7 @@ def test_search_foreign_chars(self):
         res = self.app.get(offset)
         assert 'Search - ' in res
         self._check_search_results(res, u'th\xfcmb', ['<strong>1</strong>'])
-        self._check_search_results(res, 'thumb', ['<strong>0</strong>'])
+        self._check_search_results(res, 'thumb', ['<strong>1</strong>'])
 
     @search_related
     def test_search_escape_chars(self):
diff --git a/ckan/tests/lib/test_solr_package_search.py b/ckan/tests/lib/test_solr_package_search.py
index 6ec2b2f..75d54c0 100644
--- a/ckan/tests/lib/test_solr_package_search.py
+++ b/ckan/tests/lib/test_solr_package_search.py
@@ -292,7 +292,7 @@ def test_search_foreign_chars(self):
         result = search.query_for(model.Package).run({'q': 'umlaut'})
         assert result['results'] == ['gils'], result['results']
         result = search.query_for(model.Package).run({'q': u'thumb'})
-        assert result['count'] == 0, result['results']
+        assert result['results'] == ['gils'], result['results']
         result = search.query_for(model.Package).run({'q': u'th\xfcmb'})
         assert result['results'] == ['gils'], result['results']
 
diff --git a/ckanext/multilingual/solr/schema.xml b/ckanext/multilingual/solr/schema.xml
index 8475187..fb957d3 100644
--- a/ckanext/multilingual/solr/schema.xml
+++ b/ckanext/multilingual/solr/schema.xml
@@ -16,7 +16,7 @@
  limitations under the License.
 -->
 
-<schema name="ckan" version="1.3">
+<schema name="ckan" version="1.4">
 
 <types>
     <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
@@ -373,6 +373,8 @@
     <field name="tags" type="string" indexed="true" stored="true" multiValued="true"/>
     <field name="groups" type="string" indexed="true" stored="true" multiValued="true"/>
 
+    <field name="capacity" type="string" indexed="true" stored="true" multiValued="false"/>
+
     <field name="res_description" type="textgen" indexed="true" stored="true" multiValued="true"/>
     <field name="res_format" type="string" indexed="true" stored="true" multiValued="true"/>
     <field name="res_url" type="string" indexed="true" stored="true" multiValued="true"/>
@@ -390,11 +392,19 @@
     <field name="linked_from" type="text" indexed="true" stored="false" multiValued="true"/>
     <field name="child_of" type="text" indexed="true" stored="false" multiValued="true"/>
     <field name="parent_of" type="text" indexed="true" stored="false" multiValued="true"/>
+    <field name="views_total" type="int" indexed="true" stored="false"/>
+    <field name="views_recent" type="int" indexed="true" stored="false"/>
+    <field name="resources_accessed_total" type="int" indexed="true" stored="false"/>
+    <field name="resources_accessed_recent" type="int" indexed="true" stored="false"/>
 
     <field name="metadata_created" type="date" indexed="true" stored="true" multiValued="false"/>
     <field name="metadata_modified" type="date" indexed="true" stored="true" multiValued="false"/>
 
     <field name="indexed_ts" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
+
+    <!-- Holds the title as a plain string (rather than text type); it is set by
+         the indexer, not by a copyField.  This allows us to sort on title_string -->
+    <field name="title_string" type="string" indexed="true" stored="false" />
      
     <!-- Multilingual -->
     <field name="text_en" type="text_en" indexed="true" stored="true"/>
@@ -424,6 +434,8 @@
     <field name="text_pl" type="text_pl" indexed="true" stored="true"/>
     <field name="title_pl" type="text_pl" indexed="true" stored="true"/>
 
+    <field name="data_dict" type="string" indexed="false" stored="true" />
+
     <dynamicField name="extras_*" type="text" indexed="true" stored="true" multiValued="false"/>
     <dynamicField name="*" type="string" indexed="true"  stored="false"/>
 </fields>


================================================================



More information about the ckan-changes mailing list