[ckan-changes] [okfn/ckan] 521a1a: [#2327] change solr schema and related tests
GitHub
noreply at github.com
Wed Apr 25 18:06:59 UTC 2012
Branch: refs/heads/master
Home: https://github.com/okfn/ckan
Commit: 521a1a0bef4376f900b719adbbfbe2f29464e329
https://github.com/okfn/ckan/commit/521a1a0bef4376f900b719adbbfbe2f29464e329
Author: kindly <kindly at gmail.com>
Date: 2012-04-25 (Wed, 25 Apr 2012)
Changed paths:
M ckan/config/solr/CHANGELOG.txt
M ckan/config/solr/schema-1.4.xml
M ckan/lib/search/__init__.py
M ckan/lib/search/index.py
M ckan/tests/functional/test_search.py
M ckan/tests/lib/test_solr_package_search.py
M ckanext/multilingual/solr/schema.xml
Log Message:
-----------
[#2327] change solr schema and related tests
diff --git a/ckan/config/solr/CHANGELOG.txt b/ckan/config/solr/CHANGELOG.txt
index 5fe664f..1e4e67f 100644
--- a/ckan/config/solr/CHANGELOG.txt
+++ b/ckan/config/solr/CHANGELOG.txt
@@ -1,6 +1,14 @@
CKAN SOLR schemas changelog
===========================
+v1.4 - (ckan>=1.7)
+--------------------
+* Add Ascii folding filter to text fields.
+* Add capacity field for public, private access.
+* Add title_string so you can sort alphabetically on title.
+* Fields related to analytics, access and view counts.
+* Add data_dict field for the whole package_dict.
+
v1.3 - (ckan>=1.5.1)
--------------------
* Use the index_id (hash of dataset id + site_id) as uniqueKey (#1430)
diff --git a/ckan/config/solr/schema-1.4.xml b/ckan/config/solr/schema-1.4.xml
index 29cb473..0409e71 100644
--- a/ckan/config/solr/schema-1.4.xml
+++ b/ckan/config/solr/schema-1.4.xml
@@ -51,6 +51,7 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+ <filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -63,6 +64,7 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+ <filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
@@ -115,6 +117,8 @@
<field name="tags" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="groups" type="string" indexed="true" stored="true" multiValued="true"/>
+ <field name="capacity" type="string" indexed="true" stored="true" multiValued="false"/>
+
<field name="res_description" type="textgen" indexed="true" stored="true" multiValued="true"/>
<field name="res_format" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="res_url" type="string" indexed="true" stored="true" multiValued="true"/>
@@ -134,8 +138,8 @@
<field name="parent_of" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="views_total" type="int" indexed="true" stored="false"/>
<field name="views_recent" type="int" indexed="true" stored="false"/>
- <field name="recources_accessed_total" type="int" indexed="true" stored="false"/>
- <field name="recources_accessed_recent" type="int" indexed="true" stored="false"/>
+ <field name="resources_accessed_total" type="int" indexed="true" stored="false"/>
+ <field name="resources_accessed_recent" type="int" indexed="true" stored="false"/>
<field name="metadata_created" type="date" indexed="true" stored="true" multiValued="false"/>
<field name="metadata_modified" type="date" indexed="true" stored="true" multiValued="false"/>
@@ -144,8 +148,9 @@
<!-- Copy the title field into titleString, and treat as a string
(rather than text type). This allows us to sort on the titleString -->
- <field name="titleString" type="string" indexed="true" stored="false" />
- <copyField source="title" dest="titleString"/>
+ <field name="title_string" type="string" indexed="true" stored="false" />
+
+ <field name="data_dict" type="string" indexed="false" stored="true" />
<dynamicField name="extras_*" type="text" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*" type="string" indexed="true" stored="false"/>
diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py
index fbb924a..b2774fc 100644
--- a/ckan/lib/search/__init__.py
+++ b/ckan/lib/search/__init__.py
@@ -26,7 +26,7 @@ def text_traceback():
SIMPLE_SEARCH = config.get('ckan.simple_search', False)
-SUPPORTED_SCHEMA_VERSIONS = ['1.3']
+SUPPORTED_SCHEMA_VERSIONS = ['1.4']
DEFAULT_OPTIONS = {
'limit': 20,
diff --git a/ckan/lib/search/index.py b/ckan/lib/search/index.py
index 086a39e..992721f 100644
--- a/ckan/lib/search/index.py
+++ b/ckan/lib/search/index.py
@@ -99,6 +99,11 @@ def index_package(self, pkg_dict):
if pkg_dict is None:
return
+ # add to string field for sorting
+ title = pkg_dict.get('title')
+ if title:
+ pkg_dict['title_string'] = title
+
if (not pkg_dict.get('state')) or ('active' not in pkg_dict.get('state')):
return self.delete_package(pkg_dict)
@@ -163,7 +168,7 @@ def index_package(self, pkg_dict):
pkg_dict = dict([(k.encode('ascii', 'ignore'), v) for (k, v) in pkg_dict.items()])
- for k in ('title','notes'):
+ for k in ('title', 'notes', 'title_string'):
if k in pkg_dict and pkg_dict[k]:
pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])
diff --git a/ckan/tests/functional/test_search.py b/ckan/tests/functional/test_search.py
index fe1802c..a9a9339 100644
--- a/ckan/tests/functional/test_search.py
+++ b/ckan/tests/functional/test_search.py
@@ -108,7 +108,7 @@ def test_search_foreign_chars(self):
res = self.app.get(offset)
assert 'Search - ' in res
self._check_search_results(res, u'th\xfcmb', ['<strong>1</strong>'])
- self._check_search_results(res, 'thumb', ['<strong>0</strong>'])
+ self._check_search_results(res, 'thumb', ['<strong>1</strong>'])
@search_related
def test_search_escape_chars(self):
diff --git a/ckan/tests/lib/test_solr_package_search.py b/ckan/tests/lib/test_solr_package_search.py
index 6ec2b2f..75d54c0 100644
--- a/ckan/tests/lib/test_solr_package_search.py
+++ b/ckan/tests/lib/test_solr_package_search.py
@@ -292,7 +292,7 @@ def test_search_foreign_chars(self):
result = search.query_for(model.Package).run({'q': 'umlaut'})
assert result['results'] == ['gils'], result['results']
result = search.query_for(model.Package).run({'q': u'thumb'})
- assert result['count'] == 0, result['results']
+ assert result['results'] == ['gils'], result['results']
result = search.query_for(model.Package).run({'q': u'th\xfcmb'})
assert result['results'] == ['gils'], result['results']
diff --git a/ckanext/multilingual/solr/schema.xml b/ckanext/multilingual/solr/schema.xml
index 8475187..fb957d3 100644
--- a/ckanext/multilingual/solr/schema.xml
+++ b/ckanext/multilingual/solr/schema.xml
@@ -16,7 +16,7 @@
limitations under the License.
-->
-<schema name="ckan" version="1.3">
+<schema name="ckan" version="1.4">
<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
@@ -373,6 +373,8 @@
<field name="tags" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="groups" type="string" indexed="true" stored="true" multiValued="true"/>
+ <field name="capacity" type="string" indexed="true" stored="true" multiValued="false"/>
+
<field name="res_description" type="textgen" indexed="true" stored="true" multiValued="true"/>
<field name="res_format" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="res_url" type="string" indexed="true" stored="true" multiValued="true"/>
@@ -390,11 +392,19 @@
<field name="linked_from" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="child_of" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="parent_of" type="text" indexed="true" stored="false" multiValued="true"/>
+ <field name="views_total" type="int" indexed="true" stored="false"/>
+ <field name="views_recent" type="int" indexed="true" stored="false"/>
+ <field name="resources_accessed_total" type="int" indexed="true" stored="false"/>
+ <field name="resources_accessed_recent" type="int" indexed="true" stored="false"/>
<field name="metadata_created" type="date" indexed="true" stored="true" multiValued="false"/>
<field name="metadata_modified" type="date" indexed="true" stored="true" multiValued="false"/>
<field name="indexed_ts" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
+
+ <!-- The title is also indexed into title_string as a plain string
+ (rather than text type). This allows us to sort on title_string -->
+ <field name="title_string" type="string" indexed="true" stored="false" />
<!-- Multilingual -->
<field name="text_en" type="text_en" indexed="true" stored="true"/>
@@ -424,6 +434,8 @@
<field name="text_pl" type="text_pl" indexed="true" stored="true"/>
<field name="title_pl" type="text_pl" indexed="true" stored="true"/>
+ <field name="data_dict" type="string" indexed="false" stored="true" />
+
<dynamicField name="extras_*" type="text" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*" type="string" indexed="true" stored="false"/>
</fields>
================================================================
More information about the ckan-changes mailing list