From f81b2be334441f705a9cdf6375d01c9b3f252821 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 16 Sep 2015 17:10:59 -0400 Subject: [PATCH] major refactor of bleve configuration see #221 for full details --- .../detect_lang_analyzer.go | 49 -- .../ignore/ignore_byte_array_converter.go | 4 +- .../json/json_byte_array_converter.go | 4 +- .../string/string_byte_array_conveter.go | 4 +- .../datetime_optional/datetime_optional.go | 2 +- analysis/language/ckb/analyzer_ckb.go | 8 +- analysis/language/ckb/analyzer_ckb_test.go | 2 - analysis/language/da/analyzer_da.go | 54 -- analysis/language/da/analyzer_da_test.go | 69 -- analysis/language/da/stemmer_da.go | 28 - analysis/language/da/stop_filter_da.go | 28 - analysis/language/da/stop_words_da.go | 134 ---- analysis/language/de/analyzer_de.go | 59 -- analysis/language/de/analyzer_de_test.go | 97 --- analysis/language/de/german_normalize.go | 94 --- analysis/language/de/german_normalize_test.go | 98 --- analysis/language/de/stemmer_de.go | 28 - analysis/language/de/stop_filter_de.go | 28 - analysis/language/de/stop_words_de.go | 318 --------- analysis/language/en/stemmer_en.go | 28 - analysis/language/en/stemmer_en_test.go | 72 -- analysis/language/es/analyzer_es.go | 55 -- analysis/language/es/analyzer_es_test.go | 64 -- analysis/language/es/stemmer_es.go | 28 - analysis/language/es/stop_filter_es.go | 28 - analysis/language/es/stop_words_es.go | 380 ----------- analysis/language/fa/analyzer_fa.go | 8 +- analysis/language/fa/analyzer_fa_test.go | 2 - analysis/language/fi/analyzer_fi.go | 55 -- analysis/language/fi/analyzer_fi_test.go | 68 -- analysis/language/fi/stemmer_fi.go | 28 - analysis/language/fi/stop_filter_fi.go | 28 - analysis/language/fi/stop_words_fi.go | 121 ---- analysis/language/fr/stemmer_fr.go | 28 - analysis/language/hu/analyzer_hu.go | 55 -- analysis/language/hu/analyzer_hu_test.go | 68 -- analysis/language/hu/stemmer_hu.go | 28 - analysis/language/hu/stop_filter_hu.go | 28 - analysis/language/hu/stop_words_hu.go | 235 ------- analysis/language/it/stemmer_it.go | 28 - analysis/language/ja/analyzer_ja.go | 2 - analysis/language/ja/analyzer_ja_test.go | 2 - analysis/language/ja/ja_morph_kagome.go | 2 - analysis/language/ja/ja_morph_kagome_test.go | 2 - analysis/language/nl/analyzer_nl.go | 55 -- analysis/language/nl/analyzer_nl_test.go | 68 -- analysis/language/nl/stemmer_nl.go | 28 - analysis/language/nl/stop_filter_nl.go | 28 - analysis/language/nl/stop_words_nl.go | 143 ---- analysis/language/no/analyzer_no.go | 55 -- analysis/language/no/analyzer_no_test.go | 68 -- analysis/language/no/stemmer_no.go | 28 - analysis/language/no/stop_filter_no.go | 28 - analysis/language/no/stop_words_no.go | 218 ------ analysis/language/porter/stemmer_porter.go | 28 - analysis/language/pt/stemmer_pt.go | 28 - analysis/language/ro/analyzer_ro.go | 55 -- analysis/language/ro/analyzer_ro_test.go | 68 -- analysis/language/ro/stemmer_ro.go | 28 - analysis/language/ro/stop_filter_ro.go | 28 - analysis/language/ro/stop_words_ro.go | 257 ------- analysis/language/ru/analyzer_ru.go | 55 -- analysis/language/ru/analyzer_ru_test.go | 98 --- analysis/language/ru/stemmer_ru.go | 28 - analysis/language/ru/stop_filter_ru.go | 28 - analysis/language/ru/stop_words_ru.go | 267 -------- analysis/language/sv/analyzer_sv.go | 55 -- analysis/language/sv/analyzer_sv_test.go | 68 -- analysis/language/sv/stemmer_sv.go | 28 - analysis/language/sv/stop_filter_sv.go | 28 - analysis/language/sv/stop_words_sv.go | 157 ----- analysis/language/th/analyzer_th.go | 48 -- analysis/language/th/analyzer_th_test.go | 119 ---- analysis/language/th/stop_filter_th.go | 28 - analysis/language/th/stop_words_th.go | 143 ---- analysis/language/th/unicode_tokenizer_th.go | 28 - analysis/language/tr/analyzer_tr.go | 61 -- analysis/language/tr/analyzer_tr_test.go | 88 --- analysis/language/tr/stemmer_tr.go | 28 - analysis/language/tr/stop_filter_tr.go | 28 - analysis/language/tr/stop_words_tr.go | 236 ------- analysis/token_filters/cld2/README.md | 33 - analysis/token_filters/cld2/cld2_filter.cc | 44 -- analysis/token_filters/cld2/cld2_filter.go | 67 -- .../token_filters/cld2/cld2_filter_test.go | 123 ---- analysis/token_filters/cld2/compile_cld2.sh | 10 - .../token_filters/stemmer_filter/README.md | 18 - .../stemmer_filter/stemmer_filter.go | 80 --- .../stemmer_filter/stemmer_filter_test.go | 63 -- analysis/tokenizers/icu/boundary.go | 138 ---- analysis/tokenizers/icu/boundary_test.go | 191 ------ config.go | 135 +--- config/README.md | 11 + config/config.go | 98 +++ .../cld2_filter.h => config/config_cld2.go | 14 +- config_cznicb.go => config/config_cznicb.go | 8 +- .../config_forestdb.go | 8 +- config_icu.go => config/config_icu.go | 6 +- config_kagome.go => config/config_kagome.go | 4 +- config_leveldb.go => config/config_leveldb.go | 11 +- config/config_libstemmer.go | 31 + config_rocksdb.go => config/config_rocksdb.go | 8 +- config_cld2.go | 20 - config_stemmer.go | 18 - examples_test.go | 4 +- index/store/cznicb/batch.go | 88 --- index/store/cznicb/cznicb.go | 112 --- index/store/cznicb/cznicb_test.go | 133 ---- index/store/cznicb/iterator.go | 113 ---- index/store/cznicb/reader.go | 44 -- index/store/cznicb/writer.go | 55 -- index/store/forestdb/batch.go | 86 --- index/store/forestdb/iterator.go | 127 ---- index/store/forestdb/reader.go | 63 -- index/store/forestdb/store.go | 292 -------- index/store/forestdb/store_test.go | 636 ------------------ index/store/forestdb/writer.go | 71 -- index/store/gorocksdb/batch.go | 43 -- index/store/gorocksdb/iterator.go | 76 --- index/store/gorocksdb/reader.go | 48 -- index/store/gorocksdb/store.go | 174 ----- index/store/gorocksdb/store_test.go | 298 -------- index/store/gorocksdb/util.go | 28 - index/store/gorocksdb/writer.go | 64 -- index/store/leveldb/batch.go | 55 -- index/store/leveldb/iterator.go | 78 --- index/store/leveldb/reader.go | 48 -- index/store/leveldb/store.go | 174 ----- index/store/leveldb/store_test.go | 298 -------- index/store/leveldb/util.go | 28 - index/store/leveldb/writer.go | 65 -- index/upside_down/benchmark_cznicb_test.go | 4 +- index/upside_down/benchmark_forestdb_test.go | 2 +- index/upside_down/benchmark_gorocksdb_test.go | 2 +- index/upside_down/benchmark_leveldb_test.go | 4 +- index_impl.go | 3 +- index_test.go | 4 +- mapping_index.go | 9 +- mapping_test.go | 9 +- .../html/fragment_formatter_html.go | 4 +- search/highlight/highlighters/ansi/ansi.go | 45 ++ search/highlight/highlighters/html/html.go | 45 ++ .../highlighters/simple/highlighter_simple.go | 4 +- .../simple/highlighter_simple_test.go | 4 +- test/integration_test.go | 3 + 145 files changed, 314 insertions(+), 9649 deletions(-) delete mode 100644 analysis/analyzers/detect_lang_analyzer/detect_lang_analyzer.go delete mode 100644 analysis/language/da/analyzer_da.go delete mode 100644 analysis/language/da/analyzer_da_test.go delete mode 100644 analysis/language/da/stemmer_da.go delete mode 100644 analysis/language/da/stop_filter_da.go delete mode 100644 analysis/language/da/stop_words_da.go delete mode 100644 analysis/language/de/analyzer_de.go delete mode 100644 analysis/language/de/analyzer_de_test.go delete mode 100644 analysis/language/de/german_normalize.go delete mode 100644 analysis/language/de/german_normalize_test.go delete mode 100644 analysis/language/de/stemmer_de.go delete mode 100644 analysis/language/de/stop_filter_de.go delete mode 100644 analysis/language/de/stop_words_de.go delete mode 100644 analysis/language/en/stemmer_en.go delete mode 100644 analysis/language/en/stemmer_en_test.go delete mode 100644 analysis/language/es/analyzer_es.go delete mode 100644 analysis/language/es/analyzer_es_test.go delete mode 100644 analysis/language/es/stemmer_es.go delete mode 100644 analysis/language/es/stop_filter_es.go delete mode 100644 analysis/language/es/stop_words_es.go delete mode 100644 analysis/language/fi/analyzer_fi.go delete mode 100644 analysis/language/fi/analyzer_fi_test.go delete mode 100644 analysis/language/fi/stemmer_fi.go delete mode 100644 analysis/language/fi/stop_filter_fi.go delete mode 100644 analysis/language/fi/stop_words_fi.go delete mode 100644 analysis/language/fr/stemmer_fr.go delete mode 100644 analysis/language/hu/analyzer_hu.go delete mode 100644 analysis/language/hu/analyzer_hu_test.go delete mode 100644 analysis/language/hu/stemmer_hu.go delete mode 100644 analysis/language/hu/stop_filter_hu.go delete mode 100644 analysis/language/hu/stop_words_hu.go delete mode 100644 analysis/language/it/stemmer_it.go delete mode 100644 analysis/language/nl/analyzer_nl.go delete mode 100644 analysis/language/nl/analyzer_nl_test.go delete mode 100644 analysis/language/nl/stemmer_nl.go delete mode 100644 analysis/language/nl/stop_filter_nl.go delete mode 100644 analysis/language/nl/stop_words_nl.go delete mode 100644 analysis/language/no/analyzer_no.go delete mode 100644 analysis/language/no/analyzer_no_test.go delete mode 100644 analysis/language/no/stemmer_no.go delete mode 100644 analysis/language/no/stop_filter_no.go delete mode 100644 analysis/language/no/stop_words_no.go delete mode 100644 analysis/language/porter/stemmer_porter.go delete mode 100644 analysis/language/pt/stemmer_pt.go delete mode 100644 analysis/language/ro/analyzer_ro.go delete mode 100644 analysis/language/ro/analyzer_ro_test.go delete mode 100644 analysis/language/ro/stemmer_ro.go delete mode 100644 analysis/language/ro/stop_filter_ro.go delete mode 100644 analysis/language/ro/stop_words_ro.go delete mode 100644 analysis/language/ru/analyzer_ru.go delete mode 100644 analysis/language/ru/analyzer_ru_test.go delete mode 100644 analysis/language/ru/stemmer_ru.go delete mode 100644 analysis/language/ru/stop_filter_ru.go delete mode 100644 analysis/language/ru/stop_words_ru.go delete mode 100644 analysis/language/sv/analyzer_sv.go delete mode 100644 analysis/language/sv/analyzer_sv_test.go delete mode 100644 analysis/language/sv/stemmer_sv.go delete mode 100644 analysis/language/sv/stop_filter_sv.go delete mode 100644 analysis/language/sv/stop_words_sv.go delete mode 100644 analysis/language/th/analyzer_th.go delete mode 100644 analysis/language/th/analyzer_th_test.go delete mode 100644 analysis/language/th/stop_filter_th.go delete mode 100644 analysis/language/th/stop_words_th.go delete mode 100644 analysis/language/th/unicode_tokenizer_th.go delete mode 100644 analysis/language/tr/analyzer_tr.go delete mode 100644 analysis/language/tr/analyzer_tr_test.go delete mode 100644 analysis/language/tr/stemmer_tr.go delete mode 100644 analysis/language/tr/stop_filter_tr.go delete mode 100644 analysis/language/tr/stop_words_tr.go delete mode 100644 analysis/token_filters/cld2/README.md delete mode 100644 analysis/token_filters/cld2/cld2_filter.cc delete mode 100644 analysis/token_filters/cld2/cld2_filter.go delete mode 100644 analysis/token_filters/cld2/cld2_filter_test.go delete mode 100755 analysis/token_filters/cld2/compile_cld2.sh delete mode 100644 analysis/token_filters/stemmer_filter/README.md delete mode 100644 analysis/token_filters/stemmer_filter/stemmer_filter.go delete mode 100644 analysis/token_filters/stemmer_filter/stemmer_filter_test.go delete mode 100644 analysis/tokenizers/icu/boundary.go delete mode 100644 analysis/tokenizers/icu/boundary_test.go create mode 100644 config/README.md create mode 100644 config/config.go rename analysis/token_filters/cld2/cld2_filter.h => config/config_cld2.go (76%) rename config_cznicb.go => config/config_cznicb.go (82%) rename config_forestdb.go => config/config_forestdb.go (81%) rename config_icu.go => config/config_icu.go (83%) rename config_kagome.go => config/config_kagome.go (92%) rename config_leveldb.go => config/config_leveldb.go (74%) create mode 100644 config/config_libstemmer.go rename config_rocksdb.go => config/config_rocksdb.go (81%) delete mode 100644 config_cld2.go delete mode 100644 config_stemmer.go delete mode 100644 index/store/cznicb/batch.go delete mode 100644 index/store/cznicb/cznicb.go delete mode 100644 index/store/cznicb/cznicb_test.go delete mode 100644 index/store/cznicb/iterator.go delete mode 100644 index/store/cznicb/reader.go delete mode 100644 index/store/cznicb/writer.go delete mode 100644 index/store/forestdb/batch.go delete mode 100644 index/store/forestdb/iterator.go delete mode 100644 index/store/forestdb/reader.go delete mode 100644 index/store/forestdb/store.go delete mode 100644 index/store/forestdb/store_test.go delete mode 100644 index/store/forestdb/writer.go delete mode 100644 index/store/gorocksdb/batch.go delete mode 100644 index/store/gorocksdb/iterator.go delete mode 100644 index/store/gorocksdb/reader.go delete mode 100644 index/store/gorocksdb/store.go delete mode 100644 index/store/gorocksdb/store_test.go delete mode 100644 index/store/gorocksdb/util.go delete mode 100644 index/store/gorocksdb/writer.go delete mode 100644 index/store/leveldb/batch.go delete mode 100644 index/store/leveldb/iterator.go delete mode 100644 index/store/leveldb/reader.go delete mode 100644 index/store/leveldb/store.go delete mode 100644 index/store/leveldb/store_test.go delete mode 100644 index/store/leveldb/util.go delete mode 100644 index/store/leveldb/writer.go create mode 100644 search/highlight/highlighters/ansi/ansi.go create mode 100644 search/highlight/highlighters/html/html.go diff --git a/analysis/analyzers/detect_lang_analyzer/detect_lang_analyzer.go b/analysis/analyzers/detect_lang_analyzer/detect_lang_analyzer.go deleted file mode 100644 index cb703e6a..00000000 --- a/analysis/analyzers/detect_lang_analyzer/detect_lang_analyzer.go +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build cld2 full - -package detect_lang_analyzer - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/cld2" - "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/single_token" - "github.com/blevesearch/bleve/registry" -) - -const Name = "detect_lang" - -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - keywordTokenizer, err := cache.TokenizerNamed(single_token.Name) - if err != nil { - return nil, err - } - toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name) - if err != nil { - return nil, err - } - detectLangFilter, err := cache.TokenFilterNamed(cld2.Name) - if err != nil { - return nil, err - } - rv := analysis.Analyzer{ - Tokenizer: keywordTokenizer, - TokenFilters: []analysis.TokenFilter{ - toLowerFilter, - detectLangFilter, - }, - } - return &rv, nil -} - -func init() { - registry.RegisterAnalyzer(Name, AnalyzerConstructor) -} diff --git a/analysis/byte_array_converters/ignore/ignore_byte_array_converter.go b/analysis/byte_array_converters/ignore/ignore_byte_array_converter.go index 84695f32..7e9eb28d 100644 --- a/analysis/byte_array_converters/ignore/ignore_byte_array_converter.go +++ b/analysis/byte_array_converters/ignore/ignore_byte_array_converter.go @@ -14,6 +14,8 @@ import ( "github.com/blevesearch/bleve/registry" ) +const Name = "ignore" + type IgnoreByteArrayConverter struct{} func NewIgnoreByteArrayConverter() *IgnoreByteArrayConverter { @@ -29,5 +31,5 @@ func Constructor(config map[string]interface{}, cache *registry.Cache) (analysis } func init() { - registry.RegisterByteArrayConverter("ignore", Constructor) + registry.RegisterByteArrayConverter(Name, Constructor) } diff --git a/analysis/byte_array_converters/json/json_byte_array_converter.go b/analysis/byte_array_converters/json/json_byte_array_converter.go index 138cf982..e07fa4f9 100644 --- a/analysis/byte_array_converters/json/json_byte_array_converter.go +++ b/analysis/byte_array_converters/json/json_byte_array_converter.go @@ -16,6 +16,8 @@ import ( "github.com/blevesearch/bleve/registry" ) +const Name = "json" + type JSONByteArrayConverter struct{} func NewJSONByteArrayConverter() *JSONByteArrayConverter { @@ -36,5 +38,5 @@ func Constructor(config map[string]interface{}, cache *registry.Cache) (analysis } func init() { - registry.RegisterByteArrayConverter("json", Constructor) + registry.RegisterByteArrayConverter(Name, Constructor) } diff --git a/analysis/byte_array_converters/string/string_byte_array_conveter.go b/analysis/byte_array_converters/string/string_byte_array_conveter.go index 6ab5f658..e3936f97 100644 --- a/analysis/byte_array_converters/string/string_byte_array_conveter.go +++ b/analysis/byte_array_converters/string/string_byte_array_conveter.go @@ -14,6 +14,8 @@ import ( "github.com/blevesearch/bleve/registry" ) +const Name = "string" + type StringByteArrayConverter struct{} func NewStringByteArrayConverter() *StringByteArrayConverter { @@ -29,5 +31,5 @@ func Constructor(config map[string]interface{}, cache *registry.Cache) (analysis } func init() { - registry.RegisterByteArrayConverter("string", Constructor) + registry.RegisterByteArrayConverter(Name, Constructor) } diff --git a/analysis/datetime_parsers/datetime_optional/datetime_optional.go b/analysis/datetime_parsers/datetime_optional/datetime_optional.go index d812c4c0..44a02e76 100644 --- a/analysis/datetime_parsers/datetime_optional/datetime_optional.go +++ b/analysis/datetime_parsers/datetime_optional/datetime_optional.go @@ -7,7 +7,7 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -package html_char_filter +package datetime_optional import ( "time" diff --git a/analysis/language/ckb/analyzer_ckb.go b/analysis/language/ckb/analyzer_ckb.go index c33703a4..8e00ca00 100644 --- a/analysis/language/ckb/analyzer_ckb.go +++ b/analysis/language/ckb/analyzer_ckb.go @@ -7,21 +7,19 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -// +build icu full - package ckb import ( "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" + "github.com/blevesearch/bleve/analysis/tokenizers/unicode" "github.com/blevesearch/bleve/registry" ) const AnalyzerName = "ckb" func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - icuTokenizer, err := cache.TokenizerNamed(icu.Name) + unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name) if err != nil { return nil, err } @@ -42,7 +40,7 @@ func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) ( return nil, err } rv := analysis.Analyzer{ - Tokenizer: icuTokenizer, + Tokenizer: unicodeTokenizer, TokenFilters: []analysis.TokenFilter{ normCkbFilter, toLowerFilter, diff --git a/analysis/language/ckb/analyzer_ckb_test.go b/analysis/language/ckb/analyzer_ckb_test.go index 4c73801c..2eff9ea3 100644 --- a/analysis/language/ckb/analyzer_ckb_test.go +++ b/analysis/language/ckb/analyzer_ckb_test.go @@ -7,8 +7,6 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -// +build icu full - package ckb import ( diff --git a/analysis/language/da/analyzer_da.go b/analysis/language/da/analyzer_da.go deleted file mode 100644 index a348e07c..00000000 --- a/analysis/language/da/analyzer_da.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package da - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" - "github.com/blevesearch/bleve/registry" -) - -const AnalyzerName = "da" - -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - icuTokenizer, err := cache.TokenizerNamed(icu.Name) - if err != nil { - return nil, err - } - toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name) - if err != nil { - return nil, err - } - stopDaFilter, err := cache.TokenFilterNamed(StopName) - if err != nil { - return nil, err - } - stemmerDaFilter, err := cache.TokenFilterNamed(StemmerName) - if err != nil { - return nil, err - } - rv := analysis.Analyzer{ - Tokenizer: icuTokenizer, - TokenFilters: []analysis.TokenFilter{ - toLowerFilter, - stopDaFilter, - stemmerDaFilter, - }, - } - return &rv, nil -} - -func init() { - registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) -} diff --git a/analysis/language/da/analyzer_da_test.go b/analysis/language/da/analyzer_da_test.go deleted file mode 100644 index d37af0fc..00000000 --- a/analysis/language/da/analyzer_da_test.go +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package da - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -func TestDanishAnalyzer(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - // stemming - { - input: []byte("undersøg"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("undersøg"), - Position: 1, - Start: 0, - End: 9, - }, - }, - }, - { - input: []byte("undersøgelse"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("undersøg"), - Position: 1, - Start: 0, - End: 13, - }, - }, - }, - // stop word - { - input: []byte("på"), - output: analysis.TokenStream{}, - }, - } - - cache := registry.NewCache() - analyzer, err := cache.AnalyzerNamed(AnalyzerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := analyzer.Analyze(test.input) - if !reflect.DeepEqual(actual, test.output) { - t.Errorf("expected %v, got %v", test.output, actual) - } - } -} diff --git a/analysis/language/da/stemmer_da.go b/analysis/language/da/stemmer_da.go deleted file mode 100644 index 56e6ba02..00000000 --- a/analysis/language/da/stemmer_da.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package da - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_da" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("da") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/da/stop_filter_da.go b/analysis/language/da/stop_filter_da.go deleted file mode 100644 index 7989c6ad..00000000 --- a/analysis/language/da/stop_filter_da.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package da - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" - "github.com/blevesearch/bleve/registry" -) - -func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - tokenMap, err := cache.TokenMapNamed(StopName) - if err != nil { - return nil, err - } - return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil -} - -func init() { - registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) -} diff --git a/analysis/language/da/stop_words_da.go b/analysis/language/da/stop_words_da.go deleted file mode 100644 index 63a407a0..00000000 --- a/analysis/language/da/stop_words_da.go +++ /dev/null @@ -1,134 +0,0 @@ -package da - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const StopName = "stop_da" - -// this content was obtained from: -// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ -// ` was changed to ' to allow for literal string - -var DanishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Danish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - -og | and -i | in -jeg | I -det | that (dem. pronoun)/it (pers. pronoun) -at | that (in front of a sentence)/to (with infinitive) -en | a/an -den | it (pers. pronoun)/that (dem. pronoun) -til | to/at/for/until/against/by/of/into, more -er | present tense of "to be" -som | who, as -på | on/upon/in/on/at/to/after/of/with/for, on -de | they -med | with/by/in, along -han | he -af | of/by/from/off/for/in/with/on, off -for | at/for/to/from/by/of/ago, in front/before, because -ikke | not -der | who/which, there/those -var | past tense of "to be" -mig | me/myself -sig | oneself/himself/herself/itself/themselves -men | but -et | a/an/one, one (number), someone/somebody/one -har | present tense of "to have" -om | round/about/for/in/a, about/around/down, if -vi | we -min | my -havde | past tense of "to have" -ham | him -hun | she -nu | now -over | over/above/across/by/beyond/past/on/about, over/past -da | then, when/as/since -fra | from/off/since, off, since -du | you -ud | out -sin | his/her/its/one's -dem | them -os | us/ourselves -op | up -man | you/one -hans | his -hvor | where -eller | or -hvad | what -skal | must/shall etc. -selv | myself/youself/herself/ourselves etc., even -her | here -alle | all/everyone/everybody etc. -vil | will (verb) -blev | past tense of "to stay/to remain/to get/to become" -kunne | could -ind | in -når | when -være | present tense of "to be" -dog | however/yet/after all -noget | something -ville | would -jo | you know/you see (adv), yes -deres | their/theirs -efter | after/behind/according to/for/by/from, later/afterwards -ned | down -skulle | should -denne | this -end | than -dette | this -mit | my/mine -også | also -under | under/beneath/below/during, below/underneath -have | have -dig | you -anden | other -hende | her -mine | my -alt | everything -meget | much/very, plenty of -sit | his, her, its, one's -sine | his, her, its, one's -vor | our -mod | against -disse | these -hvis | if -din | your/yours -nogle | some -hos | by/at -blive | be/become -mange | many -ad | by/through -bliver | present tense of "to be/to become" -hendes | her/hers -været | be -thi | for (conj) -jer | you -sådan | such, like this/like that -`) - -func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { - rv := analysis.NewTokenMap() - err := rv.LoadBytes(DanishStopWords) - return rv, err -} - -func init() { - registry.RegisterTokenMap(StopName, TokenMapConstructor) -} diff --git a/analysis/language/de/analyzer_de.go b/analysis/language/de/analyzer_de.go deleted file mode 100644 index 1f0169ac..00000000 --- a/analysis/language/de/analyzer_de.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package de - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" - "github.com/blevesearch/bleve/registry" -) - -const AnalyzerName = "de" - -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - icuTokenizer, err := cache.TokenizerNamed(icu.Name) - if err != nil { - return nil, err - } - toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name) - if err != nil { - return nil, err - } - stopDeFilter, err := cache.TokenFilterNamed(NormalizeName) - if err != nil { - return nil, err - } - normalizeDeFilter, err := cache.TokenFilterNamed(NormalizeName) - if err != nil { - return nil, err - } - stemmerDeFilter, err := cache.TokenFilterNamed(StemmerName) - if err != nil { - return nil, err - } - rv := analysis.Analyzer{ - Tokenizer: icuTokenizer, - TokenFilters: []analysis.TokenFilter{ - toLowerFilter, - stopDeFilter, - normalizeDeFilter, - stemmerDeFilter, - }, - } - return &rv, nil -} - -func init() { - registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) -} diff --git a/analysis/language/de/analyzer_de_test.go b/analysis/language/de/analyzer_de_test.go deleted file mode 100644 index 0d4bf55c..00000000 --- a/analysis/language/de/analyzer_de_test.go +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package de - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -func TestGermanAnalyzer(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - { - input: []byte("Tisch"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("tisch"), - Position: 1, - Start: 0, - End: 5, - }, - }, - }, - { - input: []byte("Tische"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("tisch"), - Position: 1, - Start: 0, - End: 6, - }, - }, - }, - { - input: []byte("Tischen"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("tisch"), - Position: 1, - Start: 0, - End: 7, - }, - }, - }, - // german specials - { - input: []byte("Schaltflächen"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("schaltflach"), - Position: 1, - Start: 0, - End: 14, - }, - }, - }, - { - input: []byte("Schaltflaechen"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("schaltflach"), - Position: 1, - Start: 0, - End: 14, - }, - }, - }, - } - - cache := registry.NewCache() - analyzer, err := cache.AnalyzerNamed(AnalyzerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := analyzer.Analyze(test.input) - if !reflect.DeepEqual(actual, test.output) { - t.Errorf("expected %v, got %v", test.output, actual) - } - } -} diff --git a/analysis/language/de/german_normalize.go b/analysis/language/de/german_normalize.go deleted file mode 100644 index 47cc97b4..00000000 --- a/analysis/language/de/german_normalize.go +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package de - -import ( - "bytes" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const NormalizeName = "normalize_de" - -const ( - N = 0 /* ordinary state */ - V = 1 /* stops 'u' from entering umlaut state */ - U = 2 /* umlaut state, allows e-deletion */ -) - -type GermanNormalizeFilter struct { -} - -func NewGermanNormalizeFilter() *GermanNormalizeFilter { - return &GermanNormalizeFilter{} -} - -func (s *GermanNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream { - for _, token := range input { - term := normalize(token.Term) - token.Term = term - } - return input -} - -func normalize(input []byte) []byte { - state := N - runes := bytes.Runes(input) - for i := 0; i < len(runes); i++ { - switch runes[i] { - case 'a', 'o': - state = U - case 'u': - if state == N { - state = U - } else { - state = V - } - case 'e': - if state == U { - runes = analysis.DeleteRune(runes, i) - i-- - } - state = V - case 'i', 'q', 'y': - state = V - case 'ä': - runes[i] = 'a' - state = V - case 'ö': - runes[i] = 'o' - state = V - case 'ü': - runes[i] = 'u' - state = V - case 'ß': - runes[i] = 's' - i++ - // newrunes := make([]rune, len(runes)+1) - // copy(newrunes, runes) - // runes = newrunes - // runes[i] = 's' - runes = analysis.InsertRune(runes, i, 's') - state = N - default: - state = N - } - } - return analysis.BuildTermFromRunes(runes) -} - -func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return NewGermanNormalizeFilter(), nil -} - -func init() { - registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor) -} diff --git a/analysis/language/de/german_normalize_test.go b/analysis/language/de/german_normalize_test.go deleted file mode 100644 index 8315a192..00000000 --- a/analysis/language/de/german_normalize_test.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package de - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" -) - -func TestGermanNormalizeFilter(t *testing.T) { - tests := []struct { - input analysis.TokenStream - output analysis.TokenStream - }{ - // Tests that a/o/u + e is equivalent to the umlaut form - { - input: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("Schaltflächen"), - }, - }, - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("Schaltflachen"), - }, - }, - }, - { - input: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("Schaltflaechen"), - }, - }, - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("Schaltflachen"), - }, - }, - }, - // Tests the specific heuristic that ue is not folded after a vowel or q. - { - input: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("dauer"), - }, - }, - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("dauer"), - }, - }, - }, - // Tests german specific folding of sharp-s - { - input: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("weißbier"), - }, - }, - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("weissbier"), - }, - }, - }, - // empty - { - input: analysis.TokenStream{ - &analysis.Token{ - Term: []byte(""), - }, - }, - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte(""), - }, - }, - }, - } - - germanNormalizeFilter := NewGermanNormalizeFilter() - for _, test := range tests { - actual := germanNormalizeFilter.Filter(test.input) - if !reflect.DeepEqual(actual, test.output) { - t.Errorf("expected %#v, got %#v", test.output, actual) - t.Errorf("expected %s(% x), got %s(% x)", test.output[0].Term, test.output[0].Term, actual[0].Term, actual[0].Term) - } - } -} diff --git a/analysis/language/de/stemmer_de.go b/analysis/language/de/stemmer_de.go deleted file mode 100644 index a37c7159..00000000 --- a/analysis/language/de/stemmer_de.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package de - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_de" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("de") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/de/stop_filter_de.go b/analysis/language/de/stop_filter_de.go deleted file mode 100644 index 2824cd19..00000000 --- a/analysis/language/de/stop_filter_de.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package de - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" - "github.com/blevesearch/bleve/registry" -) - -func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - tokenMap, err := cache.TokenMapNamed(StopName) - if err != nil { - return nil, err - } - return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil -} - -func init() { - registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) -} diff --git a/analysis/language/de/stop_words_de.go b/analysis/language/de/stop_words_de.go deleted file mode 100644 index b71c8f70..00000000 --- a/analysis/language/de/stop_words_de.go +++ /dev/null @@ -1,318 +0,0 @@ -package de - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const StopName = "stop_de" - -// this content was obtained from: -// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ -// ` was changed to ' to allow for literal string - -var GermanStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A German stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | The number of forms in this list is reduced significantly by passing it - | through the German stemmer. - - -aber | but - -alle | all -allem -allen -aller -alles - -als | than, as -also | so -am | an + dem -an | at - -ander | other -andere -anderem -anderen -anderer -anderes -anderm -andern -anderr -anders - -auch | also -auf | on -aus | out of -bei | by -bin | am -bis | until -bist | art -da | there -damit | with it -dann | then - -der | the -den -des -dem -die -das - -daß | that - -derselbe | the same -derselben -denselben -desselben -demselben -dieselbe -dieselben -dasselbe - -dazu | to that - -dein | thy -deine -deinem -deinen -deiner -deines - -denn | because - -derer | of those -dessen | of him - -dich | thee -dir | to thee -du | thou - -dies | this -diese -diesem -diesen -dieser -dieses - - -doch | (several meanings) -dort | (over) there - - -durch | through - -ein | a -eine -einem -einen -einer -eines - -einig | some -einige -einigem -einigen -einiger -einiges - -einmal | once - -er | he -ihn | him -ihm | to him - -es | it -etwas | something - -euer | your -eure -eurem -euren -eurer -eures - -für | for -gegen | towards -gewesen | p.p. of sein -hab | have -habe | have -haben | have -hat | has -hatte | had -hatten | had -hier | here -hin | there -hinter | behind - -ich | I -mich | me -mir | to me - - -ihr | you, to her -ihre -ihrem -ihren -ihrer -ihres -euch | to you - -im | in + dem -in | in -indem | while -ins | in + das -ist | is - -jede | each, every -jedem -jeden -jeder -jedes - -jene | that -jenem -jenen -jener -jenes - -jetzt | now -kann | can - -kein | no -keine -keinem -keinen -keiner -keines - -können | can -könnte | could -machen | do -man | one - -manche | some, many a -manchem -manchen -mancher -manches - -mein | my -meine -meinem -meinen -meiner -meines - -mit | with -muss | must -musste | had to -nach | to(wards) -nicht | not -nichts | nothing -noch | still, yet -nun | now -nur | only -ob | whether -oder | or -ohne | without -sehr | very - -sein | his -seine -seinem -seinen -seiner -seines - -selbst | self -sich | herself - -sie | they, she -ihnen | to them - -sind | are -so | so - -solche | such -solchem -solchen -solcher -solches - -soll | shall -sollte | should -sondern | but -sonst | else -über | over -um | about, around -und | and - -uns | us -unse -unsem -unsen -unser -unses - -unter | under -viel | much -vom | von + dem -von | from -vor | before -während | while -war | was -waren | were -warst | wast -was | what -weg | away, off -weil | because -weiter | further - -welche | which -welchem -welchen -welcher -welches - -wenn | when -werde | will -werden | will -wie | how -wieder | again -will | want -wir | we -wird | will -wirst | willst -wo | where -wollen | want -wollte | wanted -würde | would -würden | would -zu | to -zum | zu + dem -zur | zu + der -zwar | indeed -zwischen | between - -`) - -func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { - rv := analysis.NewTokenMap() - err := rv.LoadBytes(GermanStopWords) - return rv, err -} - -func init() { - registry.RegisterTokenMap(StopName, TokenMapConstructor) -} diff --git a/analysis/language/en/stemmer_en.go b/analysis/language/en/stemmer_en.go deleted file mode 100644 index 695b6381..00000000 --- a/analysis/language/en/stemmer_en.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package en - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_en" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("en") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/en/stemmer_en_test.go b/analysis/language/en/stemmer_en_test.go deleted file mode 100644 index 6506968e..00000000 --- a/analysis/language/en/stemmer_en_test.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package en - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -func TestEnglishStemmer(t *testing.T) { - tests := []struct { - input analysis.TokenStream - output analysis.TokenStream - }{ - { - input: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("walking"), - }, - &analysis.Token{ - Term: []byte("talked"), - }, - &analysis.Token{ - Term: []byte("business"), - }, - &analysis.Token{ - Term: []byte("protected"), - KeyWord: true, - }, - }, - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("walk"), - }, - &analysis.Token{ - Term: []byte("talk"), - }, - &analysis.Token{ - Term: []byte("busi"), - }, - &analysis.Token{ - Term: []byte("protected"), - KeyWord: true, - }, - }, - }, - } - - cache := registry.NewCache() - stemmerFilter, err := cache.TokenFilterNamed(StemmerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := stemmerFilter.Filter(test.input) - if !reflect.DeepEqual(actual, test.output) { - t.Errorf("expected %s, got %s", test.output, actual) - } - } -} diff --git a/analysis/language/es/analyzer_es.go b/analysis/language/es/analyzer_es.go deleted file mode 100644 index bcc39891..00000000 --- a/analysis/language/es/analyzer_es.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package es - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" - - "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" -) - -const AnalyzerName = "es" - -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - icuTokenizer, err := cache.TokenizerNamed(icu.Name) - if err != nil { - return nil, err - } - toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name) - if err != nil { - return nil, err - } - stopEsFilter, err := cache.TokenFilterNamed(StopName) - if err != nil { - return nil, err - } - stemmerEsFilter, err := cache.TokenFilterNamed(StemmerName) - if err != nil { - return nil, err - } - rv := analysis.Analyzer{ - Tokenizer: icuTokenizer, - TokenFilters: []analysis.TokenFilter{ - toLowerFilter, - stopEsFilter, - stemmerEsFilter, - }, - } - return &rv, nil -} - -func init() { - registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) -} diff --git a/analysis/language/es/analyzer_es_test.go b/analysis/language/es/analyzer_es_test.go deleted file mode 100644 index 67b31b2f..00000000 --- a/analysis/language/es/analyzer_es_test.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package es - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -func TestSpanishAnalyzer(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - // stemming - { - input: []byte("chicana"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("chican"), - Position: 1, - Start: 0, - End: 7, - }, - }, - }, - { - input: []byte("chicano"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("chican"), - Position: 1, - Start: 0, - End: 7, - }, - }, - }, - } - - cache := registry.NewCache() - analyzer, err := cache.AnalyzerNamed(AnalyzerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := analyzer.Analyze(test.input) - if !reflect.DeepEqual(actual, test.output) { - t.Errorf("expected %v, got %v", test.output, actual) - } - } -} diff --git a/analysis/language/es/stemmer_es.go b/analysis/language/es/stemmer_es.go deleted file mode 100644 index 6b4c64ad..00000000 --- a/analysis/language/es/stemmer_es.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package es - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_es" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("es") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/es/stop_filter_es.go b/analysis/language/es/stop_filter_es.go deleted file mode 100644 index 0a950544..00000000 --- a/analysis/language/es/stop_filter_es.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package es - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" - "github.com/blevesearch/bleve/registry" -) - -func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - tokenMap, err := cache.TokenMapNamed(StopName) - if err != nil { - return nil, err - } - return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil -} - -func init() { - registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) -} diff --git a/analysis/language/es/stop_words_es.go b/analysis/language/es/stop_words_es.go deleted file mode 100644 index b699daf4..00000000 --- a/analysis/language/es/stop_words_es.go +++ /dev/null @@ -1,380 +0,0 @@ -package es - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const StopName = "stop_es" - -// this content was obtained from: -// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ -// ` was changed to ' to allow for literal string - -var SpanishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Spanish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. - -de | from, of -la | the, her -que | who, that -el | the -en | in -y | and -a | to -los | the, them -del | de + el -se | himself, from him etc -las | the, them -por | for, by, etc -un | a -para | for -con | with -no | no -una | a -su | his, her -al | a + el - | es from SER -lo | him -como | how -más | more -pero | pero -sus | su plural -le | to him, her -ya | already -o | or - | fue from SER -este | this - | ha from HABER -sí | himself etc -porque | because -esta | this - | son from SER -entre | between - | está from ESTAR -cuando | when -muy | very -sin | without -sobre | on - | ser from SER - | tiene from TENER -también | also -me | me -hasta | until -hay | there is/are -donde | where - | han from HABER -quien | whom, that - | están from ESTAR - | estado from ESTAR -desde | from -todo | all -nos | us -durante | during - | estados from ESTAR -todos | all -uno | a -les | to them -ni | nor -contra | against -otros | other - | fueron from SER -ese | that -eso | that - | había from HABER -ante | before -ellos | they -e | and (variant of y) -esto | this -mí | me -antes | before -algunos | some -qué | what? -unos | a -yo | I -otro | other -otras | other -otra | other -él | he -tanto | so much, many -esa | that -estos | these -mucho | much, many -quienes | who -nada | nothing -muchos | many -cual | who - | sea from SER -poco | few -ella | she -estar | to be - | haber from HABER -estas | these - | estaba from ESTAR - | estamos from ESTAR -algunas | some -algo | something -nosotros | we - - | other forms - -mi | me -mis | mi plural -tú | thou -te | thee -ti | thee -tu | thy -tus | tu plural -ellas | they -nosotras | we -vosotros | you -vosotras | you -os | you -mío | mine -mía | -míos | -mías | -tuyo | thine -tuya | -tuyos | -tuyas | -suyo | his, hers, theirs -suya | -suyos | -suyas | -nuestro | ours -nuestra | -nuestros | -nuestras | -vuestro | yours -vuestra | -vuestros | -vuestras | -esos | those -esas | those - - | forms of estar, to be (not including the infinitive): -estoy -estás -está -estamos -estáis -están -esté -estés -estemos -estéis -estén -estaré -estarás -estará -estaremos -estaréis -estarán -estaría -estarías -estaríamos -estaríais -estarían -estaba -estabas -estábamos -estabais -estaban -estuve -estuviste -estuvo -estuvimos -estuvisteis -estuvieron -estuviera -estuvieras -estuviéramos -estuvierais -estuvieran -estuviese -estuvieses -estuviésemos -estuvieseis -estuviesen -estando -estado -estada -estados -estadas -estad - - | forms of haber, to have (not including the infinitive): -he -has -ha -hemos -habéis -han -haya -hayas -hayamos -hayáis -hayan -habré -habrás -habrá -habremos -habréis -habrán -habría -habrías -habríamos -habríais -habrían -había -habías -habíamos -habíais -habían -hube -hubiste -hubo -hubimos -hubisteis -hubieron -hubiera -hubieras -hubiéramos -hubierais -hubieran -hubiese -hubieses -hubiésemos -hubieseis -hubiesen -habiendo -habido -habida -habidos -habidas - - | forms of ser, to be (not including the infinitive): -soy -eres -es -somos -sois -son -sea -seas -seamos -seáis -sean -seré -serás -será -seremos -seréis -serán -sería -serías -seríamos -seríais -serían -era -eras -éramos -erais -eran -fui -fuiste -fue -fuimos -fuisteis -fueron -fuera -fueras -fuéramos -fuerais -fueran -fuese -fueses -fuésemos -fueseis -fuesen -siendo -sido - | sed also means 'thirst' - - | forms of tener, to have (not including the infinitive): -tengo -tienes -tiene -tenemos -tenéis -tienen -tenga -tengas -tengamos -tengáis -tengan -tendré -tendrás -tendrá -tendremos -tendréis -tendrán -tendría -tendrías -tendríamos -tendríais -tendrían -tenía -tenías -teníamos -teníais -tenían -tuve -tuviste -tuvo -tuvimos -tuvisteis -tuvieron -tuviera -tuvieras -tuviéramos -tuvierais -tuvieran -tuviese -tuvieses -tuviésemos -tuvieseis -tuviesen -teniendo -tenido -tenida -tenidos -tenidas -tened - -`) - -func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { - rv := analysis.NewTokenMap() - err := rv.LoadBytes(SpanishStopWords) - return rv, err -} - -func init() { - registry.RegisterTokenMap(StopName, TokenMapConstructor) -} diff --git a/analysis/language/fa/analyzer_fa.go b/analysis/language/fa/analyzer_fa.go index 7d9a525c..a0121323 100644 --- a/analysis/language/fa/analyzer_fa.go +++ b/analysis/language/fa/analyzer_fa.go @@ -7,8 +7,6 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -// +build icu full - package fa import ( @@ -18,7 +16,7 @@ import ( "github.com/blevesearch/bleve/analysis/char_filters/zero_width_non_joiner" "github.com/blevesearch/bleve/analysis/language/ar" "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" + "github.com/blevesearch/bleve/analysis/tokenizers/unicode" ) const AnalyzerName = "fa" @@ -28,7 +26,7 @@ func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) ( if err != nil { return nil, err } - icuTokenizer, err := cache.TokenizerNamed(icu.Name) + unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name) if err != nil { return nil, err } @@ -52,7 +50,7 @@ func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) ( CharFilters: []analysis.CharFilter{ zFilter, }, - Tokenizer: icuTokenizer, + Tokenizer: unicodeTokenizer, TokenFilters: []analysis.TokenFilter{ toLowerFilter, normArFilter, diff --git a/analysis/language/fa/analyzer_fa_test.go b/analysis/language/fa/analyzer_fa_test.go index 1f95a53d..271b8020 100644 --- a/analysis/language/fa/analyzer_fa_test.go +++ b/analysis/language/fa/analyzer_fa_test.go @@ -7,8 +7,6 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -// +build icu full - package fa import ( diff --git a/analysis/language/fi/analyzer_fi.go b/analysis/language/fi/analyzer_fi.go deleted file mode 100644 index 909c54e5..00000000 --- a/analysis/language/fi/analyzer_fi.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package fi - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" - - "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" -) - -const AnalyzerName = "fi" - -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - icuTokenizer, err := cache.TokenizerNamed(icu.Name) - if err != nil { - return nil, err - } - toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name) - if err != nil { - return nil, err - } - stopFiFilter, err := cache.TokenFilterNamed(StopName) - if err != nil { - return nil, err - } - stemmerFiFilter, err := cache.TokenFilterNamed(StemmerName) - if err != nil { - return nil, err - } - rv := analysis.Analyzer{ - Tokenizer: icuTokenizer, - TokenFilters: []analysis.TokenFilter{ - toLowerFilter, - stopFiFilter, - stemmerFiFilter, - }, - } - return &rv, nil -} - -func init() { - registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) -} diff --git a/analysis/language/fi/analyzer_fi_test.go b/analysis/language/fi/analyzer_fi_test.go deleted file mode 100644 index faaa03dd..00000000 --- a/analysis/language/fi/analyzer_fi_test.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package fi - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -func TestFinishAnalyzer(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - // stemming - { - input: []byte("edeltäjiinsä"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("edeltäj"), - }, - }, - }, - { - input: []byte("edeltäjistään"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("edeltäj"), - }, - }, - }, - // stop word - { - input: []byte("olla"), - output: analysis.TokenStream{}, - }, - } - - cache := registry.NewCache() - analyzer, err := cache.AnalyzerNamed(AnalyzerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := analyzer.Analyze(test.input) - if len(actual) != len(test.output) { - t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) - } - for i, tok := range actual { - if !reflect.DeepEqual(tok.Term, test.output[i].Term) { - t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) - } - } - } -} diff --git a/analysis/language/fi/stemmer_fi.go b/analysis/language/fi/stemmer_fi.go deleted file mode 100644 index 454647b5..00000000 --- a/analysis/language/fi/stemmer_fi.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package fi - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_fi" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("fi") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/fi/stop_filter_fi.go b/analysis/language/fi/stop_filter_fi.go deleted file mode 100644 index 3c5a2f0d..00000000 --- a/analysis/language/fi/stop_filter_fi.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package fi - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" - "github.com/blevesearch/bleve/registry" -) - -func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - tokenMap, err := cache.TokenMapNamed(StopName) - if err != nil { - return nil, err - } - return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil -} - -func init() { - registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) -} diff --git a/analysis/language/fi/stop_words_fi.go b/analysis/language/fi/stop_words_fi.go deleted file mode 100644 index 7cf0c9c1..00000000 --- a/analysis/language/fi/stop_words_fi.go +++ /dev/null @@ -1,121 +0,0 @@ -package fi - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const StopName = "stop_fi" - -// this content was obtained from: -// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ -// ` was changed to ' to allow for literal string - -var FinnishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| forms of BE - -olla -olen -olet -on -olemme -olette -ovat -ole | negative form - -oli -olisi -olisit -olisin -olisimme -olisitte -olisivat -olit -olin -olimme -olitte -olivat -ollut -olleet - -en | negation -et -ei -emme -ette -eivät - -|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans -minä minun minut minua minussa minusta minuun minulla minulta minulle | I -sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you -hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she -me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we -te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you -he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they - -tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this -tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that -se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it -nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these -nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those -ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they - -kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who -ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) -mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what -mitkä | (pl) - -joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which -jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) - -| conjunctions - -että | that -ja | and -jos | if -koska | because -kuin | than -mutta | but -niin | so -sekä | and -sillä | for -tai | or -vaan | but -vai | or -vaikka | although - - -| prepositions - -kanssa | with -mukaan | according to -noin | about -poikki | across -yli | over, across - -| other - -kun | when -niin | so -nyt | now -itse | self - -`) - -func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { - rv := analysis.NewTokenMap() - err := rv.LoadBytes(FinnishStopWords) - return rv, err -} - -func init() { - registry.RegisterTokenMap(StopName, TokenMapConstructor) -} diff --git a/analysis/language/fr/stemmer_fr.go b/analysis/language/fr/stemmer_fr.go deleted file mode 100644 index a5edccb7..00000000 --- a/analysis/language/fr/stemmer_fr.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package fr - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_fr" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("fr") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/hu/analyzer_hu.go b/analysis/language/hu/analyzer_hu.go deleted file mode 100644 index cc85f125..00000000 --- a/analysis/language/hu/analyzer_hu.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package hu - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" - - "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" -) - -const AnalyzerName = "hu" - -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - icuTokenizer, err := cache.TokenizerNamed(icu.Name) - if err != nil { - return nil, err - } - toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name) - if err != nil { - return nil, err - } - stopHuFilter, err := cache.TokenFilterNamed(StopName) - if err != nil { - return nil, err - } - stemmerHuFilter, err := cache.TokenFilterNamed(StemmerName) - if err != nil { - return nil, err - } - rv := analysis.Analyzer{ - Tokenizer: icuTokenizer, - TokenFilters: []analysis.TokenFilter{ - toLowerFilter, - stopHuFilter, - stemmerHuFilter, - }, - } - return &rv, nil -} - -func init() { - registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) -} diff --git a/analysis/language/hu/analyzer_hu_test.go b/analysis/language/hu/analyzer_hu_test.go deleted file mode 100644 index 5395b371..00000000 --- a/analysis/language/hu/analyzer_hu_test.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package hu - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -func TestHungarianAnalyzer(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - // stemming - { - input: []byte("babakocsi"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("babakocs"), - }, - }, - }, - { - input: []byte("babakocsijáért"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("babakocs"), - }, - }, - }, - // stop word - { - input: []byte("által"), - output: analysis.TokenStream{}, - }, - } - - cache := registry.NewCache() - analyzer, err := cache.AnalyzerNamed(AnalyzerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := analyzer.Analyze(test.input) - if len(actual) != len(test.output) { - t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) - } - for i, tok := range actual { - if !reflect.DeepEqual(tok.Term, test.output[i].Term) { - t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) - } - } - } -} diff --git a/analysis/language/hu/stemmer_hu.go b/analysis/language/hu/stemmer_hu.go deleted file mode 100644 index a73c5ec1..00000000 --- a/analysis/language/hu/stemmer_hu.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package hu - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_hu" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("hu") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/hu/stop_filter_hu.go b/analysis/language/hu/stop_filter_hu.go deleted file mode 100644 index 5e37a9a9..00000000 --- a/analysis/language/hu/stop_filter_hu.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package hu - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" - "github.com/blevesearch/bleve/registry" -) - -func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - tokenMap, err := cache.TokenMapNamed(StopName) - if err != nil { - return nil, err - } - return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil -} - -func init() { - registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) -} diff --git a/analysis/language/hu/stop_words_hu.go b/analysis/language/hu/stop_words_hu.go deleted file mode 100644 index fe45d55e..00000000 --- a/analysis/language/hu/stop_words_hu.go +++ /dev/null @@ -1,235 +0,0 @@ -package hu - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const StopName = "stop_hu" - -// this content was obtained from: -// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ -// ` was changed to ' to allow for literal string - -var HungarianStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| Hungarian stop word list -| prepared by Anna Tordai - -a -ahogy -ahol -aki -akik -akkor -alatt -által -általában -amely -amelyek -amelyekben -amelyeket -amelyet -amelynek -ami -amit -amolyan -amíg -amikor -át -abban -ahhoz -annak -arra -arról -az -azok -azon -azt -azzal -azért -aztán -azután -azonban -bár -be -belül -benne -cikk -cikkek -cikkeket -csak -de -e -eddig -egész -egy -egyes -egyetlen -egyéb -egyik -egyre -ekkor -el -elég -ellen -elő -először -előtt -első -én -éppen -ebben -ehhez -emilyen -ennek -erre -ez -ezt -ezek -ezen -ezzel -ezért -és -fel -felé -hanem -hiszen -hogy -hogyan -igen -így -illetve -ill. -ill -ilyen -ilyenkor -ison -ismét -itt -jó -jól -jobban -kell -kellett -keresztül -keressünk -ki -kívül -között -közül -legalább -lehet -lehetett -legyen -lenne -lenni -lesz -lett -maga -magát -majd -majd -már -más -másik -meg -még -mellett -mert -mely -melyek -mi -mit -míg -miért -milyen -mikor -minden -mindent -mindenki -mindig -mint -mintha -mivel -most -nagy -nagyobb -nagyon -ne -néha -nekem -neki -nem -néhány -nélkül -nincs -olyan -ott -össze -ő -ők -őket -pedig -persze -rá -s -saját -sem -semmi -sok -sokat -sokkal -számára -szemben -szerint -szinte -talán -tehát -teljes -tovább -továbbá -több -úgy -ugyanis -új -újabb -újra -után -utána -utolsó -vagy -vagyis -valaki -valami -valamint -való -vagyok -van -vannak -volt -voltam -voltak -voltunk -vissza -vele -viszont -volna -`) - -func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { - rv := analysis.NewTokenMap() - err := rv.LoadBytes(HungarianStopWords) - return rv, err -} - -func init() { - registry.RegisterTokenMap(StopName, TokenMapConstructor) -} diff --git a/analysis/language/it/stemmer_it.go b/analysis/language/it/stemmer_it.go deleted file mode 100644 index d979f388..00000000 --- a/analysis/language/it/stemmer_it.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package it - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_it" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("it") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/ja/analyzer_ja.go b/analysis/language/ja/analyzer_ja.go index 2592ba0d..8d52586f 100644 --- a/analysis/language/ja/analyzer_ja.go +++ b/analysis/language/ja/analyzer_ja.go @@ -7,8 +7,6 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -// +build kagome full - package ja import ( diff --git a/analysis/language/ja/analyzer_ja_test.go b/analysis/language/ja/analyzer_ja_test.go index d88d8a13..91031cc7 100644 --- a/analysis/language/ja/analyzer_ja_test.go +++ b/analysis/language/ja/analyzer_ja_test.go @@ -7,8 +7,6 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -// +build kagome full - package ja import ( diff --git a/analysis/language/ja/ja_morph_kagome.go b/analysis/language/ja/ja_morph_kagome.go index 1eec87da..e0b7cc81 100644 --- a/analysis/language/ja/ja_morph_kagome.go +++ b/analysis/language/ja/ja_morph_kagome.go @@ -7,8 +7,6 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -// +build kagome full - package ja import ( diff --git a/analysis/language/ja/ja_morph_kagome_test.go b/analysis/language/ja/ja_morph_kagome_test.go index be8d4959..d5ccc3d5 100644 --- a/analysis/language/ja/ja_morph_kagome_test.go +++ b/analysis/language/ja/ja_morph_kagome_test.go @@ -7,8 +7,6 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -// +build kagome full - package ja import ( diff --git a/analysis/language/nl/analyzer_nl.go b/analysis/language/nl/analyzer_nl.go deleted file mode 100644 index 009ea3d8..00000000 --- a/analysis/language/nl/analyzer_nl.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package nl - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" - - "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" -) - -const AnalyzerName = "nl" - -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - icuTokenizer, err := cache.TokenizerNamed(icu.Name) - if err != nil { - return nil, err - } - toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name) - if err != nil { - return nil, err - } - stopNlFilter, err := cache.TokenFilterNamed(StopName) - if err != nil { - return nil, err - } - stemmerNlFilter, err := cache.TokenFilterNamed(StemmerName) - if err != nil { - return nil, err - } - rv := analysis.Analyzer{ - Tokenizer: icuTokenizer, - TokenFilters: []analysis.TokenFilter{ - toLowerFilter, - stopNlFilter, - stemmerNlFilter, - }, - } - return &rv, nil -} - -func init() { - registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) -} diff --git a/analysis/language/nl/analyzer_nl_test.go b/analysis/language/nl/analyzer_nl_test.go deleted file mode 100644 index f0eb7eb0..00000000 --- a/analysis/language/nl/analyzer_nl_test.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package nl - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -func TestDutchAnalyzer(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - // stemming - { - input: []byte("lichamelijk"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("licham"), - }, - }, - }, - { - input: []byte("lichamelijke"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("licham"), - }, - }, - }, - // stop word - { - input: []byte("van"), - output: analysis.TokenStream{}, - }, - } - - cache := registry.NewCache() - analyzer, err := cache.AnalyzerNamed(AnalyzerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := analyzer.Analyze(test.input) - if len(actual) != len(test.output) { - t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) - } - for i, tok := range actual { - if !reflect.DeepEqual(tok.Term, test.output[i].Term) { - t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) - } - } - } -} diff --git a/analysis/language/nl/stemmer_nl.go b/analysis/language/nl/stemmer_nl.go deleted file mode 100644 index e6613f55..00000000 --- a/analysis/language/nl/stemmer_nl.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package nl - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_nl" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("nl") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/nl/stop_filter_nl.go b/analysis/language/nl/stop_filter_nl.go deleted file mode 100644 index 8be87313..00000000 --- a/analysis/language/nl/stop_filter_nl.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package nl - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" - "github.com/blevesearch/bleve/registry" -) - -func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - tokenMap, err := cache.TokenMapNamed(StopName) - if err != nil { - return nil, err - } - return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil -} - -func init() { - registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) -} diff --git a/analysis/language/nl/stop_words_nl.go b/analysis/language/nl/stop_words_nl.go deleted file mode 100644 index 4adae100..00000000 --- a/analysis/language/nl/stop_words_nl.go +++ /dev/null @@ -1,143 +0,0 @@ -package nl - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const StopName = "stop_nl" - -// this content was obtained from: -// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ -// ` was changed to ' to allow for literal string - -var DutchStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Dutch stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large sample of Dutch text. - - | Dutch stop words frequently exhibit homonym clashes. These are indicated - | clearly below. - -de | the -en | and -van | of, from -ik | I, the ego -te | (1) chez, at etc, (2) to, (3) too -dat | that, which -die | that, those, who, which -in | in, inside -een | a, an, one -hij | he -het | the, it -niet | not, nothing, naught -zijn | (1) to be, being, (2) his, one's, its -is | is -was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river -op | on, upon, at, in, up, used up -aan | on, upon, to (as dative) -met | with, by -als | like, such as, when -voor | (1) before, in front of, (2) furrow -had | had, past tense all persons sing. of 'hebben' (have) -er | there -maar | but, only -om | round, about, for etc -hem | him -dan | then -zou | should/would, past tense all persons sing. of 'zullen' -of | or, whether, if -wat | what, something, anything -mijn | possessive and noun 'mine' -men | people, 'one' -dit | this -zo | so, thus, in this way -door | through by -over | over, across -ze | she, her, they, them -zich | oneself -bij | (1) a bee, (2) by, near, at -ook | also, too -tot | till, until -je | you -mij | me -uit | out of, from -der | Old Dutch form of 'van der' still found in surnames -daar | (1) there, (2) because -haar | (1) her, their, them, (2) hair -naar | (1) unpleasant, unwell etc, (2) towards, (3) as -heb | present first person sing. of 'to have' -hoe | how, why -heeft | present third person sing. of 'to have' -hebben | 'to have' and various parts thereof -deze | this -u | you -want | (1) for, (2) mitten, (3) rigging -nog | yet, still -zal | 'shall', first and third person sing. of verb 'zullen' (will) -me | me -zij | she, they -nu | now -ge | 'thou', still used in Belgium and south Netherlands -geen | none -omdat | because -iets | something, somewhat -worden | to become, grow, get -toch | yet, still -al | all, every, each -waren | (1) 'were' (2) to wander, (3) wares, (3) -veel | much, many -meer | (1) more, (2) lake -doen | to do, to make -toen | then, when -moet | noun 'spot/mote' and present form of 'to must' -ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' -zonder | without -kan | noun 'can' and present form of 'to be able' -hun | their, them -dus | so, consequently -alles | all, everything, anything -onder | under, beneath -ja | yes, of course -eens | once, one day -hier | here -wie | who -werd | imperfect third person sing. of 'become' -altijd | always -doch | yet, but etc -wordt | present third person sing. of 'become' -wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans -kunnen | to be able -ons | us/our -zelf | self -tegen | against, towards, at -na | after, near -reeds | already -wil | (1) present tense of 'want', (2) 'will', noun, (3) fender -kon | could; past tense of 'to be able' -niets | nothing -uw | your -iemand | somebody -geweest | been; past participle of 'be' -andere | other -`) - -func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { - rv := analysis.NewTokenMap() - err := rv.LoadBytes(DutchStopWords) - return rv, err -} - -func init() { - registry.RegisterTokenMap(StopName, TokenMapConstructor) -} diff --git a/analysis/language/no/analyzer_no.go b/analysis/language/no/analyzer_no.go deleted file mode 100644 index aa62927f..00000000 --- a/analysis/language/no/analyzer_no.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package no - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" - - "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" -) - -const AnalyzerName = "no" - -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - icuTokenizer, err := cache.TokenizerNamed(icu.Name) - if err != nil { - return nil, err - } - toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name) - if err != nil { - return nil, err - } - stopNoFilter, err := cache.TokenFilterNamed(StopName) - if err != nil { - return nil, err - } - stemmerNoFilter, err := cache.TokenFilterNamed(StemmerName) - if err != nil { - return nil, err - } - rv := analysis.Analyzer{ - Tokenizer: icuTokenizer, - TokenFilters: []analysis.TokenFilter{ - toLowerFilter, - stopNoFilter, - stemmerNoFilter, - }, - } - return &rv, nil -} - -func init() { - registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) -} diff --git a/analysis/language/no/analyzer_no_test.go b/analysis/language/no/analyzer_no_test.go deleted file mode 100644 index c3026f06..00000000 --- a/analysis/language/no/analyzer_no_test.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package no - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -func TestNorwegianAnalyzer(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - // stemming - { - input: []byte("havnedistriktene"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("havnedistrikt"), - }, - }, - }, - { - input: []byte("havnedistrikter"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("havnedistrikt"), - }, - }, - }, - // stop word - { - input: []byte("det"), - output: analysis.TokenStream{}, - }, - } - - cache := registry.NewCache() - analyzer, err := cache.AnalyzerNamed(AnalyzerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := analyzer.Analyze(test.input) - if len(actual) != len(test.output) { - t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) - } - for i, tok := range actual { - if !reflect.DeepEqual(tok.Term, test.output[i].Term) { - t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) - } - } - } -} diff --git a/analysis/language/no/stemmer_no.go b/analysis/language/no/stemmer_no.go deleted file mode 100644 index e2b60683..00000000 --- a/analysis/language/no/stemmer_no.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package no - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_no" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("no") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/no/stop_filter_no.go b/analysis/language/no/stop_filter_no.go deleted file mode 100644 index f07b91ba..00000000 --- a/analysis/language/no/stop_filter_no.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package no - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" - "github.com/blevesearch/bleve/registry" -) - -func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - tokenMap, err := cache.TokenMapNamed(StopName) - if err != nil { - return nil, err - } - return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil -} - -func init() { - registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) -} diff --git a/analysis/language/no/stop_words_no.go b/analysis/language/no/stop_words_no.go deleted file mode 100644 index bfca3484..00000000 --- a/analysis/language/no/stop_words_no.go +++ /dev/null @@ -1,218 +0,0 @@ -package no - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const StopName = "stop_no" - -// this content was obtained from: -// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ -// ` was changed to ' to allow for literal string - -var NorwegianStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Norwegian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This stop word list is for the dominant bokmål dialect. Words unique - | to nynorsk are marked *. - - | Revised by Jan Bruusgaard , Jan 2005 - -og | and -i | in -jeg | I -det | it/this/that -at | to (w. inf.) -en | a/an -et | a/an -den | it/this/that -til | to -er | is/am/are -som | who/that -på | on -de | they / you(formal) -med | with -han | he -av | of -ikke | not -ikkje | not * -der | there -så | so -var | was/were -meg | me -seg | you -men | but -ett | one -har | have -om | about -vi | we -min | my -mitt | my -ha | have -hadde | had -hun | she -nå | now -over | over -da | when/as -ved | by/know -fra | from -du | you -ut | out -sin | your -dem | them -oss | us -opp | up -man | you/one -kan | can -hans | his -hvor | where -eller | or -hva | what -skal | shall/must -selv | self (reflective) -sjøl | self (reflective) -her | here -alle | all -vil | will -bli | become -ble | became -blei | became * -blitt | have become -kunne | could -inn | in -når | when -være | be -kom | come -noen | some -noe | some -ville | would -dere | you -som | who/which/that -deres | their/theirs -kun | only/just -ja | yes -etter | after -ned | down -skulle | should -denne | this -for | for/because -deg | you -si | hers/his -sine | hers/his -sitt | hers/his -mot | against -å | to -meget | much -hvorfor | why -dette | this -disse | these/those -uten | without -hvordan | how -ingen | none -din | your -ditt | your -blir | become -samme | same -hvilken | which -hvilke | which (plural) -sånn | such a -inni | inside/within -mellom | between -vår | our -hver | each -hvem | who -vors | us/ours -hvis | whose -både | both -bare | only/just -enn | than -fordi | as/because -før | before -mange | many -også | also -slik | just -vært | been -være | to be -båe | both * -begge | both -siden | since -dykk | your * -dykkar | yours * -dei | they * -deira | them * -deires | theirs * -deim | them * -di | your (fem.) * -då | as/when * -eg | I * -ein | a/an * -eit | a/an * -eitt | a/an * -elles | or * -honom | he * -hjå | at * -ho | she * -hoe | she * -henne | her -hennar | her/hers -hennes | hers -hoss | how * -hossen | how * -ikkje | not * -ingi | noone * -inkje | noone * -korleis | how * -korso | how * -kva | what/which * -kvar | where * -kvarhelst | where * -kven | who/whom * -kvi | why * -kvifor | why * -me | we * -medan | while * -mi | my * -mine | my * -mykje | much * -no | now * -nokon | some (masc./neut.) * -noka | some (fem.) * -nokor | some * -noko | some * -nokre | some * -si | his/hers * -sia | since * -sidan | since * -so | so * -somt | some * -somme | some * -um | about* -upp | up * -vere | be * -vore | was * -verte | become * -vort | become * -varte | became * -vart | became * - -`) - -func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { - rv := analysis.NewTokenMap() - err := rv.LoadBytes(NorwegianStopWords) - return rv, err -} - -func init() { - registry.RegisterTokenMap(StopName, TokenMapConstructor) -} diff --git a/analysis/language/porter/stemmer_porter.go b/analysis/language/porter/stemmer_porter.go deleted file mode 100644 index 3e9dca42..00000000 --- a/analysis/language/porter/stemmer_porter.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package porter - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_porter_classic" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("porter") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/pt/stemmer_pt.go b/analysis/language/pt/stemmer_pt.go deleted file mode 100644 index a10c869c..00000000 --- a/analysis/language/pt/stemmer_pt.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package pt - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_pt" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("pt") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/ro/analyzer_ro.go b/analysis/language/ro/analyzer_ro.go deleted file mode 100644 index d4d5c1dd..00000000 --- a/analysis/language/ro/analyzer_ro.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package ro - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" - - "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" -) - -const AnalyzerName = "ro" - -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - icuTokenizer, err := cache.TokenizerNamed(icu.Name) - if err != nil { - return nil, err - } - toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name) - if err != nil { - return nil, err - } - stopRoFilter, err := cache.TokenFilterNamed(StopName) - if err != nil { - return nil, err - } - stemmerRoFilter, err := cache.TokenFilterNamed(StemmerName) - if err != nil { - return nil, err - } - rv := analysis.Analyzer{ - Tokenizer: icuTokenizer, - TokenFilters: []analysis.TokenFilter{ - toLowerFilter, - stopRoFilter, - stemmerRoFilter, - }, - } - return &rv, nil -} - -func init() { - registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) -} diff --git a/analysis/language/ro/analyzer_ro_test.go b/analysis/language/ro/analyzer_ro_test.go deleted file mode 100644 index 6715b905..00000000 --- a/analysis/language/ro/analyzer_ro_test.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package ro - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -func TestRomanianAnalyzer(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - // stemming - { - input: []byte("absenţa"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("absenţ"), - }, - }, - }, - { - input: []byte("absenţi"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("absenţ"), - }, - }, - }, - // stop word - { - input: []byte("îl"), - output: analysis.TokenStream{}, - }, - } - - cache := registry.NewCache() - analyzer, err := cache.AnalyzerNamed(AnalyzerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := analyzer.Analyze(test.input) - if len(actual) != len(test.output) { - t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) - } - for i, tok := range actual { - if !reflect.DeepEqual(tok.Term, test.output[i].Term) { - t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) - } - } - } -} diff --git a/analysis/language/ro/stemmer_ro.go b/analysis/language/ro/stemmer_ro.go deleted file mode 100644 index da18aa66..00000000 --- a/analysis/language/ro/stemmer_ro.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package ro - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_ro" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("ro") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/ro/stop_filter_ro.go b/analysis/language/ro/stop_filter_ro.go deleted file mode 100644 index f9259fc5..00000000 --- a/analysis/language/ro/stop_filter_ro.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package ro - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" - "github.com/blevesearch/bleve/registry" -) - -func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - tokenMap, err := cache.TokenMapNamed(StopName) - if err != nil { - return nil, err - } - return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil -} - -func init() { - registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) -} diff --git a/analysis/language/ro/stop_words_ro.go b/analysis/language/ro/stop_words_ro.go deleted file mode 100644 index e7d62d41..00000000 --- a/analysis/language/ro/stop_words_ro.go +++ /dev/null @@ -1,257 +0,0 @@ -package ro - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const StopName = "stop_ro" - -// this content was obtained from: -// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/ -// ` was changed to ' to allow for literal string - -var RomanianStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -acea -aceasta -această -aceea -acei -aceia -acel -acela -acele -acelea -acest -acesta -aceste -acestea -aceşti -aceştia -acolo -acum -ai -aia -aibă -aici -al -ăla -ale -alea -ălea -altceva -altcineva -am -ar -are -aş -aşadar -asemenea -asta -ăsta -astăzi -astea -ăstea -ăştia -asupra -aţi -au -avea -avem -aveţi -azi -bine -bucur -bună -ca -că -căci -când -care -cărei -căror -cărui -cât -câte -câţi -către -câtva -ce -cel -ceva -chiar -cînd -cine -cineva -cît -cîte -cîţi -cîtva -contra -cu -cum -cumva -curând -curînd -da -dă -dacă -dar -datorită -de -deci -deja -deoarece -departe -deşi -din -dinaintea -dintr -dintre -drept -după -ea -ei -el -ele -eram -este -eşti -eu -face -fără -fi -fie -fiecare -fii -fim -fiţi -iar -ieri -îi -îl -îmi -împotriva -în -înainte -înaintea -încât -încît -încotro -între -întrucât -întrucît -îţi -la -lângă -le -li -lîngă -lor -lui -mă -mâine -mea -mei -mele -mereu -meu -mi -mine -mult -multă -mulţi -ne -nicăieri -nici -nimeni -nişte -noastră -noastre -noi -noştri -nostru -nu -ori -oricând -oricare -oricât -orice -oricînd -oricine -oricît -oricum -oriunde -până -pe -pentru -peste -pînă -poate -pot -prea -prima -primul -prin -printr -sa -să -săi -sale -sau -său -se -şi -sînt -sîntem -sînteţi -spre -sub -sunt -suntem -sunteţi -ta -tăi -tale -tău -te -ţi -ţie -tine -toată -toate -tot -toţi -totuşi -tu -un -una -unde -undeva -unei -unele -uneori -unor -vă -vi -voastră -voastre -voi -voştri -vostru -vouă -vreo -vreun -`) - -func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { - rv := analysis.NewTokenMap() - err := rv.LoadBytes(RomanianStopWords) - return rv, err -} - -func init() { - registry.RegisterTokenMap(StopName, TokenMapConstructor) -} diff --git a/analysis/language/ru/analyzer_ru.go b/analysis/language/ru/analyzer_ru.go deleted file mode 100644 index 2aaff852..00000000 --- a/analysis/language/ru/analyzer_ru.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package ru - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" - - "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" -) - -const AnalyzerName = "ru" - -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - icuTokenizer, err := cache.TokenizerNamed(icu.Name) - if err != nil { - return nil, err - } - toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name) - if err != nil { - return nil, err - } - stopRuFilter, err := cache.TokenFilterNamed(StopName) - if err != nil { - return nil, err - } - stemmerRuFilter, err := cache.TokenFilterNamed(StemmerName) - if err != nil { - return nil, err - } - rv := analysis.Analyzer{ - Tokenizer: icuTokenizer, - TokenFilters: []analysis.TokenFilter{ - toLowerFilter, - stopRuFilter, - stemmerRuFilter, - }, - } - return &rv, nil -} - -func init() { - registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) -} diff --git a/analysis/language/ru/analyzer_ru_test.go b/analysis/language/ru/analyzer_ru_test.go deleted file mode 100644 index eba2f1d1..00000000 --- a/analysis/language/ru/analyzer_ru_test.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package ru - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -func TestRussianAnalyzer(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - // digits safe - { - input: []byte("text 1000"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("text"), - }, - &analysis.Token{ - Term: []byte("1000"), - }, - }, - }, - { - input: []byte("Вместе с тем о силе электромагнитной энергии имели представление еще"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("вмест"), - }, - &analysis.Token{ - Term: []byte("сил"), - }, - &analysis.Token{ - Term: []byte("электромагнитн"), - }, - &analysis.Token{ - Term: []byte("энерг"), - }, - &analysis.Token{ - Term: []byte("имел"), - }, - &analysis.Token{ - Term: []byte("представлен"), - }, - }, - }, - { - input: []byte("Но знание это хранилось в тайне"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("знан"), - }, - &analysis.Token{ - Term: []byte("эт"), - }, - &analysis.Token{ - Term: []byte("хран"), - }, - &analysis.Token{ - Term: []byte("тайн"), - }, - }, - }, - } - - cache := registry.NewCache() - analyzer, err := cache.AnalyzerNamed(AnalyzerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := analyzer.Analyze(test.input) - if len(actual) != len(test.output) { - t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) - } - for i, tok := range actual { - if !reflect.DeepEqual(tok.Term, test.output[i].Term) { - t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) - } - } - } -} diff --git a/analysis/language/ru/stemmer_ru.go b/analysis/language/ru/stemmer_ru.go deleted file mode 100644 index 899d816e..00000000 --- a/analysis/language/ru/stemmer_ru.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package ru - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_ru" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("ru") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/ru/stop_filter_ru.go b/analysis/language/ru/stop_filter_ru.go deleted file mode 100644 index a7fe18a2..00000000 --- a/analysis/language/ru/stop_filter_ru.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package ru - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" - "github.com/blevesearch/bleve/registry" -) - -func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - tokenMap, err := cache.TokenMapNamed(StopName) - if err != nil { - return nil, err - } - return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil -} - -func init() { - registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) -} diff --git a/analysis/language/ru/stop_words_ru.go b/analysis/language/ru/stop_words_ru.go deleted file mode 100644 index 0129f48c..00000000 --- a/analysis/language/ru/stop_words_ru.go +++ /dev/null @@ -1,267 +0,0 @@ -package ru - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const StopName = "stop_ru" - -// this content was obtained from: -// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ -// ` was changed to ' to allow for literal string - -var RussianStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | a russian stop word list. comments begin with vertical bar. each stop - | word is at the start of a line. - - | this is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | letter 'ё' is translated to 'е'. - -и | and -в | in/into -во | alternative form -не | not -что | what/that -он | he -на | on/onto -я | i -с | from -со | alternative form -как | how -а | milder form of 'no' (but) -то | conjunction and form of 'that' -все | all -она | she -так | so, thus -его | him -но | but -да | yes/and -ты | thou -к | towards, by -у | around, chez -же | intensifier particle -вы | you -за | beyond, behind -бы | conditional/subj. particle -по | up to, along -только | only -ее | her -мне | to me -было | it was -вот | here is/are, particle -от | away from -меня | me -еще | still, yet, more -нет | no, there isnt/arent -о | about -из | out of -ему | to him -теперь | now -когда | when -даже | even -ну | so, well -вдруг | suddenly -ли | interrogative particle -если | if -уже | already, but homonym of 'narrower' -или | or -ни | neither -быть | to be -был | he was -него | prepositional form of его -до | up to -вас | you accusative -нибудь | indef. suffix preceded by hyphen -опять | again -уж | already, but homonym of 'adder' -вам | to you -сказал | he said -ведь | particle 'after all' -там | there -потом | then -себя | oneself -ничего | nothing -ей | to her -может | usually with 'быть' as 'maybe' -они | they -тут | here -где | where -есть | there is/are -надо | got to, must -ней | prepositional form of ей -для | for -мы | we -тебя | thee -их | them, their -чем | than -была | she was -сам | self -чтоб | in order to -без | without -будто | as if -человек | man, person, one -чего | genitive form of 'what' -раз | once -тоже | also -себе | to oneself -под | beneath -жизнь | life -будет | will be -ж | short form of intensifer particle 'же' -тогда | then -кто | who -этот | this -говорил | was saying -того | genitive form of 'that' -потому | for that reason -этого | genitive form of 'this' -какой | which -совсем | altogether -ним | prepositional form of 'его', 'они' -здесь | here -этом | prepositional form of 'этот' -один | one -почти | almost -мой | my -тем | instrumental/dative plural of 'тот', 'то' -чтобы | full form of 'in order that' -нее | her (acc.) -кажется | it seems -сейчас | now -были | they were -куда | where to -зачем | why -сказать | to say -всех | all (acc., gen. preposn. plural) -никогда | never -сегодня | today -можно | possible, one can -при | by -наконец | finally -два | two -об | alternative form of 'о', about -другой | another -хоть | even -после | after -над | above -больше | more -тот | that one (masc.) -через | across, in -эти | these -нас | us -про | about -всего | in all, only, of all -них | prepositional form of 'они' (they) -какая | which, feminine -много | lots -разве | interrogative particle -сказала | she said -три | three -эту | this, acc. fem. sing. -моя | my, feminine -впрочем | moreover, besides -хорошо | good -свою | ones own, acc. fem. sing. -этой | oblique form of 'эта', fem. 'this' -перед | in front of -иногда | sometimes -лучше | better -чуть | a little -том | preposn. form of 'that one' -нельзя | one must not -такой | such a one -им | to them -более | more -всегда | always -конечно | of course -всю | acc. fem. sing of 'all' -между | between - - - | b: some paradigms - | - | personal pronouns - | - | я меня мне мной [мною] - | ты тебя тебе тобой [тобою] - | он его ему им [него, нему, ним] - | она ее эи ею [нее, нэи, нею] - | оно его ему им [него, нему, ним] - | - | мы нас нам нами - | вы вас вам вами - | они их им ими [них, ним, ними] - | - | себя себе собой [собою] - | - | demonstrative pronouns: этот (this), тот (that) - | - | этот эта это эти - | этого эты это эти - | этого этой этого этих - | этому этой этому этим - | этим этой этим [этою] этими - | этом этой этом этих - | - | тот та то те - | того ту то те - | того той того тех - | тому той тому тем - | тем той тем [тою] теми - | том той том тех - | - | determinative pronouns - | - | (a) весь (all) - | - | весь вся все все - | всего всю все все - | всего всей всего всех - | всему всей всему всем - | всем всей всем [всею] всеми - | всем всей всем всех - | - | (b) сам (himself etc) - | - | сам сама само сами - | самого саму само самих - | самого самой самого самих - | самому самой самому самим - | самим самой самим [самою] самими - | самом самой самом самих - | - | stems of verbs 'to be', 'to have', 'to do' and modal - | - | быть бы буд быв есть суть - | име - | дел - | мог мож мочь - | уме - | хоч хот - | долж - | можн - | нужн - | нельзя - -`) - -func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { - rv := analysis.NewTokenMap() - err := rv.LoadBytes(RussianStopWords) - return rv, err -} - -func init() { - registry.RegisterTokenMap(StopName, TokenMapConstructor) -} diff --git a/analysis/language/sv/analyzer_sv.go b/analysis/language/sv/analyzer_sv.go deleted file mode 100644 index 252b1745..00000000 --- a/analysis/language/sv/analyzer_sv.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package sv - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" - - "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" -) - -const AnalyzerName = "sv" - -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - icuTokenizer, err := cache.TokenizerNamed(icu.Name) - if err != nil { - return nil, err - } - toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name) - if err != nil { - return nil, err - } - stopSvFilter, err := cache.TokenFilterNamed(StopName) - if err != nil { - return nil, err - } - stemmerSvFilter, err := cache.TokenFilterNamed(StemmerName) - if err != nil { - return nil, err - } - rv := analysis.Analyzer{ - Tokenizer: icuTokenizer, - TokenFilters: []analysis.TokenFilter{ - toLowerFilter, - stopSvFilter, - stemmerSvFilter, - }, - } - return &rv, nil -} - -func init() { - registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) -} diff --git a/analysis/language/sv/analyzer_sv_test.go b/analysis/language/sv/analyzer_sv_test.go deleted file mode 100644 index 4b969572..00000000 --- a/analysis/language/sv/analyzer_sv_test.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package sv - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -func TestSwedishAnalyzer(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - // stemming - { - input: []byte("jaktkarlarne"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("jaktkarl"), - }, - }, - }, - { - input: []byte("jaktkarlens"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("jaktkarl"), - }, - }, - }, - // stop word - { - input: []byte("och"), - output: analysis.TokenStream{}, - }, - } - - cache := registry.NewCache() - analyzer, err := cache.AnalyzerNamed(AnalyzerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := analyzer.Analyze(test.input) - if len(actual) != len(test.output) { - t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) - } - for i, tok := range actual { - if !reflect.DeepEqual(tok.Term, test.output[i].Term) { - t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) - } - } - } -} diff --git a/analysis/language/sv/stemmer_sv.go b/analysis/language/sv/stemmer_sv.go deleted file mode 100644 index 67661307..00000000 --- a/analysis/language/sv/stemmer_sv.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package sv - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_sv" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("sv") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/sv/stop_filter_sv.go b/analysis/language/sv/stop_filter_sv.go deleted file mode 100644 index e8930aac..00000000 --- a/analysis/language/sv/stop_filter_sv.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package sv - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" - "github.com/blevesearch/bleve/registry" -) - -func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - tokenMap, err := cache.TokenMapNamed(StopName) - if err != nil { - return nil, err - } - return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil -} - -func init() { - registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) -} diff --git a/analysis/language/sv/stop_words_sv.go b/analysis/language/sv/stop_words_sv.go deleted file mode 100644 index b4022fd9..00000000 --- a/analysis/language/sv/stop_words_sv.go +++ /dev/null @@ -1,157 +0,0 @@ -package sv - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const StopName = "stop_sv" - -// this content was obtained from: -// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ -// ` was changed to ' to allow for literal string - -var SwedishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Swedish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | Swedish stop words occasionally exhibit homonym clashes. For example - | så = so, but also seed. These are indicated clearly below. - -och | and -det | it, this/that -att | to (with infinitive) -i | in, at -en | a -jag | I -hon | she -som | who, that -han | he -på | on -den | it, this/that -med | with -var | where, each -sig | him(self) etc -för | for -så | so (also: seed) -till | to -är | is -men | but -ett | a -om | if; around, about -hade | had -de | they, these/those -av | of -icke | not, no -mig | me -du | you -henne | her -då | then, when -sin | his -nu | now -har | have -inte | inte någon = no one -hans | his -honom | him -skulle | 'sake' -hennes | her -där | there -min | my -man | one (pronoun) -ej | nor -vid | at, by, on (also: vast) -kunde | could -något | some etc -från | from, off -ut | out -när | when -efter | after, behind -upp | up -vi | we -dem | them -vara | be -vad | what -över | over -än | than -dig | you -kan | can -sina | his -här | here -ha | have -mot | towards -alla | all -under | under (also: wonder) -någon | some etc -eller | or (else) -allt | all -mycket | much -sedan | since -ju | why -denna | this/that -själv | myself, yourself etc -detta | this/that -åt | to -utan | without -varit | was -hur | how -ingen | no -mitt | my -ni | you -bli | to be, become -blev | from bli -oss | us -din | thy -dessa | these/those -några | some etc -deras | their -blir | from bli -mina | my -samma | (the) same -vilken | who, that -er | you, your -sådan | such a -vår | our -blivit | from bli -dess | its -inom | within -mellan | between -sådant | such a -varför | why -varje | each -vilka | who, that -ditt | thy -vem | who -vilket | who, that -sitta | his -sådana | such a -vart | each -dina | thy -vars | whose -vårt | our -våra | our -ert | your -era | your -vilkas | whose - -`) - -func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { - rv := analysis.NewTokenMap() - err := rv.LoadBytes(SwedishStopWords) - return rv, err -} - -func init() { - registry.RegisterTokenMap(StopName, TokenMapConstructor) -} diff --git a/analysis/language/th/analyzer_th.go b/analysis/language/th/analyzer_th.go deleted file mode 100644 index a824954a..00000000 --- a/analysis/language/th/analyzer_th.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build icu full - -package th - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" - - "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" -) - -const AnalyzerName = "th" - -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - unicodeTokenizer, err := cache.TokenizerNamed(TokenizerName) - if err != nil { - return nil, err - } - toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name) - if err != nil { - return nil, err - } - stopThFilter, err := cache.TokenFilterNamed(StopName) - if err != nil { - return nil, err - } - rv := analysis.Analyzer{ - Tokenizer: unicodeTokenizer, - TokenFilters: []analysis.TokenFilter{ - toLowerFilter, - stopThFilter, - }, - } - return &rv, nil -} - -func init() { - registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) -} diff --git a/analysis/language/th/analyzer_th_test.go b/analysis/language/th/analyzer_th_test.go deleted file mode 100644 index 4d62e35f..00000000 --- a/analysis/language/th/analyzer_th_test.go +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build icu full - -package th - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -// tried to adapt these from the lucene tests, most of which either -// use the empty stop dictionary or the english one. - -func TestThaiAnalyzer(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - // stop words - { - input: []byte("การที่ได้ต้องแสดงว่างานดี"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("แสดง"), - Position: 5, - Start: 39, - End: 51, - }, - &analysis.Token{ - Term: []byte("งาน"), - Position: 7, - Start: 60, - End: 69, - }, - &analysis.Token{ - Term: []byte("ดี"), - Position: 8, - Start: 69, - End: 75, - }, - }, - }, - } - - cache := registry.NewCache() - analyzer, err := cache.AnalyzerNamed(AnalyzerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := analyzer.Analyze(test.input) - if !reflect.DeepEqual(actual, test.output) { - t.Errorf("expected %v, got %v", test.output, actual) - } - } -} - -func TestThaiAnalyzerWihtoutOffsets(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - // stop words - { - input: []byte("บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("บริษัท"), - }, - &analysis.Token{ - Term: []byte("ชื่อ"), - }, - &analysis.Token{ - Term: []byte("xy"), - }, - &analysis.Token{ - Term: []byte("z"), - }, - &analysis.Token{ - Term: []byte("คุย"), - }, - &analysis.Token{ - Term: []byte("xyz"), - }, - &analysis.Token{ - Term: []byte("demo.com"), - }, - }, - }, - } - - cache := registry.NewCache() - analyzer, err := cache.AnalyzerNamed(AnalyzerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := analyzer.Analyze(test.input) - if len(actual) != len(test.output) { - t.Errorf("expected length: %d, got %d", len(test.output), len(actual)) - } - for i, tok := range actual { - if !reflect.DeepEqual(tok.Term, test.output[i].Term) { - t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) - } - } - } -} diff --git a/analysis/language/th/stop_filter_th.go b/analysis/language/th/stop_filter_th.go deleted file mode 100644 index b60d1773..00000000 --- a/analysis/language/th/stop_filter_th.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package th - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" - "github.com/blevesearch/bleve/registry" -) - -func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - tokenMap, err := cache.TokenMapNamed(StopName) - if err != nil { - return nil, err - } - return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil -} - -func init() { - registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) -} diff --git a/analysis/language/th/stop_words_th.go b/analysis/language/th/stop_words_th.go deleted file mode 100644 index e99b10d1..00000000 --- a/analysis/language/th/stop_words_th.go +++ /dev/null @@ -1,143 +0,0 @@ -package th - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const StopName = "stop_th" - -// this content was obtained from: -// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ -// ` was changed to ' to allow for literal string - -var ThaiStopWords = []byte(`# Thai stopwords from: -# "Opinion Detection in Thai Political News Columns -# Based on Subjectivity Analysis" -# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak -ไว้ -ไม่ -ไป -ได้ -ให้ -ใน -โดย -แห่ง -แล้ว -และ -แรก -แบบ -แต่ -เอง -เห็น -เลย -เริ่ม -เรา -เมื่อ -เพื่อ -เพราะ -เป็นการ -เป็น -เปิดเผย -เปิด -เนื่องจาก -เดียวกัน -เดียว -เช่น -เฉพาะ -เคย -เข้า -เขา -อีก -อาจ -อะไร -ออก -อย่าง -อยู่ -อยาก -หาก -หลาย -หลังจาก -หลัง -หรือ -หนึ่ง -ส่วน -ส่ง -สุด -สําหรับ -ว่า -วัน -ลง -ร่วม -ราย -รับ -ระหว่าง -รวม -ยัง -มี -มาก -มา -พร้อม -พบ -ผ่าน -ผล -บาง -น่า -นี้ -นํา -นั้น -นัก -นอกจาก -ทุก -ที่สุด -ที่ -ทําให้ -ทํา -ทาง -ทั้งนี้ -ทั้ง -ถ้า -ถูก -ถึง -ต้อง -ต่างๆ -ต่าง -ต่อ -ตาม -ตั้งแต่ -ตั้ง -ด้าน -ด้วย -ดัง -ซึ่ง -ช่วง -จึง -จาก -จัด -จะ -คือ -ความ -ครั้ง -คง -ขึ้น -ของ -ขอ -ขณะ -ก่อน -ก็ -การ -กับ -กัน -กว่า -กล่าว -`) - -func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { - rv := analysis.NewTokenMap() - err := rv.LoadBytes(ThaiStopWords) - return rv, err -} - -func init() { - registry.RegisterTokenMap(StopName, TokenMapConstructor) -} diff --git a/analysis/language/th/unicode_tokenizer_th.go b/analysis/language/th/unicode_tokenizer_th.go deleted file mode 100644 index 3e77382a..00000000 --- a/analysis/language/th/unicode_tokenizer_th.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build icu full - -package th - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" - "github.com/blevesearch/bleve/registry" -) - -const TokenizerName = "icu_th" - -func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { - return icu.NewUnicodeWordBoundaryCustomLocaleTokenizer("th_TH"), nil -} - -func init() { - registry.RegisterTokenizer(TokenizerName, TokenizerConstructor) -} diff --git a/analysis/language/tr/analyzer_tr.go b/analysis/language/tr/analyzer_tr.go deleted file mode 100644 index 8cc06c84..00000000 --- a/analysis/language/tr/analyzer_tr.go +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package tr - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" - - "github.com/blevesearch/bleve/analysis/token_filters/apostrophe_filter" - "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "github.com/blevesearch/bleve/analysis/tokenizers/icu" -) - -const AnalyzerName = "tr" - -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - icuTokenizer, err := cache.TokenizerNamed(icu.Name) - if err != nil { - return nil, err - } - aposFilter, err := cache.TokenFilterNamed(apostrophe_filter.Name) - if err != nil { - return nil, err - } - toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name) - if err != nil { - return nil, err - } - stopTrFilter, err := cache.TokenFilterNamed(StopName) - if err != nil { - return nil, err - } - stemmerTrFilter, err := cache.TokenFilterNamed(StemmerName) - if err != nil { - return nil, err - } - rv := analysis.Analyzer{ - Tokenizer: icuTokenizer, - TokenFilters: []analysis.TokenFilter{ - aposFilter, - toLowerFilter, - stopTrFilter, - stemmerTrFilter, - }, - } - return &rv, nil -} - -func init() { - registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) -} diff --git a/analysis/language/tr/analyzer_tr_test.go b/analysis/language/tr/analyzer_tr_test.go deleted file mode 100644 index 5b5b1131..00000000 --- a/analysis/language/tr/analyzer_tr_test.go +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full -// +build icu full - -package tr - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -func TestTurkishAnalyzer(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - // stemming - { - input: []byte("ağacı"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("ağaç"), - }, - }, - }, - { - input: []byte("ağaç"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("ağaç"), - }, - }, - }, - // stop word - { - input: []byte("dolayı"), - output: analysis.TokenStream{}, - }, - // apostrophes - { - input: []byte("Kıbrıs'ta"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("kıbrıs"), - }, - }, - }, - { - input: []byte("Van Gölü'ne"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("van"), - }, - &analysis.Token{ - Term: []byte("göl"), - }, - }, - }, - } - - cache := registry.NewCache() - analyzer, err := cache.AnalyzerNamed(AnalyzerName) - if err != nil { - t.Fatal(err) - } - for _, test := range tests { - actual := analyzer.Analyze(test.input) - if len(actual) != len(test.output) { - t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) - } - for i, tok := range actual { - if !reflect.DeepEqual(tok.Term, test.output[i].Term) { - t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) - } - } - } -} diff --git a/analysis/language/tr/stemmer_tr.go b/analysis/language/tr/stemmer_tr.go deleted file mode 100644 index 464296ca..00000000 --- a/analysis/language/tr/stemmer_tr.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package tr - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - "github.com/blevesearch/bleve/registry" -) - -const StemmerName = "stemmer_tr" - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return stemmer_filter.NewStemmerFilter("tr") -} - -func init() { - registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) -} diff --git a/analysis/language/tr/stop_filter_tr.go b/analysis/language/tr/stop_filter_tr.go deleted file mode 100644 index f8ab8412..00000000 --- a/analysis/language/tr/stop_filter_tr.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package tr - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" - "github.com/blevesearch/bleve/registry" -) - -func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - tokenMap, err := cache.TokenMapNamed(StopName) - if err != nil { - return nil, err - } - return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil -} - -func init() { - registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) -} diff --git a/analysis/language/tr/stop_words_tr.go b/analysis/language/tr/stop_words_tr.go deleted file mode 100644 index f96fb07e..00000000 --- a/analysis/language/tr/stop_words_tr.go +++ /dev/null @@ -1,236 +0,0 @@ -package tr - -import ( - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const StopName = "stop_tr" - -// this content was obtained from: -// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ -// ` was changed to ' to allow for literal string - -var TurkishStopWords = []byte(`# Turkish stopwords from LUCENE-559 -# merged with the list from "Information Retrieval on Turkish Texts" -# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) -acaba -altmış -altı -ama -ancak -arada -aslında -ayrıca -bana -bazı -belki -ben -benden -beni -benim -beri -beş -bile -bin -bir -birçok -biri -birkaç -birkez -birşey -birşeyi -biz -bize -bizden -bizi -bizim -böyle -böylece -bu -buna -bunda -bundan -bunlar -bunları -bunların -bunu -bunun -burada -çok -çünkü -da -daha -dahi -de -defa -değil -diğer -diye -doksan -dokuz -dolayı -dolayısıyla -dört -edecek -eden -ederek -edilecek -ediliyor -edilmesi -ediyor -eğer -elli -en -etmesi -etti -ettiği -ettiğini -gibi -göre -halen -hangi -hatta -hem -henüz -hep -hepsi -her -herhangi -herkesin -hiç -hiçbir -için -iki -ile -ilgili -ise -işte -itibaren -itibariyle -kadar -karşın -katrilyon -kendi -kendilerine -kendini -kendisi -kendisine -kendisini -kez -ki -kim -kimden -kime -kimi -kimse -kırk -milyar -milyon -mu -mü -mı -nasıl -ne -neden -nedenle -nerde -nerede -nereye -niye -niçin -o -olan -olarak -oldu -olduğu -olduğunu -olduklarını -olmadı -olmadığı -olmak -olması -olmayan -olmaz -olsa -olsun -olup -olur -olursa -oluyor -on -ona -ondan -onlar -onlardan -onları -onların -onu -onun -otuz -oysa -öyle -pek -rağmen -sadece -sanki -sekiz -seksen -sen -senden -seni -senin -siz -sizden -sizi -sizin -şey -şeyden -şeyi -şeyler -şöyle -şu -şuna -şunda -şundan -şunları -şunu -tarafından -trilyon -tüm -üç -üzere -var -vardı -ve -veya -ya -yani -yapacak -yapılan -yapılması -yapıyor -yapmak -yaptı -yaptığı -yaptığını -yaptıkları -yedi -yerine -yetmiş -yine -yirmi -yoksa -yüz -zaten -`) - -func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { - rv := analysis.NewTokenMap() - err := rv.LoadBytes(TurkishStopWords) - return rv, err -} - -func init() { - registry.RegisterTokenMap(StopName, TokenMapConstructor) -} diff --git a/analysis/token_filters/cld2/README.md b/analysis/token_filters/cld2/README.md deleted file mode 100644 index 64d71710..00000000 --- a/analysis/token_filters/cld2/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# cld2 token filter - -A bleve token filter which passes the text of each token and passes it to the cld2 library. The library determines what it thinks the language most likely is. The ISO-639 language code replaces the token term. - -In normal usage, you use this with the "single" tokenizer, so there is only one input token. Further, you should precede it with the "to_lower" filter so that the input term is in all lower-case unicode characters. - -# Building - -1. Acquire the source to cld2 in this directory. - - $ svn checkout -r 167 http://cld2.googlecode.com/svn/trunk/ cld2-read-only - -2. Build cld2 - - As dynamic library - - $ cd cld2-read-only/internal/ - $ ./compile_libs.sh - $ cp *.so /usr/local/lib - $ cd ../.. - - Or static library - - $ ./compile_cld2.sh - $ cp *.a /usr/local/lib - -3. Run the unit tests - - $ go test -v - === RUN TestCld2Filter - --- PASS: TestCld2Filter (0.00 seconds) - PASS - ok github.com/couchbaselabs/bleve/analysis/token_filters/cld2 0.033s diff --git a/analysis/token_filters/cld2/cld2_filter.cc b/analysis/token_filters/cld2/cld2_filter.cc deleted file mode 100644 index cb116715..00000000 --- a/analysis/token_filters/cld2/cld2_filter.cc +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -#include -#include -#include -#include -#include "cld2_filter.h" -#include "cld2-read-only/public/compact_lang_det.h" - -const char* DetectLang(const char *buffer) { - - bool is_plain_text = true; - CLD2::CLDHints cldhints = {NULL, NULL, 0, CLD2::UNKNOWN_LANGUAGE}; - bool allow_extended_lang = true; - int flags = 0; - CLD2::Language language3[3]; - int percent3[3]; - double normalized_score3[3]; - CLD2::ResultChunkVector resultchunkvector; - int text_bytes; - bool is_reliable; - - CLD2::Language summary_lang = CLD2::UNKNOWN_LANGUAGE; - - summary_lang = CLD2::ExtDetectLanguageSummary(buffer, - strlen(buffer), - is_plain_text, - &cldhints, - flags, - language3, - percent3, - normalized_score3, - &resultchunkvector, - &text_bytes, - &is_reliable); - - return CLD2::LanguageCode(summary_lang); -} \ No newline at end of file diff --git a/analysis/token_filters/cld2/cld2_filter.go b/analysis/token_filters/cld2/cld2_filter.go deleted file mode 100644 index 4349ef8a..00000000 --- a/analysis/token_filters/cld2/cld2_filter.go +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build cld2 full - -package cld2 - -// #cgo LDFLAGS: -lcld2_full -// #include "cld2_filter.h" -// #include -import "C" - -import ( - "unsafe" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const Name = "detect_lang" - -type Cld2Filter struct { -} - -func NewCld2Filter() *Cld2Filter { - return &Cld2Filter{} -} - -func (f *Cld2Filter) Filter(input analysis.TokenStream) analysis.TokenStream { - rv := make(analysis.TokenStream, 0, len(input)) - - offset := 0 - for _, token := range input { - var err error - token.Term, err = f.detectLanguage(token.Term) - if err != nil { - token.Term = []byte("error") - } - token.Start = offset - token.End = token.Start + len(token.Term) - token.Type = analysis.AlphaNumeric - rv = append(rv, token) - offset = token.End + 1 - } - - return rv -} - -func (f *Cld2Filter) detectLanguage(input []byte) ([]byte, error) { - cstr := C.CString(string(input)) - res := C.DetectLang(cstr) - return C.GoBytes(unsafe.Pointer(res), C.int(C.strlen(res))), nil -} - -func Cld2FilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - return NewCld2Filter(), nil -} - -func init() { - registry.RegisterTokenFilter(Name, Cld2FilterConstructor) -} diff --git a/analysis/token_filters/cld2/cld2_filter_test.go b/analysis/token_filters/cld2/cld2_filter_test.go deleted file mode 100644 index 00951090..00000000 --- a/analysis/token_filters/cld2/cld2_filter_test.go +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build cld2 full - -package cld2 - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" -) - -func TestCld2Filter(t *testing.T) { - tests := []struct { - input analysis.TokenStream - output analysis.TokenStream - }{ - { - input: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("the quick brown fox"), - Start: 0, - End: 19, - Position: 1, - Type: analysis.AlphaNumeric, - }, - }, - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("en"), - Start: 0, - End: 2, - Position: 1, - Type: analysis.AlphaNumeric, - }, - }, - }, - { - input: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("こんにちは世界"), - Start: 0, - End: 21, - Position: 1, - Type: analysis.AlphaNumeric, - }, - }, - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("ja"), - Start: 0, - End: 2, - Position: 1, - Type: analysis.AlphaNumeric, - }, - }, - }, - { - input: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"), - Start: 0, - End: 72, - Position: 1, - Type: analysis.AlphaNumeric, - }, - }, - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("th"), - Start: 0, - End: 2, - Position: 1, - Type: analysis.AlphaNumeric, - }, - }, - }, - { - input: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("مرحبا، العالم!"), - Start: 0, - End: 26, - Position: 1, - Type: analysis.AlphaNumeric, - }, - }, - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("ar"), - Start: 0, - End: 2, - Position: 1, - Type: analysis.AlphaNumeric, - }, - }, - }, - } - - filter := NewCld2Filter() - for _, test := range tests { - res := filter.Filter(test.input) - if !reflect.DeepEqual(res, test.output) { - t.Errorf("expected:") - for _, token := range test.output { - t.Errorf("%#v - %s", token, token.Term) - } - t.Errorf("got:") - for _, token := range res { - t.Errorf("%#v - %s", token, token.Term) - } - } - } - -} diff --git a/analysis/token_filters/cld2/compile_cld2.sh b/analysis/token_filters/cld2/compile_cld2.sh deleted file mode 100755 index 71840bbe..00000000 --- a/analysis/token_filters/cld2/compile_cld2.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -SRC="cldutil cldutil_shared compact_lang_det compact_lang_det_hint_code compact_lang_det_impl debug fixunicodevalue generated_entities generated_language generated_ulscript getonescriptspan lang_script offsetmap scoreonescriptspan tote utf8statetable cld_generated_cjk_uni_prop_80 cld2_generated_cjk_compatible cld_generated_cjk_delta_bi_32 generated_distinct_bi_0 cld2_generated_quad0122 cld2_generated_deltaocta0122 cld2_generated_distinctocta0122 cld_generated_score_quad_octa_0122"; -OBJ=""; -for f in ${SRC}; do - g++ -c -fPIC -O2 -m64 -o "cld2-read-only/internal/${f}.o" "cld2-read-only/internal/${f}.cc"; - OBJ="${OBJ} cld2-read-only/internal/${f}.o"; -done; - -ar rcs libcld2_full.a ${OBJ}; diff --git a/analysis/token_filters/stemmer_filter/README.md b/analysis/token_filters/stemmer_filter/README.md deleted file mode 100644 index 56b0e863..00000000 --- a/analysis/token_filters/stemmer_filter/README.md +++ /dev/null @@ -1,18 +0,0 @@ -## Languages supported - -"danish", -"dutch", -"english", -"finnish", -"french", -"german", -"hungarian", -"italian", -"norwegian", -"porter", -"portuguese", -"romanian", -"russian", -"spanish", -"swedish", -"turkish" \ No newline at end of file diff --git a/analysis/token_filters/stemmer_filter/stemmer_filter.go b/analysis/token_filters/stemmer_filter/stemmer_filter.go deleted file mode 100644 index 44a6e677..00000000 --- a/analysis/token_filters/stemmer_filter/stemmer_filter.go +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package stemmer_filter - -import ( - "fmt" - - "bitbucket.org/tebeka/snowball" - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const Name = "stem" - -type StemmerFilter struct { - lang string - stemmerPool chan *snowball.Stemmer -} - -func NewStemmerFilter(lang string) (*StemmerFilter, error) { - stemmerPool := make(chan *snowball.Stemmer, 4) - for i := 0; i < 4; i++ { - stemmer, err := snowball.New(lang) - if err != nil { - return nil, err - } - stemmerPool <- stemmer - } - return &StemmerFilter{ - lang: lang, - stemmerPool: stemmerPool, - }, nil -} - -func MustNewStemmerFilter(lang string) *StemmerFilter { - sf, err := NewStemmerFilter(lang) - if err != nil { - panic(err) - } - return sf -} - -func (s *StemmerFilter) List() []string { - return snowball.LangList() -} - -func (s *StemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { - for _, token := range input { - // if it is not a protected keyword, stem it - if !token.KeyWord { - stemmer := <-s.stemmerPool - stemmed := stemmer.Stem(string(token.Term)) - s.stemmerPool <- stemmer - token.Term = []byte(stemmed) - } - } - return input -} - -func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { - langVal, ok := config["lang"].(string) - if !ok { - return nil, fmt.Errorf("must specify stemmer language") - } - lang := langVal - return NewStemmerFilter(lang) -} - -func init() { - registry.RegisterTokenFilter(Name, StemmerFilterConstructor) -} diff --git a/analysis/token_filters/stemmer_filter/stemmer_filter_test.go b/analysis/token_filters/stemmer_filter/stemmer_filter_test.go deleted file mode 100644 index 61a7c98f..00000000 --- a/analysis/token_filters/stemmer_filter/stemmer_filter_test.go +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package stemmer_filter - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" -) - -func TestStemmerFilter(t *testing.T) { - - inputTokenStream := analysis.TokenStream{ - &analysis.Token{ - Term: []byte("walking"), - }, - &analysis.Token{ - Term: []byte("talked"), - }, - &analysis.Token{ - Term: []byte("business"), - }, - &analysis.Token{ - Term: []byte("protected"), - KeyWord: true, - }, - } - - expectedTokenStream := analysis.TokenStream{ - &analysis.Token{ - Term: []byte("walk"), - }, - &analysis.Token{ - Term: []byte("talk"), - }, - &analysis.Token{ - Term: []byte("busi"), - }, - &analysis.Token{ - Term: []byte("protected"), - KeyWord: true, - }, - } - - filter, err := NewStemmerFilter("en") - if err != nil { - t.Fatal(err) - } - ouputTokenStream := filter.Filter(inputTokenStream) - if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) { - t.Errorf("expected %#v got %#v", expectedTokenStream[3], ouputTokenStream[3]) - } -} diff --git a/analysis/tokenizers/icu/boundary.go b/analysis/tokenizers/icu/boundary.go deleted file mode 100644 index 8e6c5dd4..00000000 --- a/analysis/tokenizers/icu/boundary.go +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build icu full - -package icu - -// #cgo LDFLAGS: -licuuc -licudata -// #include -// #include -// #include "unicode/utypes.h" -// #include "unicode/uchar.h" -// #include "unicode/ubrk.h" -// #include "unicode/ustring.h" -import "C" - -import ( - "unsafe" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/registry" -) - -const Name = "icu" - -type UnicodeWordBoundaryTokenizer struct { - locale *C.char -} - -func NewUnicodeWordBoundaryTokenizer() *UnicodeWordBoundaryTokenizer { - return &UnicodeWordBoundaryTokenizer{} -} - -func NewUnicodeWordBoundaryCustomLocaleTokenizer(locale string) *UnicodeWordBoundaryTokenizer { - return &UnicodeWordBoundaryTokenizer{ - locale: C.CString(locale), - } -} - -func (t *UnicodeWordBoundaryTokenizer) Tokenize(input []byte) analysis.TokenStream { - rv := make(analysis.TokenStream, 0) - - if len(input) < 1 { - return rv - } - - // works - var myUnsafePointer = unsafe.Pointer(&(input[0])) - var myCCharPointer *C.char = (*C.char)(myUnsafePointer) - - var inlen C.int32_t = C.int32_t(len(input)) - var buflen C.int32_t = C.int32_t(2*len(input) + 1) // worse case each byte becomes 2 - var stringToExamine []C.UChar = make([]C.UChar, buflen) - var myUnsafePointerToExamine = unsafe.Pointer(&(stringToExamine[0])) - var myUCharPointer *C.UChar = (*C.UChar)(myUnsafePointerToExamine) - C.u_uastrncpy(myUCharPointer, myCCharPointer, inlen) - - var err C.UErrorCode = C.U_ZERO_ERROR - bi := C.ubrk_open(C.UBRK_WORD, t.locale, myUCharPointer, -1, &err) - - if err > C.U_ZERO_ERROR { - return rv - } - - defer C.ubrk_close(bi) - - position := 0 - var prev C.int32_t - p := C.ubrk_first(bi) - for p != C.UBRK_DONE { - - q := C.ubrk_getRuleStatus(bi) - - // convert boundaries back to utf8 positions - var nilCString *C.char - var indexA C.int32_t - - C.u_strToUTF8(nilCString, 0, &indexA, myUCharPointer, prev, &err) - if err > C.U_ZERO_ERROR && err != C.U_BUFFER_OVERFLOW_ERROR { - return rv - } else { - err = C.U_ZERO_ERROR - } - - var indexB C.int32_t - C.u_strToUTF8(nilCString, 0, &indexB, myUCharPointer, p, &err) - if err > C.U_ZERO_ERROR && err != C.U_BUFFER_OVERFLOW_ERROR { - return rv - } else { - err = C.U_ZERO_ERROR - } - - if q != 0 { - position += 1 - token := analysis.Token{ - Start: int(indexA), - End: int(indexB), - Term: input[indexA:indexB], - Position: position, - Type: analysis.AlphaNumeric, - } - if q == 100 { - token.Type = analysis.Numeric - } - if q == 400 { - token.Type = analysis.Ideographic - } - rv = append(rv, &token) - } - prev = p - p = C.ubrk_next(bi) - } - - return rv -} - -func UnicodeWordBoundaryTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { - locale := "" - localeVal, ok := config["locale"].(string) - if ok { - locale = localeVal - } - if locale == "" { - return NewUnicodeWordBoundaryTokenizer(), nil - } else { - return NewUnicodeWordBoundaryCustomLocaleTokenizer(locale), nil - } -} - -func init() { - registry.RegisterTokenizer(Name, UnicodeWordBoundaryTokenizerConstructor) -} diff --git a/analysis/tokenizers/icu/boundary_test.go b/analysis/tokenizers/icu/boundary_test.go deleted file mode 100644 index 65c0db52..00000000 --- a/analysis/tokenizers/icu/boundary_test.go +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build icu full - -package icu - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" -) - -func TestBoundary(t *testing.T) { - - tests := []struct { - input []byte - locale string - output analysis.TokenStream - }{ - { - []byte("Hello World"), - "en_US", - analysis.TokenStream{ - { - Start: 0, - End: 5, - Term: []byte("Hello"), - Position: 1, - Type: analysis.AlphaNumeric, - }, - { - Start: 6, - End: 11, - Term: []byte("World"), - Position: 2, - Type: analysis.AlphaNumeric, - }, - }, - }, - { - []byte("steven's"), - "en_US", - analysis.TokenStream{ - { - Start: 0, - End: 8, - Term: []byte("steven's"), - Position: 1, - Type: analysis.AlphaNumeric, - }, - }, - }, - { - []byte("こんにちは世界"), - "en_US", - analysis.TokenStream{ - { - Start: 0, - End: 15, - Term: []byte("こんにちは"), - Position: 1, - Type: analysis.Ideographic, - }, - { - Start: 15, - End: 21, - Term: []byte("世界"), - Position: 2, - Type: analysis.Ideographic, - }, - }, - }, - { - []byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"), - "th_TH", - analysis.TokenStream{ - { - Start: 0, - End: 9, - Term: []byte("แยก"), - Position: 1, - Type: analysis.AlphaNumeric, - }, - { - Start: 9, - End: 15, - Term: []byte("คำ"), - Position: 2, - Type: analysis.AlphaNumeric, - }, - { - Start: 15, - End: 27, - Term: []byte("ภาษา"), - Position: 3, - Type: analysis.AlphaNumeric, - }, - { - Start: 27, - End: 36, - Term: []byte("ไทย"), - Position: 4, - Type: analysis.AlphaNumeric, - }, - { - Start: 36, - End: 42, - Term: []byte("ก็"), - Position: 5, - Type: analysis.AlphaNumeric, - }, - { - Start: 42, - End: 57, - Term: []byte("ทำได้"), - Position: 6, - Type: analysis.AlphaNumeric, - }, - { - Start: 57, - End: 63, - Term: []byte("นะ"), - Position: 7, - Type: analysis.AlphaNumeric, - }, - { - Start: 63, - End: 72, - Term: []byte("จ้ะ"), - Position: 8, - Type: analysis.AlphaNumeric, - }, - }, - }, - { - []byte("age 25"), - "en_US", - analysis.TokenStream{ - { - Start: 0, - End: 3, - Term: []byte("age"), - Position: 1, - Type: analysis.AlphaNumeric, - }, - { - Start: 4, - End: 6, - Term: []byte("25"), - Position: 2, - Type: analysis.Numeric, - }, - }, - }, - } - - for _, test := range tests { - tokenizer := NewUnicodeWordBoundaryCustomLocaleTokenizer(test.locale) - actual := tokenizer.Tokenize(test.input) - - if !reflect.DeepEqual(actual, test.output) { - t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input)) - } - } -} - -var sampleLargeInput = []byte(`There are three characteristics of liquids which are relevant to the discussion of a BLEVE: -If a liquid in a sealed container is boiled, the pressure inside the container increases. As the liquid changes to a gas it expands - this expansion in a vented container would cause the gas and liquid to take up more space. In a sealed container the gas and liquid are not able to take up more space and so the pressure rises. Pressurized vessels containing liquids can reach an equilibrium where the liquid stops boiling and the pressure stops rising. This occurs when no more heat is being added to the system (either because it has reached ambient temperature or has had a heat source removed). -The boiling temperature of a liquid is dependent on pressure - high pressures will yield high boiling temperatures, and low pressures will yield low boiling temperatures. A common simple experiment is to place a cup of water in a vacuum chamber, and then reduce the pressure in the chamber until the water boils. By reducing the pressure the water will boil even at room temperature. This works both ways - if the pressure is increased beyond normal atmospheric pressures, the boiling of hot water could be suppressed far beyond normal temperatures. The cooling system of a modern internal combustion engine is a real-world example. -When a liquid boils it turns into a gas. The resulting gas takes up far more space than the liquid did. -Typically, a BLEVE starts with a container of liquid which is held above its normal, atmospheric-pressure boiling temperature. Many substances normally stored as liquids, such as CO2, oxygen, and other similar industrial gases have boiling temperatures, at atmospheric pressure, far below room temperature. In the case of water, a BLEVE could occur if a pressurized chamber of water is heated far beyond the standard 100 °C (212 °F). That container, because the boiling water pressurizes it, is capable of holding liquid water at very high temperatures. -If the pressurized vessel, containing liquid at high temperature (which may be room temperature, depending on the substance) ruptures, the pressure which prevents the liquid from boiling is lost. If the rupture is catastrophic, where the vessel is immediately incapable of holding any pressure at all, then there suddenly exists a large mass of liquid which is at very high temperature and very low pressure. This causes the entire volume of liquid to instantaneously boil, which in turn causes an extremely rapid expansion. Depending on temperatures, pressures and the substance involved, that expansion may be so rapid that it can be classified as an explosion, fully capable of inflicting severe damage on its surroundings.`) - -func BenchmarkTokenizeEnglishText(b *testing.B) { - - tokenizer := NewUnicodeWordBoundaryCustomLocaleTokenizer("en_US") - b.ResetTimer() - - for i := 0; i < b.N; i++ { - tokenizer.Tokenize(sampleLargeInput) - } - -} diff --git a/config.go b/config.go index 0071d367..f9100bf2 100644 --- a/config.go +++ b/config.go @@ -16,103 +16,10 @@ import ( "time" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/store/boltdb" + "github.com/blevesearch/bleve/index/upside_down" "github.com/blevesearch/bleve/registry" - - // token maps - _ "github.com/blevesearch/bleve/analysis/token_map" - - // fragment formatters - _ "github.com/blevesearch/bleve/search/highlight/fragment_formatters/ansi" - _ "github.com/blevesearch/bleve/search/highlight/fragment_formatters/html" - - // fragmenters - _ "github.com/blevesearch/bleve/search/highlight/fragmenters/simple" - - // highlighters - _ "github.com/blevesearch/bleve/search/highlight/highlighters/simple" - - // char filters - _ "github.com/blevesearch/bleve/analysis/char_filters/html_char_filter" - _ "github.com/blevesearch/bleve/analysis/char_filters/regexp_char_filter" - _ "github.com/blevesearch/bleve/analysis/char_filters/zero_width_non_joiner" - - // analyzers - _ "github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer" - _ "github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer" - _ "github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer" - _ "github.com/blevesearch/bleve/analysis/analyzers/standard_analyzer" - - // token filters - _ "github.com/blevesearch/bleve/analysis/token_filters/apostrophe_filter" - _ "github.com/blevesearch/bleve/analysis/token_filters/compound" - _ "github.com/blevesearch/bleve/analysis/token_filters/edge_ngram_filter" - _ "github.com/blevesearch/bleve/analysis/token_filters/elision_filter" - _ "github.com/blevesearch/bleve/analysis/token_filters/keyword_marker_filter" - _ "github.com/blevesearch/bleve/analysis/token_filters/length_filter" - _ "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - _ "github.com/blevesearch/bleve/analysis/token_filters/ngram_filter" - _ "github.com/blevesearch/bleve/analysis/token_filters/shingle" - _ "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" - _ "github.com/blevesearch/bleve/analysis/token_filters/truncate_token_filter" - _ "github.com/blevesearch/bleve/analysis/token_filters/unicode_normalize" - - // tokenizers - _ "github.com/blevesearch/bleve/analysis/tokenizers/exception" - _ "github.com/blevesearch/bleve/analysis/tokenizers/regexp_tokenizer" - _ "github.com/blevesearch/bleve/analysis/tokenizers/single_token" - _ "github.com/blevesearch/bleve/analysis/tokenizers/unicode" - _ "github.com/blevesearch/bleve/analysis/tokenizers/whitespace_tokenizer" - - // date time parsers - _ "github.com/blevesearch/bleve/analysis/datetime_parsers/datetime_optional" - _ "github.com/blevesearch/bleve/analysis/datetime_parsers/flexible_go" - - // languages - _ "github.com/blevesearch/bleve/analysis/language/ar" - _ "github.com/blevesearch/bleve/analysis/language/bg" - _ "github.com/blevesearch/bleve/analysis/language/ca" - _ "github.com/blevesearch/bleve/analysis/language/cjk" - _ "github.com/blevesearch/bleve/analysis/language/ckb" - _ "github.com/blevesearch/bleve/analysis/language/cs" - _ "github.com/blevesearch/bleve/analysis/language/da" - _ "github.com/blevesearch/bleve/analysis/language/de" - _ "github.com/blevesearch/bleve/analysis/language/el" - _ "github.com/blevesearch/bleve/analysis/language/en" - _ "github.com/blevesearch/bleve/analysis/language/es" - _ "github.com/blevesearch/bleve/analysis/language/eu" - _ "github.com/blevesearch/bleve/analysis/language/fa" - _ "github.com/blevesearch/bleve/analysis/language/fi" - _ "github.com/blevesearch/bleve/analysis/language/fr" - _ "github.com/blevesearch/bleve/analysis/language/ga" - _ "github.com/blevesearch/bleve/analysis/language/gl" - _ "github.com/blevesearch/bleve/analysis/language/hi" - _ "github.com/blevesearch/bleve/analysis/language/hu" - _ "github.com/blevesearch/bleve/analysis/language/hy" - _ "github.com/blevesearch/bleve/analysis/language/id" - _ "github.com/blevesearch/bleve/analysis/language/in" - _ "github.com/blevesearch/bleve/analysis/language/it" - _ "github.com/blevesearch/bleve/analysis/language/nl" - _ "github.com/blevesearch/bleve/analysis/language/no" - _ "github.com/blevesearch/bleve/analysis/language/pt" - _ "github.com/blevesearch/bleve/analysis/language/ro" - _ "github.com/blevesearch/bleve/analysis/language/ru" - _ "github.com/blevesearch/bleve/analysis/language/sv" - _ "github.com/blevesearch/bleve/analysis/language/th" - _ "github.com/blevesearch/bleve/analysis/language/tr" - - // kv stores - _ "github.com/blevesearch/bleve/index/store/boltdb" - _ "github.com/blevesearch/bleve/index/store/goleveldb" - _ "github.com/blevesearch/bleve/index/store/gtreap" - _ "github.com/blevesearch/bleve/index/store/inmem" - - // index types - _ "github.com/blevesearch/bleve/index/upside_down" - - // byte array converters - _ "github.com/blevesearch/bleve/analysis/byte_array_converters/ignore" - _ "github.com/blevesearch/bleve/analysis/byte_array_converters/json" - _ "github.com/blevesearch/bleve/analysis/byte_array_converters/string" + "github.com/blevesearch/bleve/search/highlight/highlighters/html" ) var bleveExpVar = expvar.NewMap("bleve") @@ -146,44 +53,14 @@ func init() { // build the default configuration Config = newConfiguration() - _, err := Config.Cache.DefineFragmentFormatter("highlightSpanHTML", - map[string]interface{}{ - "type": "html", - "before": ``, - "after": ``, - }) - if err != nil { - panic(err) - } - - _, err = Config.Cache.DefineHighlighter("html", - map[string]interface{}{ - "type": "simple", - "fragmenter": "simple", - "formatter": "highlightSpanHTML", - }) - if err != nil { - panic(err) - } - - _, err = Config.Cache.DefineHighlighter("ansi", - map[string]interface{}{ - "type": "simple", - "fragmenter": "simple", - "formatter": "ansi", - }) - if err != nil { - panic(err) - } - // set the default highlighter - Config.DefaultHighlighter = "html" + Config.DefaultHighlighter = html.Name // default kv store - Config.DefaultKVStore = "boltdb" + Config.DefaultKVStore = boltdb.Name // default index - Config.DefaultIndexType = "upside_down" + Config.DefaultIndexType = upside_down.Name bootDuration := time.Since(bootStart) bleveExpVar.Add("bootDuration", int64(bootDuration)) diff --git a/config/README.md b/config/README.md new file mode 100644 index 00000000..5a8e4970 --- /dev/null +++ b/config/README.md @@ -0,0 +1,11 @@ +# Bleve Config + +**NOTE** you probably do not need this package. It is only intended for general purpose applications that want to include large parts of Bleve regardless of whether or not the code is directly using it. + +## General Purpose Applications + +A general purpose application, that must allow users to express the need for Bleve components at runtime can accomplish this by: + +``` +import _ "github.com/blevesearch/bleve/config" +``` \ No newline at end of file diff --git a/config/config.go b/config/config.go new file mode 100644 index 00000000..bdb3036e --- /dev/null +++ b/config/config.go @@ -0,0 +1,98 @@ +// Copyright (c) 2015 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + +package config + +import ( + // token maps + _ "github.com/blevesearch/bleve/analysis/token_map" + + // fragment formatters + _ "github.com/blevesearch/bleve/search/highlight/fragment_formatters/ansi" + _ "github.com/blevesearch/bleve/search/highlight/fragment_formatters/html" + + // fragmenters + _ "github.com/blevesearch/bleve/search/highlight/fragmenters/simple" + + // highlighters + _ "github.com/blevesearch/bleve/search/highlight/highlighters/ansi" + _ "github.com/blevesearch/bleve/search/highlight/highlighters/html" + _ "github.com/blevesearch/bleve/search/highlight/highlighters/simple" + + // char filters + _ "github.com/blevesearch/bleve/analysis/char_filters/html_char_filter" + _ "github.com/blevesearch/bleve/analysis/char_filters/regexp_char_filter" + _ "github.com/blevesearch/bleve/analysis/char_filters/zero_width_non_joiner" + + // analyzers + _ "github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer" + _ "github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer" + _ "github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer" + _ "github.com/blevesearch/bleve/analysis/analyzers/standard_analyzer" + + // token filters + _ "github.com/blevesearch/bleve/analysis/token_filters/apostrophe_filter" + _ "github.com/blevesearch/bleve/analysis/token_filters/compound" + _ "github.com/blevesearch/bleve/analysis/token_filters/edge_ngram_filter" + _ "github.com/blevesearch/bleve/analysis/token_filters/elision_filter" + _ "github.com/blevesearch/bleve/analysis/token_filters/keyword_marker_filter" + _ "github.com/blevesearch/bleve/analysis/token_filters/length_filter" + _ "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" + _ "github.com/blevesearch/bleve/analysis/token_filters/ngram_filter" + _ "github.com/blevesearch/bleve/analysis/token_filters/shingle" + _ "github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter" + _ "github.com/blevesearch/bleve/analysis/token_filters/truncate_token_filter" + _ "github.com/blevesearch/bleve/analysis/token_filters/unicode_normalize" + + // tokenizers + _ "github.com/blevesearch/bleve/analysis/tokenizers/exception" + _ "github.com/blevesearch/bleve/analysis/tokenizers/regexp_tokenizer" + _ "github.com/blevesearch/bleve/analysis/tokenizers/single_token" + _ "github.com/blevesearch/bleve/analysis/tokenizers/unicode" + _ "github.com/blevesearch/bleve/analysis/tokenizers/whitespace_tokenizer" + + // date time parsers + _ "github.com/blevesearch/bleve/analysis/datetime_parsers/datetime_optional" + _ "github.com/blevesearch/bleve/analysis/datetime_parsers/flexible_go" + + // languages + _ "github.com/blevesearch/bleve/analysis/language/ar" + _ "github.com/blevesearch/bleve/analysis/language/bg" + _ "github.com/blevesearch/bleve/analysis/language/ca" + _ "github.com/blevesearch/bleve/analysis/language/cjk" + _ "github.com/blevesearch/bleve/analysis/language/ckb" + _ "github.com/blevesearch/bleve/analysis/language/cs" + _ "github.com/blevesearch/bleve/analysis/language/el" + _ "github.com/blevesearch/bleve/analysis/language/en" + _ "github.com/blevesearch/bleve/analysis/language/eu" + _ "github.com/blevesearch/bleve/analysis/language/fa" + _ "github.com/blevesearch/bleve/analysis/language/fr" + _ "github.com/blevesearch/bleve/analysis/language/ga" + _ "github.com/blevesearch/bleve/analysis/language/gl" + _ "github.com/blevesearch/bleve/analysis/language/hi" + _ "github.com/blevesearch/bleve/analysis/language/hy" + _ "github.com/blevesearch/bleve/analysis/language/id" + _ "github.com/blevesearch/bleve/analysis/language/in" + _ "github.com/blevesearch/bleve/analysis/language/it" + _ "github.com/blevesearch/bleve/analysis/language/pt" + + // kv stores + _ "github.com/blevesearch/bleve/index/store/boltdb" + _ "github.com/blevesearch/bleve/index/store/goleveldb" + _ "github.com/blevesearch/bleve/index/store/gtreap" + _ "github.com/blevesearch/bleve/index/store/inmem" + + // index types + _ "github.com/blevesearch/bleve/index/upside_down" + + // byte array converters + _ "github.com/blevesearch/bleve/analysis/byte_array_converters/ignore" + _ "github.com/blevesearch/bleve/analysis/byte_array_converters/json" + _ "github.com/blevesearch/bleve/analysis/byte_array_converters/string" +) diff --git a/analysis/token_filters/cld2/cld2_filter.h b/config/config_cld2.go similarity index 76% rename from analysis/token_filters/cld2/cld2_filter.h rename to config/config_cld2.go index 8f692986..01f156a0 100644 --- a/analysis/token_filters/cld2/cld2_filter.h +++ b/config/config_cld2.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2015 Couchbase, Inc. // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file // except in compliance with the License. You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 @@ -6,13 +6,11 @@ // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -#ifdef __cplusplus -extern "C" { -#endif -const char* DetectLang(const char *buffer); +// +build cld2 full -#ifdef __cplusplus -} /* extern "C" */ -#endif +package config +import ( + _ "github.com/blevesearch/blevex/detect_lang" +) diff --git a/config_cznicb.go b/config/config_cznicb.go similarity index 82% rename from config_cznicb.go rename to config/config_cznicb.go index 336ea79c..3eacfc66 100644 --- a/config_cznicb.go +++ b/config/config_cznicb.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2015 Couchbase, Inc. // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file // except in compliance with the License. You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 @@ -7,10 +7,10 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -// +build go1.4 +// +build cznicb full -package bleve +package config import ( - _ "github.com/blevesearch/bleve/index/store/cznicb" + _ "github.com/blevesearch/blevex/cznicb" ) diff --git a/config_forestdb.go b/config/config_forestdb.go similarity index 81% rename from config_forestdb.go rename to config/config_forestdb.go index 206ad246..6a379b39 100644 --- a/config_forestdb.go +++ b/config/config_forestdb.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2015 Couchbase, Inc. // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file // except in compliance with the License. You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 @@ -7,10 +7,10 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -// +build forestdb +// +build forestdb full -package bleve +package config import ( - _ "github.com/blevesearch/bleve/index/store/forestdb" + _ "github.com/blevesearch/blevex/forestdb" ) diff --git a/config_icu.go b/config/config_icu.go similarity index 83% rename from config_icu.go rename to config/config_icu.go index c43e2866..972761ce 100644 --- a/config_icu.go +++ b/config/config_icu.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2015 Couchbase, Inc. // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file // except in compliance with the License. You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 @@ -9,8 +9,8 @@ // +build icu full -package bleve +package config import ( - _ "github.com/blevesearch/bleve/analysis/tokenizers/icu" + _ "github.com/blevesearch/blevex/lang/th" ) diff --git a/config_kagome.go b/config/config_kagome.go similarity index 92% rename from config_kagome.go rename to config/config_kagome.go index 58634c3f..e943181c 100644 --- a/config_kagome.go +++ b/config/config_kagome.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2015 Couchbase, Inc. // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file // except in compliance with the License. You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 @@ -9,7 +9,7 @@ // +build kagome full -package bleve +package config import ( _ "github.com/blevesearch/bleve/analysis/language/ja" diff --git a/config_leveldb.go b/config/config_leveldb.go similarity index 74% rename from config_leveldb.go rename to config/config_leveldb.go index 0af9f59b..de2dac9e 100644 --- a/config_leveldb.go +++ b/config/config_leveldb.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2015 Couchbase, Inc. // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file // except in compliance with the License. You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 @@ -9,13 +9,8 @@ // +build leveldb full -package bleve +package config import ( - _ "github.com/blevesearch/bleve/index/store/leveldb" + _ "github.com/blevesearch/blevex/leveldb" ) - -func init() { - // install leveldb as the default kv store - Config.DefaultKVStore = "leveldb" -} diff --git a/config/config_libstemmer.go b/config/config_libstemmer.go new file mode 100644 index 00000000..e5947e6d --- /dev/null +++ b/config/config_libstemmer.go @@ -0,0 +1,31 @@ +// Copyright (c) 2015 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + +// +build libstemmer full + +package config + +import ( + _ "github.com/blevesearch/blevex/lang/da" + _ "github.com/blevesearch/blevex/lang/de" + _ "github.com/blevesearch/blevex/lang/en" + _ "github.com/blevesearch/blevex/lang/es" + _ "github.com/blevesearch/blevex/lang/fi" + _ "github.com/blevesearch/blevex/lang/fr" + _ "github.com/blevesearch/blevex/lang/hu" + _ "github.com/blevesearch/blevex/lang/it" + _ "github.com/blevesearch/blevex/lang/nl" + _ "github.com/blevesearch/blevex/lang/no" + _ "github.com/blevesearch/blevex/lang/pt" + _ "github.com/blevesearch/blevex/lang/ro" + _ "github.com/blevesearch/blevex/lang/ru" + _ "github.com/blevesearch/blevex/lang/sv" + _ "github.com/blevesearch/blevex/lang/th" + _ "github.com/blevesearch/blevex/lang/tr" +) diff --git a/config_rocksdb.go b/config/config_rocksdb.go similarity index 81% rename from config_rocksdb.go rename to config/config_rocksdb.go index 0425c228..7f1fc3e6 100644 --- a/config_rocksdb.go +++ b/config/config_rocksdb.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2015 Couchbase, Inc. // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file // except in compliance with the License. You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 @@ -7,10 +7,10 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -// +build rocksdb +// +build rocksdb full -package bleve +package config import ( - _ "github.com/blevesearch/bleve/index/store/gorocksdb" + _ "github.com/blevesearch/blevex/rocksdb" ) diff --git a/config_cld2.go b/config_cld2.go deleted file mode 100644 index 84961cde..00000000 --- a/config_cld2.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build cld2 full - -package bleve - -import ( - // cld2 token filter - _ "github.com/blevesearch/bleve/analysis/token_filters/cld2" - - // detect language analyzer - _ "github.com/blevesearch/bleve/analysis/analyzers/detect_lang_analyzer" -) diff --git a/config_stemmer.go b/config_stemmer.go deleted file mode 100644 index c99b8106..00000000 --- a/config_stemmer.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build libstemmer full - -package bleve - -import ( - _ "github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter" - - _ "github.com/blevesearch/bleve/analysis/language/porter" -) diff --git a/examples_test.go b/examples_test.go index 43d25fc0..30e4fdb8 100644 --- a/examples_test.go +++ b/examples_test.go @@ -14,6 +14,8 @@ import ( "os" "testing" "time" + + "github.com/blevesearch/bleve/search/highlight/highlighters/ansi" ) var mapping *IndexMapping @@ -312,7 +314,7 @@ func ExampleNewHighlight() { func ExampleNewHighlightWithStyle() { query := NewMatchQuery("nameless") searchRequest := NewSearchRequest(query) - searchRequest.Highlight = NewHighlightWithStyle("ansi") + searchRequest.Highlight = NewHighlightWithStyle(ansi.Name) searchResults, err := example_index.Search(searchRequest) if err != nil { panic(err) diff --git a/index/store/cznicb/batch.go b/index/store/cznicb/batch.go deleted file mode 100644 index 02757eca..00000000 --- a/index/store/cznicb/batch.go +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2015 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the -// License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an "AS -// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language -// governing permissions and limitations under the License. - -// +build go1.4 - -package cznicb - -import () - -type op struct { - k []byte - v []byte -} - -type Batch struct { - s *Store - ops []op - merges map[string][][]byte -} - -func (b *Batch) Set(k, v []byte) { - b.ops = append(b.ops, op{k, v}) -} - -func (b *Batch) Delete(k []byte) { - b.ops = append(b.ops, op{k, nil}) -} - -func (b *Batch) Merge(key, val []byte) { - ops, ok := b.merges[string(key)] - if ok && len(ops) > 0 { - last := ops[len(ops)-1] - mergedVal, partialMergeOk := b.s.mo.PartialMerge(key, last, val) - if partialMergeOk { - // replace last entry with the result of the merge - ops[len(ops)-1] = mergedVal - } else { - // could not partial merge, append this to the end - ops = append(ops, val) - } - } else { - ops = [][]byte{val} - } - b.merges[string(key)] = ops -} - -func (b *Batch) Execute() (err error) { - b.s.m.Lock() - defer b.s.m.Unlock() - - t := b.s.t - for key, mergeOps := range b.merges { - k := []byte(key) - t.Put(k, func(oldV interface{}, exists bool) (newV interface{}, write bool) { - ob := []byte(nil) - if exists && oldV != nil { - ob = oldV.([]byte) - } - mergedVal, fullMergeOk := b.s.mo.FullMerge(k, ob, mergeOps) - if !fullMergeOk { - return nil, false - } - return mergedVal, true - }) - } - - for _, op := range b.ops { - if op.v != nil { - t.Set(op.k, op.v) - } else { - t.Delete(op.k) - } - } - - return nil -} - -func (b *Batch) Close() error { - return nil -} diff --git a/index/store/cznicb/cznicb.go b/index/store/cznicb/cznicb.go deleted file mode 100644 index e1887292..00000000 --- a/index/store/cznicb/cznicb.go +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2015 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the -// License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an "AS -// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language -// governing permissions and limitations under the License. - -// +build go1.4 - -// Package cznicb provides an in-memory implementation of the KVStore -// interfaces using the cznic/b in-memory btree. Of note: this -// implementation does not have reader isolation. -package cznicb - -import ( - "bytes" - "fmt" - "sync" - - "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/registry" - - "github.com/cznic/b" -) - -const Name = "cznicb" - -const MAX_CONCURRENT_WRITERS = 1 - -func init() { - registry.RegisterKVStore(Name, StoreConstructor) -} - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - s := &Store{ - t: b.TreeNew(itemCompare), - availableWriters: make(chan bool, MAX_CONCURRENT_WRITERS), - } - for i := 0; i < MAX_CONCURRENT_WRITERS; i++ { - s.availableWriters <- true - } - return s, nil -} - -func itemCompare(a, b interface{}) int { - return bytes.Compare(a.([]byte), b.([]byte)) -} - -type Store struct { - availableWriters chan bool - m sync.RWMutex - t *b.Tree - mo store.MergeOperator -} - -func (s *Store) Open() error { - return nil -} - -func (s *Store) SetMergeOperator(mo store.MergeOperator) { - s.mo = mo -} - -func (s *Store) Reader() (store.KVReader, error) { - return &Reader{s: s}, nil -} - -func (s *Store) Writer() (store.KVWriter, error) { - available, ok := <-s.availableWriters - if !ok || !available { - return nil, fmt.Errorf("no available writers") - } - return &Writer{s: s, r: &Reader{s: s}}, nil -} - -func (s *Store) Close() error { - return nil -} - -func (s *Store) get(k []byte) ([]byte, error) { - s.m.RLock() - defer s.m.RUnlock() - v, ok := s.t.Get(k) - if !ok || v == nil { - return nil, nil - } - return v.([]byte), nil -} - -func (s *Store) iterator(k []byte) store.KVIterator { - iter := &Iterator{s: s} - iter.Seek(k) - return iter -} - -func (s *Store) set(k, v []byte) (err error) { - s.m.Lock() - defer s.m.Unlock() - s.t.Set(k, v) - return nil -} - -func (s *Store) delete(k []byte) (err error) { - s.m.Lock() - defer s.m.Unlock() - s.t.Delete(k) - return nil -} diff --git a/index/store/cznicb/cznicb_test.go b/index/store/cznicb/cznicb_test.go deleted file mode 100644 index ca48e187..00000000 --- a/index/store/cznicb/cznicb_test.go +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the -// License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an "AS -// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language -// governing permissions and limitations under the License. - -package cznicb - -import ( - "testing" - - "github.com/blevesearch/bleve/index/store" -) - -func TestCznicBStore(t *testing.T) { - s, err := StoreConstructor(nil) - if err != nil { - t.Fatal(err) - } - - CommonTestKVStore(t, s) -} - -func CommonTestKVStore(t *testing.T, s store.KVStore) { - - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - v, err := writer.Get([]byte("a")) - if err != nil { - t.Fatal(err) - } - if string(v) != "val-a" { - t.Errorf("expected val-a") - } - v, err = writer.Get([]byte("not-there")) - if err != nil { - t.Fatal(err) - } - if v != nil { - t.Errorf("expected nil v") - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), []byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - it := reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != "val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() - if err != nil { - t.Fatal(err) - } -} diff --git a/index/store/cznicb/iterator.go b/index/store/cznicb/iterator.go deleted file mode 100644 index eadcd7bd..00000000 --- a/index/store/cznicb/iterator.go +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) 2015 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the -// License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an "AS -// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language -// governing permissions and limitations under the License. - -// +build go1.4 - -package cznicb - -import ( - "errors" - - "github.com/cznic/b" -) - -var iteratorDoneErr = errors.New("iteratorDoneErr") // A sentinel value. - -type Iterator struct { // Assuming that iterators are used single-threaded. - s *Store - e *b.Enumerator - - currK interface{} - currV interface{} - currErr error -} - -func (i *Iterator) SeekFirst() { - i.currK = nil - i.currV = nil - i.currErr = nil - - var err error - i.s.m.RLock() - i.e, err = i.s.t.SeekFirst() - i.s.m.RUnlock() // cannot defer, must unlock before Next - if err != nil { - i.currK = nil - i.currV = nil - i.currErr = iteratorDoneErr - } - - i.Next() -} - -func (i *Iterator) Seek(k []byte) { - i.currK = nil - i.currV = nil - i.currErr = nil - - i.s.m.RLock() - i.e, _ = i.s.t.Seek(k) - i.s.m.RUnlock() // cannot defer, must unlock before Next - - i.Next() -} - -func (i *Iterator) Next() { - if i.currErr != nil { - i.currK = nil - i.currV = nil - i.currErr = iteratorDoneErr - return - } - - i.s.m.RLock() - defer i.s.m.RUnlock() - i.currK, i.currV, i.currErr = i.e.Next() -} - -func (i *Iterator) Current() ([]byte, []byte, bool) { - if i.currErr == iteratorDoneErr || - i.currK == nil || - i.currV == nil { - return nil, nil, false - } - - return i.currK.([]byte), i.currV.([]byte), true -} - -func (i *Iterator) Key() []byte { - k, _, ok := i.Current() - if !ok { - return nil - } - return k -} - -func (i *Iterator) Value() []byte { - _, v, ok := i.Current() - if !ok { - return nil - } - return v -} - -func (i *Iterator) Valid() bool { - _, _, ok := i.Current() - return ok -} - -func (i *Iterator) Close() error { - if i.e != nil { - i.e.Close() - } - i.e = nil - return nil -} diff --git a/index/store/cznicb/reader.go b/index/store/cznicb/reader.go deleted file mode 100644 index 4956f7dd..00000000 --- a/index/store/cznicb/reader.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2015 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the -// License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an "AS -// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language -// governing permissions and limitations under the License. - -// +build go1.4 - -package cznicb - -import ( - "github.com/blevesearch/bleve/index/store" -) - -type Reader struct { - s *Store -} - -func newReader(s *Store) (*Reader, error) { - return &Reader{ - s: s, - }, nil -} - -func (r *Reader) BytesSafeAfterClose() bool { - return false -} - -func (r *Reader) Get(key []byte) ([]byte, error) { - return r.s.get(key) -} - -func (r *Reader) Iterator(key []byte) store.KVIterator { - return r.s.iterator(key) -} - -func (r *Reader) Close() error { - return nil -} diff --git a/index/store/cznicb/writer.go b/index/store/cznicb/writer.go deleted file mode 100644 index 279d17a7..00000000 --- a/index/store/cznicb/writer.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build go1.4 - -package cznicb - -import ( - "github.com/blevesearch/bleve/index/store" -) - -type Writer struct { - s *Store - r *Reader -} - -func (w *Writer) BytesSafeAfterClose() bool { - return false -} - -func (w *Writer) Set(key, val []byte) error { - return w.s.set(key, val) -} - -func (w *Writer) Delete(key []byte) error { - return w.s.delete(key) -} - -func (w *Writer) NewBatch() store.KVBatch { - return &Batch{ - s: w.s, - ops: make([]op, 0, 1000), - merges: make(map[string][][]byte), - } -} - -func (w *Writer) Close() error { - w.s.availableWriters <- true - w.s = nil - return nil -} - -func (w *Writer) Get(key []byte) ([]byte, error) { - return w.r.s.get(key) -} - -func (w *Writer) Iterator(key []byte) store.KVIterator { - return w.r.s.iterator(key) -} diff --git a/index/store/forestdb/batch.go b/index/store/forestdb/batch.go deleted file mode 100644 index a8fad295..00000000 --- a/index/store/forestdb/batch.go +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build forestdb - -package forestdb - -import ( - "fmt" -) - -type op struct { - k []byte - v []byte -} - -type Batch struct { - s *Store - ops []op - merges map[string][][]byte -} - -func (b *Batch) Set(k, v []byte) { - b.ops = append(b.ops, op{k, v}) -} - -func (b *Batch) Delete(k []byte) { - b.ops = append(b.ops, op{k, nil}) -} - -func (b *Batch) Merge(key, val []byte) { - ops, ok := b.merges[string(key)] - if ok && len(ops) > 0 { - last := ops[len(ops)-1] - mergedVal, partialMergeOk := b.s.mo.PartialMerge(key, last, val) - if partialMergeOk { - // replace last entry with the result of the merge - ops[len(ops)-1] = mergedVal - } else { - // could not partial merge, append this to the end - ops = append(ops, val) - } - } else { - ops = [][]byte{val} - } - b.merges[string(key)] = ops -} - -func (b *Batch) Execute() (err error) { - - for k, mergeOps := range b.merges { - kb := []byte(k) - existingVal, err := b.s.get(kb) - if err != nil { - return err - } - mergedVal, fullMergeOk := b.s.mo.FullMerge(kb, existingVal, mergeOps) - if !fullMergeOk { - return fmt.Errorf("merge operator returned failure") - } - err = b.s.setlocked(kb, mergedVal) - if err != nil { - return err - } - } - - for _, op := range b.ops { - if op.v != nil { - b.s.setlocked(op.k, op.v) - } else { - b.s.deletelocked(op.k) - } - } - - return b.s.commit() -} - -func (b *Batch) Close() error { - return nil -} diff --git a/index/store/forestdb/iterator.go b/index/store/forestdb/iterator.go deleted file mode 100644 index 727bc488..00000000 --- a/index/store/forestdb/iterator.go +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build forestdb - -package forestdb - -import ( - "github.com/couchbase/goforestdb" -) - -type Iterator struct { - store *Store - snapshot *forestdb.KVStore - iterator *forestdb.Iterator - curr *forestdb.Doc - valid bool -} - -func newIterator(store *Store) *Iterator { - itr, err := store.dbkv.IteratorInit([]byte{}, nil, forestdb.ITR_NONE) - rv := Iterator{ - store: store, - iterator: itr, - valid: err == nil, - } - return &rv -} - -func newIteratorWithSnapshot(store *Store, snapshot *forestdb.KVStore) *Iterator { - itr, err := snapshot.IteratorInit([]byte{}, nil, forestdb.ITR_NONE) - rv := Iterator{ - store: store, - iterator: itr, - valid: err == nil, - } - return &rv -} - -func newIteratorWithSnapshotAndRange(store *Store, snapshot *forestdb.KVStore, start, end []byte) *Iterator { - itr, err := snapshot.IteratorInit(start, end, forestdb.ITR_NONE) - rv := Iterator{ - store: store, - iterator: itr, - valid: err == nil, - } - return &rv -} - -func (i *Iterator) SeekFirst() { - err := i.iterator.SeekMin() - if err != nil { - i.valid = false - return - } - if i.curr != nil { - i.curr.Close() - } - i.curr, err = i.iterator.Get() - if err != nil { - i.valid = false - } -} - -func (i *Iterator) Seek(key []byte) { - err := i.iterator.Seek(key, forestdb.FDB_ITR_SEEK_HIGHER) - if err != nil { - i.valid = false - return - } - if i.curr != nil { - i.curr.Close() - } - i.curr, err = i.iterator.Get() - if err != nil { - i.valid = false - return - } -} - -func (i *Iterator) Next() { - err := i.iterator.Next() - if err != nil { - i.valid = false - return - } - if i.curr != nil { - i.curr.Close() - } - i.curr, err = i.iterator.Get() - if err != nil { - i.valid = false - } -} - -func (i *Iterator) Current() ([]byte, []byte, bool) { - if i.Valid() { - return i.Key(), i.Value(), true - } - return nil, nil, false -} - -func (i *Iterator) Key() []byte { - return i.curr.Key() -} - -func (i *Iterator) Value() []byte { - return i.curr.Body() -} - -func (i *Iterator) Valid() bool { - return i.valid -} - -func (i *Iterator) Close() error { - i.valid = false - if i.curr != nil { - i.curr.Close() - } - return i.iterator.Close() -} diff --git a/index/store/forestdb/reader.go b/index/store/forestdb/reader.go deleted file mode 100644 index 19f184de..00000000 --- a/index/store/forestdb/reader.go +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build forestdb - -package forestdb - -import ( - "fmt" - - "github.com/blevesearch/bleve/index/store" - "github.com/couchbase/goforestdb" -) - -type Reader struct { - store *Store - snapshot *forestdb.KVStore -} - -func (r *Reader) BytesSafeAfterClose() bool { - return true -} - -func newReader(store *Store) (*Reader, error) { - snapshot, err := store.newSnapshot() - if err != nil { - return nil, fmt.Errorf("error opening snapshot: %v", err) - } - return &Reader{ - store: store, - snapshot: snapshot, - }, nil -} - -func (r *Reader) Get(key []byte) ([]byte, error) { - res, err := r.snapshot.GetKV(key) - if err != nil && err != forestdb.RESULT_KEY_NOT_FOUND { - return nil, err - } - return res, nil -} - -func (r *Reader) Iterator(key []byte) store.KVIterator { - rv := newIteratorWithSnapshot(r.store, r.snapshot) - rv.Seek(key) - return rv -} - -func (r *Reader) RangeIterator(start, end []byte) store.KVIterator { - rv := newIteratorWithSnapshotAndRange(r.store, r.snapshot, start, end) - rv.Seek(start) - return rv -} - -func (r *Reader) Close() error { - return r.snapshot.Close() -} diff --git a/index/store/forestdb/store.go b/index/store/forestdb/store.go deleted file mode 100644 index b6ffe6ba..00000000 --- a/index/store/forestdb/store.go +++ /dev/null @@ -1,292 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build forestdb - -package forestdb - -import ( - "bytes" - "encoding/binary" - "fmt" - "sync" - - "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/registry" - "github.com/couchbase/goforestdb" -) - -const Name = "forestdb" - -type Store struct { - path string - config *forestdb.Config - kvconfig *forestdb.KVStoreConfig - dbfile *forestdb.File - dbkv *forestdb.KVStore - writer sync.Mutex - mo store.MergeOperator -} - -func New(path string, createIfMissing bool, - config map[string]interface{}) (*Store, error) { - if config == nil { - config = map[string]interface{}{} - } - - forestDBDefaultConfig := forestdb.DefaultConfig() - forestDBDefaultConfig.SetCompactionMode(forestdb.COMPACT_AUTO) - forestDBDefaultConfig.SetMultiKVInstances(false) - forestDBConfig, err := applyConfig(forestDBDefaultConfig, config) - if err != nil { - return nil, err - } - - rv := Store{ - path: path, - config: forestDBConfig, - kvconfig: forestdb.DefaultKVStoreConfig(), - } - - if createIfMissing { - rv.kvconfig.SetCreateIfMissing(true) - } - - return &rv, nil -} - -func (s *Store) Open() error { - var err error - s.dbfile, err = forestdb.Open(s.path, s.config) - if err != nil { - return err - } - - s.dbkv, err = s.dbfile.OpenKVStoreDefault(s.kvconfig) - if err != nil { - return err - } - - return nil -} - -func (s *Store) SetMergeOperator(mo store.MergeOperator) { - s.mo = mo -} - -func (s *Store) get(key []byte) ([]byte, error) { - res, err := s.dbkv.GetKV(key) - if err != nil && err != forestdb.RESULT_KEY_NOT_FOUND { - return nil, err - } - return res, nil -} - -func (s *Store) set(key, val []byte) error { - s.writer.Lock() - defer s.writer.Unlock() - return s.setlocked(key, val) -} - -func (s *Store) setlocked(key, val []byte) error { - return s.dbkv.SetKV(key, val) -} - -func (s *Store) delete(key []byte) error { - s.writer.Lock() - defer s.writer.Unlock() - return s.deletelocked(key) -} - -func (s *Store) deletelocked(key []byte) error { - return s.dbkv.DeleteKV(key) -} - -func (s *Store) commit() error { - return s.dbfile.Commit(forestdb.COMMIT_NORMAL) -} - -func (s *Store) Close() error { - err := s.dbkv.Close() - if err != nil { - return err - } - return s.dbfile.Close() - -} - -func (ldbs *Store) iterator(key []byte) store.KVIterator { - rv := newIterator(ldbs) - rv.Seek(key) - return rv -} - -func (s *Store) Reader() (store.KVReader, error) { - return newReader(s) -} - -func (ldbs *Store) Writer() (store.KVWriter, error) { - return newWriter(ldbs) -} - -func (s *Store) getSeqNum() (forestdb.SeqNum, error) { - dbinfo, err := s.dbkv.Info() - if err != nil { - return 0, err - } - return dbinfo.LastSeqNum(), nil -} - -func (s *Store) newSnapshot() (*forestdb.KVStore, error) { - seqNum, err := s.getSeqNum() - if err != nil { - return nil, fmt.Errorf("error getting snapshot seqnum: %v", err) - } - snapshot, err := s.dbkv.SnapshotOpen(seqNum) - if err == forestdb.RESULT_NO_DB_INSTANCE { - checkAgainSeqNum, err := s.getSeqNum() - if err != nil { - return nil, fmt.Errorf("error getting snapshot seqnum again: %v", err) - } - return nil, fmt.Errorf("cannot open snapshot %v, checked again its %v, error: %v", seqNum, checkAgainSeqNum, err) - } - return snapshot, err -} - -func (s *Store) GetRollbackID() ([]byte, error) { - seqNum, err := s.getSeqNum() - if err != nil { - return nil, err - } - buf := new(bytes.Buffer) - err = binary.Write(buf, binary.LittleEndian, seqNum) - if err != nil { - return nil, err - } - return buf.Bytes(), nil -} - -func (s *Store) RollbackTo(rollbackId []byte) error { - s.writer.Lock() - defer s.writer.Unlock() - buf := bytes.NewReader(rollbackId) - var seqNum forestdb.SeqNum - err := binary.Read(buf, binary.LittleEndian, &seqNum) - if err != nil { - return err - } - err = s.dbkv.Rollback(seqNum) - if err != nil { - return err - } - return nil -} - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - path, ok := config["path"].(string) - if !ok { - return nil, fmt.Errorf("must specify path") - } - createIfMissing := false - cim, ok := config["create_if_missing"].(bool) - if ok { - createIfMissing = cim - } - return New(path, createIfMissing, config) -} - -func init() { - registry.RegisterKVStore(Name, StoreConstructor) -} - -func applyConfig(c *forestdb.Config, config map[string]interface{}) ( - *forestdb.Config, error) { - - if v, exists := config["blockSize"].(float64); exists { - c.SetBlockSize(uint32(v)) - } - if v, exists := config["bufferCacheSize"].(float64); exists { - c.SetBufferCacheSize(uint64(v)) - } - if v, exists := config["chunkSize"].(float64); exists { - c.SetChunkSize(uint16(v)) - } - if v, exists := config["cleanupCacheOnClose"].(bool); exists { - c.SetCleanupCacheOnClose(v) - } - if v, exists := config["compactionBufferSizeMax"].(float64); exists { - c.SetCompactionBufferSizeMax(uint32(v)) - } - if v, exists := config["compactionMinimumFilesize"].(float64); exists { - c.SetCompactionMinimumFilesize(uint64(v)) - } - if v, exists := config["compactionMode"].(string); exists { - switch v { - case "manual": - c.SetCompactionMode(forestdb.COMPACT_MANUAL) - case "auto": - c.SetCompactionMode(forestdb.COMPACT_AUTO) - default: - return nil, fmt.Errorf("Unknown compaction mode: %s", v) - } - - } - if v, exists := config["compactionThreshold"].(float64); exists { - c.SetCompactionThreshold(uint8(v)) - } - if v, exists := config["compactorSleepDuration"].(float64); exists { - c.SetCompactorSleepDuration(uint64(v)) - } - if v, exists := config["compressDocumentBody"].(bool); exists { - c.SetCompressDocumentBody(v) - } - if v, exists := config["durabilityOpt"].(string); exists { - switch v { - case "none": - c.SetDurabilityOpt(forestdb.DRB_NONE) - case "odirect": - c.SetDurabilityOpt(forestdb.DRB_ODIRECT) - case "async": - c.SetDurabilityOpt(forestdb.DRB_ASYNC) - case "async_odirect": - c.SetDurabilityOpt(forestdb.DRB_ODIRECT_ASYNC) - default: - return nil, fmt.Errorf("Unknown durability option: %s", v) - } - - } - if v, exists := config["openFlags"].(string); exists { - switch v { - case "create": - c.SetOpenFlags(forestdb.OPEN_FLAG_CREATE) - case "readonly": - c.SetOpenFlags(forestdb.OPEN_FLAG_RDONLY) - default: - return nil, fmt.Errorf("Unknown open flag: %s", v) - } - } - if v, exists := config["purgingInterval"].(float64); exists { - c.SetPurgingInterval(uint32(v)) - } - if v, exists := config["seqTreeOpt"].(bool); exists { - if !v { - c.SetSeqTreeOpt(forestdb.SEQTREE_NOT_USE) - } - } - if v, exists := config["walFlushBeforeCommit"].(bool); exists { - c.SetWalFlushBeforeCommit(v) - } - if v, exists := config["walThreshold"].(float64); exists { - c.SetWalThreshold(uint64(v)) - } - if v, exists := config["maxWriterLockProb"].(float64); exists { - c.SetMaxWriterLockProb(uint8(v)) - } - return c, nil -} diff --git a/index/store/forestdb/store_test.go b/index/store/forestdb/store_test.go deleted file mode 100644 index dd90fbd9..00000000 --- a/index/store/forestdb/store_test.go +++ /dev/null @@ -1,636 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build forestdb - -package forestdb - -import ( - "os" - "reflect" - "testing" - - "github.com/blevesearch/bleve/index/store" -) - -func TestForestDBStore(t *testing.T) { - defer func() { - err := os.RemoveAll("testdir") - if err != nil { - t.Fatal(err) - } - }() - - err := os.MkdirAll("testdir", 0700) - if err != nil { - t.Fatal(err) - } - s, err := New("testdir/test", true, nil) - if err != nil { - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestKVStore(t, s) -} - -func TestReaderIsolation(t *testing.T) { - defer func() { - err := os.RemoveAll("testdir") - if err != nil { - t.Fatal(err) - } - }() - - err := os.MkdirAll("testdir", 0700) - if err != nil { - t.Fatal(err) - } - s, err := New("testdir/test", true, nil) - if err != nil { - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestReaderIsolation(t, s) -} - -// TestRollbackSameHandle tries to rollback a handle -// and ensure that subsequent reads from it also -// reflect the rollback -func TestRollbackSameHandle(t *testing.T) { - defer func() { - err := os.RemoveAll("testdir") - if err != nil { - t.Fatal(err) - } - }() - - err := os.MkdirAll("testdir", 0700) - if err != nil { - t.Fatal(err) - } - s, err := New("testdir/test", true, nil) - if err != nil { - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - writer, err := s.Writer() - if err != nil { - t.Fatal(err) - } - - // create 2 docs, a and b - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Error(err) - } - - err = writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Error(err) - } - - // get the rollback id - rollbackId, err := s.GetRollbackID() - if err != nil { - t.Error(err) - } - - // create a 3rd doc c - err = writer.Set([]byte("c"), []byte("val-c")) - if err != nil { - t.Error(err) - } - - err = writer.Close() - if err != nil { - t.Error(err) - } - - // make sure c is there - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - val, err := reader.Get([]byte("c")) - if err != nil { - t.Error(err) - } - if string(val) != "val-c" { - t.Errorf("expected value 'val-c' got '%s'", val) - } - err = reader.Close() - if err != nil { - t.Fatal(err) - } - - // now rollback - err = s.RollbackTo(rollbackId) - if err != nil { - t.Fatal(err) - } - - // now make sure c is not there - reader, err = s.Reader() - if err != nil { - t.Error(err) - } - val, err = reader.Get([]byte("c")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected missing, got '%s'", val) - } - err = reader.Close() - if err != nil { - t.Fatal(err) - } -} - -// TestRollbackNewHandle tries to rollback the -// database, then opens a new handle, and ensures -// that the rollback is reflected there as well -func TestRollbackNewHandle(t *testing.T) { - defer func() { - err := os.RemoveAll("testdir") - if err != nil { - t.Fatal(err) - } - }() - - err := os.MkdirAll("testdir", 0700) - if err != nil { - t.Fatal(err) - } - s, err := New("testdir/test", true, nil) - if err != nil { - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - writer, err := s.Writer() - if err != nil { - t.Fatal(err) - } - - // create 2 docs, a and b - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Error(err) - } - - err = writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Error(err) - } - - // get the rollback id - rollbackId, err := s.GetRollbackID() - if err != nil { - t.Error(err) - } - - // create a 3rd doc c - err = writer.Set([]byte("c"), []byte("val-c")) - if err != nil { - t.Error(err) - } - - err = writer.Close() - if err != nil { - t.Error(err) - } - - // make sure c is there - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - val, err := reader.Get([]byte("c")) - if err != nil { - t.Error(err) - } - if string(val) != "val-c" { - t.Errorf("expected value 'val-c' got '%s'", val) - } - err = reader.Close() - if err != nil { - t.Fatal(err) - } - - // now rollback - err = s.RollbackTo(rollbackId) - if err != nil { - t.Fatal(err) - } - - // now lets open another handle - s2, err := New("testdir/test", true, nil) - if err != nil { - t.Fatal(err) - } - err = s2.Open() - if err != nil { - t.Fatal(err) - } - defer s2.Close() - - // now make sure c is not there - reader2, err := s2.Reader() - if err != nil { - t.Error(err) - } - val, err = reader2.Get([]byte("c")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected missing, got '%s'", val) - } - err = reader2.Close() - if err != nil { - t.Fatal(err) - } -} - -// TestRollbackOtherHandle tries to create 2 handles -// at the beginning, then rollback one of them -// and ensure it affects the other -func TestRollbackOtherHandle(t *testing.T) { - defer func() { - err := os.RemoveAll("testdir") - if err != nil { - t.Fatal(err) - } - }() - - err := os.MkdirAll("testdir", 0700) - if err != nil { - t.Fatal(err) - } - s, err := New("testdir/test", true, nil) - if err != nil { - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - // open another handle at the same time - s2, err := New("testdir/test", true, nil) - if err != nil { - t.Fatal(err) - } - err = s2.Open() - if err != nil { - t.Fatal(err) - } - defer s2.Close() - - writer, err := s.Writer() - if err != nil { - t.Fatal(err) - } - - // create 2 docs, a and b - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Error(err) - } - - err = writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Error(err) - } - - // get the rollback id - rollbackId, err := s.GetRollbackID() - if err != nil { - t.Error(err) - } - - // create a 3rd doc c - err = writer.Set([]byte("c"), []byte("val-c")) - if err != nil { - t.Error(err) - } - - err = writer.Close() - if err != nil { - t.Error(err) - } - - // make sure c is there - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - val, err := reader.Get([]byte("c")) - if err != nil { - t.Error(err) - } - if string(val) != "val-c" { - t.Errorf("expected value 'val-c' got '%s'", val) - } - err = reader.Close() - if err != nil { - t.Fatal(err) - } - - // now rollback - err = s.RollbackTo(rollbackId) - if err != nil { - t.Fatal(err) - } - - // now make sure c is not on the other handle - reader2, err := s2.Reader() - if err != nil { - t.Error(err) - } - val, err = reader2.Get([]byte("c")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected missing, got '%s'", val) - } - err = reader2.Close() - if err != nil { - t.Fatal(err) - } -} - -func CommonTestKVStore(t *testing.T, s store.KVStore) { - - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), []byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - it := reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != "val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() - if err != nil { - t.Fatal(err) - } -} - -func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { - // insert a kv pair - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // create an isolated reader - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - - // verify we see the value already inserted - val, err := reader.Get([]byte("a")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-a")) { - t.Errorf("expected val-a, got nil") - } - - // verify that an iterator sees it - count := 0 - it := reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - - // add something after the reader was created - writer, err = s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // ensure that a newer reader sees it - newReader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := newReader.Close() - if err != nil { - t.Fatal(err) - } - }() - val, err = newReader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-b")) { - t.Errorf("expected val-b, got nil") - } - - // ensure that the director iterator sees it - count = 0 - it2 := newReader.Iterator([]byte{0}) - defer func() { - err := it2.Close() - if err != nil { - t.Fatal(err) - } - }() - for it2.Valid() { - it2.Next() - count++ - } - if count != 2 { - t.Errorf("expected iterator to see 2, saw %d", count) - } - - // but that the isolated reader does not - val, err = reader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected nil, got %v", val) - } - - // and ensure that the iterator on the isolated reader also does not - count = 0 - it3 := reader.Iterator([]byte{0}) - defer func() { - err := it3.Close() - if err != nil { - t.Fatal(err) - } - }() - for it3.Valid() { - it3.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - -} diff --git a/index/store/forestdb/writer.go b/index/store/forestdb/writer.go deleted file mode 100644 index 01909738..00000000 --- a/index/store/forestdb/writer.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build forestdb - -package forestdb - -import ( - "github.com/blevesearch/bleve/index/store" -) - -type Writer struct { - store *Store -} - -func (w *Writer) BytesSafeAfterClose() bool { - return true -} - -func newWriter(store *Store) (*Writer, error) { - store.writer.Lock() - return &Writer{ - store: store, - }, nil -} - -func (w *Writer) Set(key, val []byte) error { - err := w.store.setlocked(key, val) - if err != nil { - return err - } - return w.store.commit() -} - -func (w *Writer) Delete(key []byte) error { - err := w.store.deletelocked(key) - if err != nil { - return err - } - return w.store.commit() -} - -func (w *Writer) NewBatch() store.KVBatch { - return &Batch{ - s: w.store, - ops: make([]op, 0, 1000), - merges: make(map[string][][]byte), - } -} - -func (w *Writer) Close() error { - w.store.writer.Unlock() - return nil -} - -// these two methods can safely read using the regular -// methods without a read transaction, because we know -// that no one else is writing but us -func (w *Writer) Get(key []byte) ([]byte, error) { - return w.store.get(key) -} - -func (w *Writer) Iterator(key []byte) store.KVIterator { - return w.store.iterator(key) -} diff --git a/index/store/gorocksdb/batch.go b/index/store/gorocksdb/batch.go deleted file mode 100644 index 3d80ece8..00000000 --- a/index/store/gorocksdb/batch.go +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build rocksdb - -package rocksdb - -import ( - "github.com/tecbot/gorocksdb" -) - -type Batch struct { - w *Writer - batch *gorocksdb.WriteBatch -} - -func (b *Batch) Set(key, val []byte) { - b.batch.Put(key, val) -} - -func (b *Batch) Delete(key []byte) { - b.batch.Delete(key) -} - -func (b *Batch) Merge(key, val []byte) { - b.batch.Merge(key, val) -} - -func (b *Batch) Execute() error { - wopts := defaultWriteOptions() - err := b.w.store.db.Write(wopts, b.batch) - return err -} - -func (b *Batch) Close() error { - return nil -} diff --git a/index/store/gorocksdb/iterator.go b/index/store/gorocksdb/iterator.go deleted file mode 100644 index 9acdb76d..00000000 --- a/index/store/gorocksdb/iterator.go +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build rocksdb - -package rocksdb - -import ( - "github.com/tecbot/gorocksdb" -) - -type Iterator struct { - store *Store - iterator *gorocksdb.Iterator -} - -func newIterator(store *Store) *Iterator { - ropts := defaultReadOptions() - rv := Iterator{ - store: store, - iterator: store.db.NewIterator(ropts), - } - return &rv -} - -func newIteratorWithSnapshot(store *Store, snapshot *gorocksdb.Snapshot) *Iterator { - options := defaultReadOptions() - options.SetSnapshot(snapshot) - rv := Iterator{ - store: store, - iterator: store.db.NewIterator(options), - } - return &rv -} - -func (ldi *Iterator) SeekFirst() { - ldi.iterator.SeekToFirst() -} - -func (ldi *Iterator) Seek(key []byte) { - ldi.iterator.Seek(key) -} - -func (ldi *Iterator) Next() { - ldi.iterator.Next() -} - -func (ldi *Iterator) Current() ([]byte, []byte, bool) { - if ldi.Valid() { - return ldi.Key(), ldi.Value(), true - } - return nil, nil, false -} - -func (ldi *Iterator) Key() []byte { - return ldi.iterator.Key().Data() -} - -func (ldi *Iterator) Value() []byte { - return ldi.iterator.Value().Data() -} - -func (ldi *Iterator) Valid() bool { - return ldi.iterator.Valid() -} - -func (ldi *Iterator) Close() error { - ldi.iterator.Close() - return nil -} diff --git a/index/store/gorocksdb/reader.go b/index/store/gorocksdb/reader.go deleted file mode 100644 index 72a2e66a..00000000 --- a/index/store/gorocksdb/reader.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build rocksdb - -package rocksdb - -import ( - "github.com/blevesearch/bleve/index/store" - "github.com/tecbot/gorocksdb" -) - -type Reader struct { - store *Store - snapshot *gorocksdb.Snapshot -} - -func newReader(store *Store) (*Reader, error) { - return &Reader{ - store: store, - snapshot: store.db.NewSnapshot(), - }, nil -} - -func (r *Reader) BytesSafeAfterClose() bool { - return false -} - -func (r *Reader) Get(key []byte) ([]byte, error) { - return r.store.getWithSnapshot(key, r.snapshot) -} - -func (r *Reader) Iterator(key []byte) store.KVIterator { - rv := newIteratorWithSnapshot(r.store, r.snapshot) - rv.Seek(key) - return rv -} - -func (r *Reader) Close() error { - r.snapshot.Release() - return nil -} diff --git a/index/store/gorocksdb/store.go b/index/store/gorocksdb/store.go deleted file mode 100644 index 2916c56e..00000000 --- a/index/store/gorocksdb/store.go +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build rocksdb - -package rocksdb - -import ( - "fmt" - "sync" - - "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/registry" - "github.com/tecbot/gorocksdb" -) - -const Name = "rocksdb" - -type Store struct { - path string - opts *gorocksdb.Options - db *gorocksdb.DB - writer sync.Mutex -} - -func New(path string, config map[string]interface{}) (*Store, error) { - rv := Store{ - path: path, - opts: gorocksdb.NewDefaultOptions(), - } - - _, err := applyConfig(rv.opts, config) - if err != nil { - return nil, err - } - - return &rv, nil -} - -func (ldbs *Store) Open() error { - var err error - ldbs.db, err = gorocksdb.OpenDb(ldbs.opts, ldbs.path) - if err != nil { - return err - } - return nil -} - -func (ldbs *Store) SetMergeOperator(mo store.MergeOperator) { - ldbs.opts.SetMergeOperator(mo) -} - -func (ldbs *Store) get(key []byte) ([]byte, error) { - options := defaultReadOptions() - b, err := ldbs.db.Get(options, key) - return b.Data(), err -} - -func (ldbs *Store) getWithSnapshot(key []byte, snapshot *gorocksdb.Snapshot) ([]byte, error) { - options := defaultReadOptions() - options.SetSnapshot(snapshot) - b, err := ldbs.db.Get(options, key) - return b.Data(), err -} - -func (ldbs *Store) set(key, val []byte) error { - ldbs.writer.Lock() - defer ldbs.writer.Unlock() - return ldbs.setlocked(key, val) -} - -func (ldbs *Store) setlocked(key, val []byte) error { - options := defaultWriteOptions() - err := ldbs.db.Put(options, key, val) - return err -} - -func (ldbs *Store) delete(key []byte) error { - ldbs.writer.Lock() - defer ldbs.writer.Unlock() - return ldbs.deletelocked(key) -} - -func (ldbs *Store) deletelocked(key []byte) error { - options := defaultWriteOptions() - err := ldbs.db.Delete(options, key) - return err -} - -func (ldbs *Store) Close() error { - ldbs.db.Close() - return nil -} - -func (ldbs *Store) iterator(key []byte) store.KVIterator { - rv := newIterator(ldbs) - rv.Seek(key) - return rv -} - -func (ldbs *Store) Reader() (store.KVReader, error) { - return newReader(ldbs) -} - -func (ldbs *Store) Writer() (store.KVWriter, error) { - return newWriter(ldbs) -} - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - path, ok := config["path"].(string) - if !ok { - return nil, fmt.Errorf("must specify path") - } - return New(path, config) -} - -func init() { - registry.RegisterKVStore(Name, StoreConstructor) -} - -func applyConfig(o *gorocksdb.Options, config map[string]interface{}) ( - *gorocksdb.Options, error) { - - cim, ok := config["create_if_missing"].(bool) - if ok { - o.SetCreateIfMissing(cim) - } - - eie, ok := config["error_if_exists"].(bool) - if ok { - o.SetErrorIfExists(eie) - } - - wbs, ok := config["write_buffer_size"].(float64) - if ok { - o.SetWriteBufferSize(int(wbs)) - } - - mof, ok := config["max_open_files"].(float64) - if ok { - o.SetMaxOpenFiles(int(mof)) - } - - tt, ok := config["total_threads"].(float64) - if ok { - o.IncreaseParallelism(int(tt)) - } - - // options in the block based table options object - bbto := gorocksdb.NewDefaultBlockBasedTableOptions() - - lcc, ok := config["lru_cache_capacity"].(float64) - if ok { - c := gorocksdb.NewLRUCache(int(lcc)) - bbto.SetBlockCache(c) - } - - bfbpk, ok := config["bloom_filter_bits_per_key"].(float64) - if ok { - bf := gorocksdb.NewBloomFilter(int(bfbpk)) - bbto.SetFilterPolicy(bf) - } - - // set the block based table options - o.SetBlockBasedTableFactory(bbto) - - return o, nil -} diff --git a/index/store/gorocksdb/store_test.go b/index/store/gorocksdb/store_test.go deleted file mode 100644 index 246cc09d..00000000 --- a/index/store/gorocksdb/store_test.go +++ /dev/null @@ -1,298 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build rocksdb - -package rocksdb - -import ( - "os" - "reflect" - "testing" - - "github.com/blevesearch/bleve/index/store" -) - -var rocksdbTestOptions = map[string]interface{}{ - "create_if_missing": true, -} - -func TestGoRocksDBStore(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - - s, err := New("test", rocksdbTestOptions) - if err != nil { - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestKVStore(t, s) -} - -func TestReaderIsolation(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - - s, err := New("test", rocksdbTestOptions) - if err != nil { - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestReaderIsolation(t, s) -} - -func CommonTestKVStore(t *testing.T, s store.KVStore) { - - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), []byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - it := reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != "val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() - if err != nil { - t.Fatal(err) - } -} - -func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { - // insert a kv pair - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // create an isolated reader - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - - // verify that we see the value already inserted - val, err := reader.Get([]byte("a")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-a")) { - t.Errorf("expected val-a, got nil") - } - - // verify that an iterator sees it - count := 0 - it := reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - - // add something after the reader was created - writer, err = s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // ensure that a newer reader sees it - newReader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := newReader.Close() - if err != nil { - t.Fatal(err) - } - }() - val, err = newReader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-b")) { - t.Errorf("expected val-b, got nil") - } - - // ensure that the director iterator sees it - count = 0 - it2 := newReader.Iterator([]byte{0}) - defer func() { - err := it2.Close() - if err != nil { - t.Fatal(err) - } - }() - for it2.Valid() { - it2.Next() - count++ - } - if count != 2 { - t.Errorf("expected iterator to see 2, saw %d", count) - } - - // but that the isolated reader does not - val, err = reader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected nil, got %v", val) - } - - // and ensure that the iterator on the isolated reader also does not - count = 0 - it3 := reader.Iterator([]byte{0}) - defer func() { - err := it3.Close() - if err != nil { - t.Fatal(err) - } - }() - for it3.Valid() { - it3.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - -} diff --git a/index/store/gorocksdb/util.go b/index/store/gorocksdb/util.go deleted file mode 100644 index 830fe490..00000000 --- a/index/store/gorocksdb/util.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build rocksdb - -package rocksdb - -import ( - "github.com/tecbot/gorocksdb" -) - -func defaultWriteOptions() *gorocksdb.WriteOptions { - wo := gorocksdb.NewDefaultWriteOptions() - // request fsync on write for safety - wo.SetSync(true) - return wo -} - -func defaultReadOptions() *gorocksdb.ReadOptions { - ro := gorocksdb.NewDefaultReadOptions() - return ro -} diff --git a/index/store/gorocksdb/writer.go b/index/store/gorocksdb/writer.go deleted file mode 100644 index 2c971c7b..00000000 --- a/index/store/gorocksdb/writer.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build rocksdb - -package rocksdb - -import ( - "github.com/blevesearch/bleve/index/store" - "github.com/tecbot/gorocksdb" -) - -type Writer struct { - store *Store -} - -func newWriter(store *Store) (*Writer, error) { - store.writer.Lock() - return &Writer{ - store: store, - }, nil -} - -func (w *Writer) BytesSafeAfterClose() bool { - return false -} - -func (w *Writer) Set(key, val []byte) error { - return w.store.setlocked(key, val) -} - -func (w *Writer) Delete(key []byte) error { - return w.store.deletelocked(key) -} - -func (w *Writer) NewBatch() store.KVBatch { - rv := Batch{ - w: w, - batch: gorocksdb.NewWriteBatch(), - } - return &rv -} - -func (w *Writer) Close() error { - w.store.writer.Unlock() - return nil -} - -// these two methods can safely read using the regular -// methods without a read transaction, because we know -// that no one else is writing but us -func (w *Writer) Get(key []byte) ([]byte, error) { - return w.store.get(key) -} - -func (w *Writer) Iterator(key []byte) store.KVIterator { - return w.store.iterator(key) -} diff --git a/index/store/leveldb/batch.go b/index/store/leveldb/batch.go deleted file mode 100644 index 2e95286a..00000000 --- a/index/store/leveldb/batch.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build leveldb full - -package leveldb - -import ( - "github.com/blevesearch/bleve/index/store" - "github.com/jmhodges/levigo" -) - -type Batch struct { - w *Writer - merge *store.EmulatedMerge - batch *levigo.WriteBatch -} - -func (b *Batch) Set(key, val []byte) { - b.batch.Put(key, val) -} - -func (b *Batch) Delete(key []byte) { - b.batch.Delete(key) -} - -func (b *Batch) Merge(key, val []byte) { - b.merge.Merge(key, val) -} - -func (b *Batch) Execute() error { - // first process merges - ops, err := b.merge.ExecuteDeferred(b.w) - if err != nil { - return err - } - for _, op := range ops { - b.batch.Put(op.K, op.V) - } - - wopts := defaultWriteOptions() - defer wopts.Close() - err = b.w.store.db.Write(wopts, b.batch) - return err -} - -func (b *Batch) Close() error { - return nil -} diff --git a/index/store/leveldb/iterator.go b/index/store/leveldb/iterator.go deleted file mode 100644 index 10ef8d7d..00000000 --- a/index/store/leveldb/iterator.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build leveldb full - -package leveldb - -import ( - "github.com/jmhodges/levigo" -) - -type Iterator struct { - store *Store - iterator *levigo.Iterator -} - -func newIterator(store *Store) *Iterator { - ropts := defaultReadOptions() - rv := Iterator{ - store: store, - iterator: store.db.NewIterator(ropts), - } - ropts.Close() - return &rv -} - -func newIteratorWithSnapshot(store *Store, snapshot *levigo.Snapshot) *Iterator { - options := defaultReadOptions() - options.SetSnapshot(snapshot) - rv := Iterator{ - store: store, - iterator: store.db.NewIterator(options), - } - options.Close() - return &rv -} - -func (ldi *Iterator) SeekFirst() { - ldi.iterator.SeekToFirst() -} - -func (ldi *Iterator) Seek(key []byte) { - ldi.iterator.Seek(key) -} - -func (ldi *Iterator) Next() { - ldi.iterator.Next() -} - -func (ldi *Iterator) Current() ([]byte, []byte, bool) { - if ldi.Valid() { - return ldi.Key(), ldi.Value(), true - } - return nil, nil, false -} - -func (ldi *Iterator) Key() []byte { - return ldi.iterator.Key() -} - -func (ldi *Iterator) Value() []byte { - return ldi.iterator.Value() -} - -func (ldi *Iterator) Valid() bool { - return ldi.iterator.Valid() -} - -func (ldi *Iterator) Close() error { - ldi.iterator.Close() - return nil -} diff --git a/index/store/leveldb/reader.go b/index/store/leveldb/reader.go deleted file mode 100644 index c6360317..00000000 --- a/index/store/leveldb/reader.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build leveldb full - -package leveldb - -import ( - "github.com/blevesearch/bleve/index/store" - "github.com/jmhodges/levigo" -) - -type Reader struct { - store *Store - snapshot *levigo.Snapshot -} - -func newReader(store *Store) (*Reader, error) { - return &Reader{ - store: store, - snapshot: store.db.NewSnapshot(), - }, nil -} - -func (r *Reader) BytesSafeAfterClose() bool { - return true -} - -func (r *Reader) Get(key []byte) ([]byte, error) { - return r.store.getWithSnapshot(key, r.snapshot) -} - -func (r *Reader) Iterator(key []byte) store.KVIterator { - rv := newIteratorWithSnapshot(r.store, r.snapshot) - rv.Seek(key) - return rv -} - -func (r *Reader) Close() error { - r.store.db.ReleaseSnapshot(r.snapshot) - return nil -} diff --git a/index/store/leveldb/store.go b/index/store/leveldb/store.go deleted file mode 100644 index 46d9a3d7..00000000 --- a/index/store/leveldb/store.go +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build leveldb full - -package leveldb - -import ( - "fmt" - "sync" - - "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/registry" - "github.com/jmhodges/levigo" -) - -const Name = "leveldb" - -type Store struct { - path string - opts *levigo.Options - db *levigo.DB - writer sync.Mutex - mo store.MergeOperator -} - -func New(path string, config map[string]interface{}) (*Store, error) { - rv := Store{ - path: path, - opts: levigo.NewOptions(), - } - - _, err := applyConfig(rv.opts, config) - if err != nil { - return nil, err - } - - return &rv, nil -} - -func (ldbs *Store) Open() error { - var err error - ldbs.db, err = levigo.Open(ldbs.path, ldbs.opts) - if err != nil { - return err - } - return nil -} - -func (ldbs *Store) SetMergeOperator(mo store.MergeOperator) { - ldbs.mo = mo -} - -func (ldbs *Store) get(key []byte) ([]byte, error) { - options := defaultReadOptions() - b, err := ldbs.db.Get(options, key) - options.Close() - return b, err -} - -func (ldbs *Store) getWithSnapshot(key []byte, snapshot *levigo.Snapshot) ([]byte, error) { - options := defaultReadOptions() - options.SetSnapshot(snapshot) - b, err := ldbs.db.Get(options, key) - options.Close() - return b, err -} - -func (ldbs *Store) set(key, val []byte) error { - ldbs.writer.Lock() - defer ldbs.writer.Unlock() - return ldbs.setlocked(key, val) -} - -func (ldbs *Store) setlocked(key, val []byte) error { - options := defaultWriteOptions() - err := ldbs.db.Put(options, key, val) - options.Close() - return err -} - -func (ldbs *Store) delete(key []byte) error { - ldbs.writer.Lock() - defer ldbs.writer.Unlock() - return ldbs.deletelocked(key) -} - -func (ldbs *Store) deletelocked(key []byte) error { - options := defaultWriteOptions() - err := ldbs.db.Delete(options, key) - options.Close() - return err -} - -func (ldbs *Store) Close() error { - ldbs.db.Close() - ldbs.opts.Close() - return nil -} - -func (ldbs *Store) iterator(key []byte) store.KVIterator { - rv := newIterator(ldbs) - rv.Seek(key) - return rv -} - -func (ldbs *Store) Reader() (store.KVReader, error) { - return newReader(ldbs) -} - -func (ldbs *Store) Writer() (store.KVWriter, error) { - return newWriter(ldbs) -} - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - path, ok := config["path"].(string) - if !ok { - return nil, fmt.Errorf("must specify path") - } - return New(path, config) -} - -func init() { - registry.RegisterKVStore(Name, StoreConstructor) -} - -func applyConfig(o *levigo.Options, config map[string]interface{}) ( - *levigo.Options, error) { - - cim, ok := config["create_if_missing"].(bool) - if ok { - o.SetCreateIfMissing(cim) - } - - eie, ok := config["error_if_exists"].(bool) - if ok { - o.SetErrorIfExists(eie) - } - - wbs, ok := config["write_buffer_size"].(float64) - if ok { - o.SetWriteBufferSize(int(wbs)) - } - - bs, ok := config["block_size"].(float64) - if ok { - o.SetBlockSize(int(bs)) - } - - bri, ok := config["block_restart_interval"].(float64) - if ok { - o.SetBlockRestartInterval(int(bri)) - } - - lcc, ok := config["lru_cache_capacity"].(float64) - if ok { - lruCache := levigo.NewLRUCache(int(lcc)) - o.SetCache(lruCache) - } - - bfbpk, ok := config["bloom_filter_bits_per_key"].(float64) - if ok { - bf := levigo.NewBloomFilter(int(bfbpk)) - o.SetFilterPolicy(bf) - } - - return o, nil -} diff --git a/index/store/leveldb/store_test.go b/index/store/leveldb/store_test.go deleted file mode 100644 index 3eca69c1..00000000 --- a/index/store/leveldb/store_test.go +++ /dev/null @@ -1,298 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build leveldb full - -package leveldb - -import ( - "os" - "reflect" - "testing" - - "github.com/blevesearch/bleve/index/store" -) - -var leveldbTestOptions = map[string]interface{}{ - "create_if_missing": true, -} - -func TestLevelDBStore(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - - s, err := New("test", leveldbTestOptions) - if err != nil { - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestKVStore(t, s) -} - -func TestReaderIsolation(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - - s, err := New("test", leveldbTestOptions) - if err != nil { - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestReaderIsolation(t, s) -} - -func CommonTestKVStore(t *testing.T, s store.KVStore) { - - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), []byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - it := reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != "val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() - if err != nil { - t.Fatal(err) - } -} - -func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { - // insert a kv pair - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // create an isolated reader - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - - // verify that we see the value already inserted - val, err := reader.Get([]byte("a")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-a")) { - t.Errorf("expected val-a, got nil") - } - - // verify that an iterator sees it - count := 0 - it := reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - - // add something after the reader was created - writer, err = s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // ensure that a newer reader sees it - newReader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := newReader.Close() - if err != nil { - t.Fatal(err) - } - }() - val, err = newReader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-b")) { - t.Errorf("expected val-b, got nil") - } - - // ensure that the director iterator sees it - count = 0 - it2 := newReader.Iterator([]byte{0}) - defer func() { - err := it2.Close() - if err != nil { - t.Fatal(err) - } - }() - for it2.Valid() { - it2.Next() - count++ - } - if count != 2 { - t.Errorf("expected iterator to see 2, saw %d", count) - } - - // but that the isolated reader does not - val, err = reader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected nil, got %v", val) - } - - // and ensure that the iterator on the isolated reader also does not - count = 0 - it3 := reader.Iterator([]byte{0}) - defer func() { - err := it3.Close() - if err != nil { - t.Fatal(err) - } - }() - for it3.Valid() { - it3.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - -} diff --git a/index/store/leveldb/util.go b/index/store/leveldb/util.go deleted file mode 100644 index a34f6f84..00000000 --- a/index/store/leveldb/util.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build leveldb full - -package leveldb - -import ( - "github.com/jmhodges/levigo" -) - -func defaultWriteOptions() *levigo.WriteOptions { - wo := levigo.NewWriteOptions() - // request fsync on write for safety - wo.SetSync(true) - return wo -} - -func defaultReadOptions() *levigo.ReadOptions { - ro := levigo.NewReadOptions() - return ro -} diff --git a/index/store/leveldb/writer.go b/index/store/leveldb/writer.go deleted file mode 100644 index 1ef3a4e3..00000000 --- a/index/store/leveldb/writer.go +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -// +build leveldb full - -package leveldb - -import ( - "github.com/blevesearch/bleve/index/store" - "github.com/jmhodges/levigo" -) - -type Writer struct { - store *Store -} - -func newWriter(store *Store) (*Writer, error) { - store.writer.Lock() - return &Writer{ - store: store, - }, nil -} - -func (w *Writer) BytesSafeAfterClose() bool { - return true -} - -func (w *Writer) Set(key, val []byte) error { - return w.store.setlocked(key, val) -} - -func (w *Writer) Delete(key []byte) error { - return w.store.deletelocked(key) -} - -func (w *Writer) NewBatch() store.KVBatch { - rv := Batch{ - w: w, - merge: store.NewEmulatedMerge(w.store.mo), - batch: levigo.NewWriteBatch(), - } - return &rv -} - -func (w *Writer) Close() error { - w.store.writer.Unlock() - return nil -} - -// these two methods can safely read using the regular -// methods without a read transaction, because we know -// that no one else is writing but us -func (w *Writer) Get(key []byte) ([]byte, error) { - return w.store.get(key) -} - -func (w *Writer) Iterator(key []byte) store.KVIterator { - return w.store.iterator(key) -} diff --git a/index/upside_down/benchmark_cznicb_test.go b/index/upside_down/benchmark_cznicb_test.go index 82eed6ff..47f1c9e6 100644 --- a/index/upside_down/benchmark_cznicb_test.go +++ b/index/upside_down/benchmark_cznicb_test.go @@ -7,13 +7,15 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. +// +build cznicb + package upside_down import ( "testing" "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/index/store/cznicb" + "github.com/blevesearch/blevex/cznicb" ) func CreateCznicB() (store.KVStore, error) { diff --git a/index/upside_down/benchmark_forestdb_test.go b/index/upside_down/benchmark_forestdb_test.go index 31b33165..89f5cffe 100644 --- a/index/upside_down/benchmark_forestdb_test.go +++ b/index/upside_down/benchmark_forestdb_test.go @@ -16,7 +16,7 @@ import ( "testing" "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/index/store/forestdb" + "github.com/blevesearch/blevex/forestdb" ) func CreateForestDB() (store.KVStore, error) { diff --git a/index/upside_down/benchmark_gorocksdb_test.go b/index/upside_down/benchmark_gorocksdb_test.go index 4c1693f8..fb931c9b 100644 --- a/index/upside_down/benchmark_gorocksdb_test.go +++ b/index/upside_down/benchmark_gorocksdb_test.go @@ -16,7 +16,7 @@ import ( "testing" "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/index/store/gorocksdb" + "github.com/blevesearch/blevex/rocksdb" ) var rocksdbTestOptions = map[string]interface{}{ diff --git a/index/upside_down/benchmark_leveldb_test.go b/index/upside_down/benchmark_leveldb_test.go index 4e6a70ed..9c84fdad 100644 --- a/index/upside_down/benchmark_leveldb_test.go +++ b/index/upside_down/benchmark_leveldb_test.go @@ -7,7 +7,7 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -// +build leveldb full +// +build leveldb package upside_down @@ -16,7 +16,7 @@ import ( "testing" "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/index/store/leveldb" + "github.com/blevesearch/blevex/leveldb" ) var leveldbTestOptions = map[string]interface{}{ diff --git a/index_impl.go b/index_impl.go index c752d80a..256a13f6 100644 --- a/index_impl.go +++ b/index_impl.go @@ -20,6 +20,7 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/index/store/inmem" "github.com/blevesearch/bleve/registry" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/collectors" @@ -49,7 +50,7 @@ func newMemIndex(indexType string, mapping *IndexMapping) (*indexImpl, error) { rv := indexImpl{ path: "", m: mapping, - meta: newIndexMeta(indexType, "mem", nil), + meta: newIndexMeta(indexType, inmem.Name, nil), stats: &IndexStat{}, } diff --git a/index_test.go b/index_test.go index 2ee75df3..e54a6144 100644 --- a/index_test.go +++ b/index_test.go @@ -19,6 +19,8 @@ import ( "sync" "testing" "time" + + "github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer" ) func TestCrud(t *testing.T) { @@ -874,7 +876,7 @@ func TestKeywordSearchBug207(t *testing.T) { }() f := NewTextFieldMapping() - f.Analyzer = "keyword" + f.Analyzer = keyword_analyzer.Name m := NewIndexMapping() m.DefaultMapping = NewDocumentMapping() diff --git a/mapping_index.go b/mapping_index.go index 18692ff5..62130d10 100644 --- a/mapping_index.go +++ b/mapping_index.go @@ -13,6 +13,9 @@ import ( "encoding/json" "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/analyzers/standard_analyzer" + "github.com/blevesearch/bleve/analysis/byte_array_converters/json" + "github.com/blevesearch/bleve/analysis/datetime_parsers/datetime_optional" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/registry" ) @@ -20,9 +23,9 @@ import ( const defaultTypeField = "_type" const defaultType = "_default" const defaultField = "_all" -const defaultAnalyzer = "standard" -const defaultDateTimeParser = "dateTimeOptional" -const defaultByteArrayConverter = "json" +const defaultAnalyzer = standard_analyzer.Name +const defaultDateTimeParser = datetime_optional.Name +const defaultByteArrayConverter = json_byte_array_converter.Name type customAnalysis struct { CharFilters map[string]map[string]interface{} `json:"char_filters,omitempty"` diff --git a/mapping_test.go b/mapping_test.go index 8918e7fe..63cab4de 100644 --- a/mapping_test.go +++ b/mapping_test.go @@ -11,9 +11,12 @@ package bleve import ( "encoding/json" - "github.com/blevesearch/bleve/document" "reflect" "testing" + + "github.com/blevesearch/bleve/analysis/tokenizers/exception" + "github.com/blevesearch/bleve/analysis/tokenizers/regexp_tokenizer" + "github.com/blevesearch/bleve/document" ) var mappingSource = []byte(`{ @@ -248,12 +251,12 @@ func TestMappingForPath(t *testing.T) { func TestMappingWithTokenizerDeps(t *testing.T) { tokNoDeps := map[string]interface{}{ - "type": "regexp", + "type": regexp_tokenizer.Name, "regexp": "", } tokDepsL1 := map[string]interface{}{ - "type": "exception", + "type": exception.Name, "tokenizer": "a", } diff --git a/search/highlight/fragment_formatters/html/fragment_formatter_html.go b/search/highlight/fragment_formatters/html/fragment_formatter_html.go index cd774fbe..a51f710d 100644 --- a/search/highlight/fragment_formatters/html/fragment_formatter_html.go +++ b/search/highlight/fragment_formatters/html/fragment_formatter_html.go @@ -16,8 +16,8 @@ import ( const Name = "html" -const defaultHTMLHighlightBefore = "" -const defaultHTMLHighlightAfter = "" +const defaultHTMLHighlightBefore = "" +const defaultHTMLHighlightAfter = "" type FragmentFormatter struct { before string diff --git a/search/highlight/highlighters/ansi/ansi.go b/search/highlight/highlighters/ansi/ansi.go new file mode 100644 index 00000000..feaabe9d --- /dev/null +++ b/search/highlight/highlighters/ansi/ansi.go @@ -0,0 +1,45 @@ +// Copyright (c) 2015 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + +package ansi + +import ( + "fmt" + + "github.com/blevesearch/bleve/registry" + "github.com/blevesearch/bleve/search/highlight" + ansi_formatter "github.com/blevesearch/bleve/search/highlight/fragment_formatters/ansi" + simple_fragmenter "github.com/blevesearch/bleve/search/highlight/fragmenters/simple" + simple_highlighter "github.com/blevesearch/bleve/search/highlight/highlighters/simple" +) + +const Name = "ansi" + +func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) { + + fragmenter, err := cache.FragmenterNamed(simple_fragmenter.Name) + if err != nil { + return nil, fmt.Errorf("error building fragmenter: %v", err) + } + + formatter, err := cache.FragmentFormatterNamed(ansi_formatter.Name) + if err != nil { + return nil, fmt.Errorf("error building fragment formatter: %v", err) + } + + return simple_highlighter.NewHighlighter( + fragmenter, + formatter, + simple_highlighter.DefaultSeparator), + nil +} + +func init() { + registry.RegisterHighlighter(Name, Constructor) +} diff --git a/search/highlight/highlighters/html/html.go b/search/highlight/highlighters/html/html.go new file mode 100644 index 00000000..3b82ae37 --- /dev/null +++ b/search/highlight/highlighters/html/html.go @@ -0,0 +1,45 @@ +// Copyright (c) 2015 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + +package html + +import ( + "fmt" + + "github.com/blevesearch/bleve/registry" + "github.com/blevesearch/bleve/search/highlight" + html_formatter "github.com/blevesearch/bleve/search/highlight/fragment_formatters/html" + simple_fragmenter "github.com/blevesearch/bleve/search/highlight/fragmenters/simple" + simple_highlighter "github.com/blevesearch/bleve/search/highlight/highlighters/simple" +) + +const Name = "html" + +func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) { + + fragmenter, err := cache.FragmenterNamed(simple_fragmenter.Name) + if err != nil { + return nil, fmt.Errorf("error building fragmenter: %v", err) + } + + formatter, err := cache.FragmentFormatterNamed(html_formatter.Name) + if err != nil { + return nil, fmt.Errorf("error building fragment formatter: %v", err) + } + + return simple_highlighter.NewHighlighter( + fragmenter, + formatter, + simple_highlighter.DefaultSeparator), + nil +} + +func init() { + registry.RegisterHighlighter(Name, Constructor) +} diff --git a/search/highlight/highlighters/simple/highlighter_simple.go b/search/highlight/highlighters/simple/highlighter_simple.go index aad81fc4..32647736 100644 --- a/search/highlight/highlighters/simple/highlighter_simple.go +++ b/search/highlight/highlighters/simple/highlighter_simple.go @@ -20,7 +20,7 @@ import ( ) const Name = "simple" -const defaultSeparator = "…" +const DefaultSeparator = "…" type Highlighter struct { fragmenter highlight.Fragmenter @@ -197,7 +197,7 @@ func (fq *FragmentQueue) Pop() interface{} { } func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) { - separator := defaultSeparator + separator := DefaultSeparator separatorVal, ok := config["separator"].(string) if ok { separator = separatorVal diff --git a/search/highlight/highlighters/simple/highlighter_simple_test.go b/search/highlight/highlighters/simple/highlighter_simple_test.go index 8bc30342..85904072 100644 --- a/search/highlight/highlighters/simple/highlighter_simple_test.go +++ b/search/highlight/highlighters/simple/highlighter_simple_test.go @@ -27,7 +27,7 @@ const ( func TestSimpleHighlighter(t *testing.T) { fragmenter := sfrag.NewFragmenter(100) formatter := ansi.NewFragmentFormatter(ansi.DefaultAnsiHighlight) - highlighter := NewHighlighter(fragmenter, formatter, defaultSeparator) + highlighter := NewHighlighter(fragmenter, formatter, DefaultSeparator) docMatch := search.DocumentMatch{ ID: "a", @@ -154,7 +154,7 @@ Etiam vel augue vel nisl commodo suscipit et ac nisl. Quisque eros diam, porttit fragmenter := sfrag.NewFragmenter(100) formatter := ansi.NewFragmentFormatter(ansi.DefaultAnsiHighlight) - highlighter := NewHighlighter(fragmenter, formatter, defaultSeparator) + highlighter := NewHighlighter(fragmenter, formatter, DefaultSeparator) fragments := highlighter.BestFragmentsInField(&docMatch, doc, "full", 5) if !reflect.DeepEqual(fragments, expectedFragments) { diff --git a/test/integration_test.go b/test/integration_test.go index b40daa97..374c5f8f 100644 --- a/test/integration_test.go +++ b/test/integration_test.go @@ -20,6 +20,9 @@ import ( "testing" "github.com/blevesearch/bleve" + + // we must explicitly include any functionality we plan on testing + _ "github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer" ) var dataset = flag.String("dataset", "", "only test datasets matching this regex")