diff --git a/analysis/analyzers/custom_analyzer/custom_analyzer.go b/analysis/analyzers/custom_analyzer/custom_analyzer.go new file mode 100644 index 00000000..9278469a --- /dev/null +++ b/analysis/analyzers/custom_analyzer/custom_analyzer.go @@ -0,0 +1,73 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + +package custom_analyzer + +import ( + "fmt", + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +const Name = "custom" + +func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { + + var charFilters []analysis.CharFilter + charFilterNames, ok := config["char_filters"].([]string) + if ok { + charFilters = make([]analysis.CharFilter, len(charFilterNames)) + for i, charFilterName := range charFilterNames { + charFilter, err := cache.CharFilterNamed(charFilterName) + if err != nil { + return nil, err + } + charFilters[i] = charFilter + } + } + + tokenizerName, ok := config["tokenizer"].(string) + if !ok { + return nil, fmt.Errorf("must specify tokenizer") + } + + tokenizer, err := cache.TokenizerNamed(tokenizerName) + if err != nil { + return nil, err + } + + var tokenFilters []analysis.TokenFilter + tokenFilterNames, ok := config["token_filters"].([]string) + if ok { + tokenFilters = make([]analysis.TokenFilter, len(tokenFilterNames)) + for i, tokenFilterName := range tokenFilterNames { + tokenFilter, err := 
cache.TokenFilterNamed(tokenFilterName) + if err != nil { + return nil, err + } + tokenFilters[i] = tokenFilter + } + } + + rv := analysis.Analyzer{ + Tokenizer: tokenizer, + } + if charFilters != nil { + rv.CharFilters = charFilters + } + if tokenFilters != nil { + rv.TokenFilters = tokenFilters + } + return &rv, nil +} + +func init() { + registry.RegisterAnalyzer(Name, AnalyzerConstructor) +} diff --git a/analysis/token_filters/elision_filter/elision_filter_test.go b/analysis/token_filters/elision_filter/elision_filter_test.go index 21fc244c..1db2d670 100644 --- a/analysis/token_filters/elision_filter/elision_filter_test.go +++ b/analysis/token_filters/elision_filter/elision_filter_test.go @@ -40,17 +40,19 @@ func TestElisionFilter(t *testing.T) { cache := registry.NewCache() articleListConfig := map[string]interface{}{ + "type": token_map.Name, "tokens": []interface{}{"ar"}, } - _, err := cache.DefineTokenMap("articles_test", token_map.Name, articleListConfig) + _, err := cache.DefineTokenMap("articles_test", articleListConfig) if err != nil { t.Fatal(err) } elisionConfig := map[string]interface{}{ + "type": "elision", "articles_token_map": "articles_test", } - elisionFilter, err := cache.DefineTokenFilter("elision_test", "elision", elisionConfig) + elisionFilter, err := cache.DefineTokenFilter("elision_test", elisionConfig) if err != nil { t.Fatal(err) } diff --git a/analysis/token_filters/stop_tokens_filter/stop_tokens_filter_test.go b/analysis/token_filters/stop_tokens_filter/stop_tokens_filter_test.go index c60eb9f2..0cfca2a1 100644 --- a/analysis/token_filters/stop_tokens_filter/stop_tokens_filter_test.go +++ b/analysis/token_filters/stop_tokens_filter/stop_tokens_filter_test.go @@ -48,17 +48,19 @@ func TestStopWordsFilter(t *testing.T) { cache := registry.NewCache() stopListConfig := map[string]interface{}{ + "type": token_map.Name, "tokens": []interface{}{"a", "in", "the"}, } - _, err := cache.DefineTokenMap("stop_test", token_map.Name, stopListConfig) + 
_, err := cache.DefineTokenMap("stop_test", stopListConfig) if err != nil { t.Fatal(err) } stopConfig := map[string]interface{}{ + "type": "stop_tokens", "stop_token_map": "stop_test", } - stopFilter, err := cache.DefineTokenFilter("stop_test", "stop_tokens", stopConfig) + stopFilter, err := cache.DefineTokenFilter("stop_test", stopConfig) if err != nil { t.Fatal(err) } diff --git a/config.go b/config.go index bc2d29ec..198ecd04 100644 --- a/config.go +++ b/config.go @@ -30,6 +30,7 @@ import ( _ "github.com/blevesearch/bleve/analysis/char_filters/zero_width_non_joiner" // analyzers + _ "github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer" _ "github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer" _ "github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer" _ "github.com/blevesearch/bleve/analysis/analyzers/standard_analyzer" @@ -119,20 +120,23 @@ func init() { // build the default configuration Config = newConfiguration() - Config.Cache.DefineFragmentFormatter("highlightSpanHTML", "html", + Config.Cache.DefineFragmentFormatter("highlightSpanHTML", map[string]interface{}{ + "type": "html", "before": ``, "after": ``, }) - Config.Cache.DefineHighlighter("html", "simple", + Config.Cache.DefineHighlighter("html", map[string]interface{}{ + "type": "simple", "fragmenter": "simple", "formatter": "highlightSpanHTML", }) - Config.Cache.DefineHighlighter("ansi", "simple", + Config.Cache.DefineHighlighter("ansi", map[string]interface{}{ + "type": "simple", "fragmenter": "simple", "formatter": "ansi", }) diff --git a/examples/beer-search/main.go b/examples/beer-search/main.go index 9aaf3755..3fc22fac 100644 --- a/examples/beer-search/main.go +++ b/examples/beer-search/main.go @@ -37,7 +37,10 @@ func main() { if err == bleve.ERROR_INDEX_PATH_DOES_NOT_EXIST { log.Printf("Creating new index...") // create a mapping - indexMapping := buildIndexMapping() + indexMapping, err := buildIndexMapping() + if err != nil { + log.Fatal(err) + } beerIndex, err = 
bleve.New(*indexPath, indexMapping) if err != nil { log.Fatal(err) diff --git a/examples/beer-search/main_test.go b/examples/beer-search/main_test.go index 167c8f2b..106ffd97 100644 --- a/examples/beer-search/main_test.go +++ b/examples/beer-search/main_test.go @@ -27,7 +27,10 @@ import ( func TestBeerSearchAll(t *testing.T) { defer os.RemoveAll("beer-search-test.bleve") - mapping := buildIndexMapping() + mapping, err := buildIndexMapping() + if err != nil { + t.Fatal(err) + } index, err := bleve.New("beer-search-test.bleve", mapping) if err != nil { t.Fatal(err) @@ -192,7 +195,10 @@ func walkDirectory(dir string, t *testing.T) chan jsonFile { func TestBeerSearchBug87(t *testing.T) { defer os.RemoveAll("beer-search-test.bleve") - mapping := buildIndexMapping() + mapping, err := buildIndexMapping() + if err != nil { + t.Fatal(err) + } index, err := bleve.New("beer-search-test.bleve", mapping) if err != nil { t.Fatal(err) diff --git a/examples/beer-search/mapping.go b/examples/beer-search/mapping.go index c47f4fd0..c7e6a552 100644 --- a/examples/beer-search/mapping.go +++ b/examples/beer-search/mapping.go @@ -6,6 +6,9 @@ // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either express or implied. See the License for the specific language governing permissions // and limitations under the License. + +// +build !example1 + package main import ( @@ -14,7 +17,7 @@ import ( const textFieldAnalyzer = "en" -func buildIndexMapping() *bleve.IndexMapping { +func buildIndexMapping() (*bleve.IndexMapping, error) { nameMapping := bleve.NewDocumentMapping(). 
AddFieldMapping( @@ -67,5 +70,5 @@ func buildIndexMapping() *bleve.IndexMapping { indexMapping.TypeField = "type" indexMapping.DefaultAnalyzer = textFieldAnalyzer - return indexMapping + return indexMapping, nil } diff --git a/examples/beer-search/mapping_example1.go b/examples/beer-search/mapping_example1.go new file mode 100644 index 00000000..239f91de --- /dev/null +++ b/examples/beer-search/mapping_example1.go @@ -0,0 +1,99 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + +// +build example1 + +package main + +import ( + "github.com/blevesearch/bleve" +) + +const textFieldAnalyzer = "en" + +func buildIndexMapping() (*bleve.IndexMapping, error) { + + nameMapping := bleve.NewDocumentMapping(). + AddFieldMapping( + bleve.NewFieldMapping( + "", "text", textFieldAnalyzer, + true, true, true, true)) + + descMapping := bleve.NewDocumentMapping(). + AddFieldMapping( + bleve.NewFieldMapping( + "", "text", "enNotTooLong", + true, true, true, true)). + AddFieldMapping( + bleve.NewFieldMapping("descriptionLang", "text", "detect_lang", + false, true, false, false)) + + typeMapping := bleve.NewDocumentMapping(). + AddFieldMapping( + bleve.NewFieldMapping( + "", "text", "keyword", + true, true, true, true)) + + styleMapping := bleve.NewDocumentMapping(). + AddFieldMapping( + bleve.NewFieldMapping( + "", "text", "keyword", + true, true, true, true)) + + categoryMapping := bleve.NewDocumentMapping(). 
+ AddFieldMapping( + bleve.NewFieldMapping( + "", "text", "keyword", + true, true, true, true)) + + beerMapping := bleve.NewDocumentMapping(). + AddSubDocumentMapping("name", nameMapping). + AddSubDocumentMapping("description", descMapping). + AddSubDocumentMapping("type", typeMapping). + AddSubDocumentMapping("style", styleMapping). + AddSubDocumentMapping("category", categoryMapping) + + breweryMapping := bleve.NewDocumentMapping(). + AddSubDocumentMapping("name", nameMapping). + AddSubDocumentMapping("description", descMapping) + + indexMapping := bleve.NewIndexMapping(). + AddDocumentMapping("beer", beerMapping). + AddDocumentMapping("brewery", breweryMapping) + + indexMapping.TypeField = "type" + indexMapping.DefaultAnalyzer = textFieldAnalyzer + + err := indexMapping.AddCustomTokenFilter("notTooLong", + map[string]interface{}{ + "type": "truncate_token", + "length": 5.0, + }) + if err != nil { + return nil, err + } + + err = indexMapping.AddCustomAnalyzer("enNotTooLong", + map[string]interface{}{ + "type": "custom", + "tokenizer": "unicode", + "token_filters": []string{ + "notTooLong", + "possessive_en", + "to_lower", + "stop_en", + "stemmer_en", + }, + }) + if err != nil { + return nil, err + } + + return indexMapping, nil +} diff --git a/mapping_index.go b/mapping_index.go index c40cdf60..87694e22 100644 --- a/mapping_index.go +++ b/mapping_index.go @@ -31,6 +31,27 @@ const defaultAnalyzer = "standard" const defaultDateTimeParser = "dateTimeOptional" const defaultByteArrayConverter = "json" +type customAnalysis struct { + CharFilters map[string]interface{} `json:"char_filters"` + Tokenizers map[string]interface{} `json:"tokenizers"` + TokenMaps map[string]interface{} `json:"token_maps"` + TokenFilters map[string]interface{} `json:"token_filters"` + Analyzers map[string]interface{} `json:"analyzers"` + DateTimeParsers map[string]interface{} `json:"date_time_parsers"` +} + +func newCustomAnalysis() *customAnalysis { + rv := customAnalysis{ + CharFilters: 
make(map[string]interface{}), + Tokenizers: make(map[string]interface{}), + TokenMaps: make(map[string]interface{}), + TokenFilters: make(map[string]interface{}), + Analyzers: make(map[string]interface{}), + DateTimeParsers: make(map[string]interface{}), + } + return &rv +} + // An IndexMapping controls how objects are place // into an index. // First the type of the object is deteremined. @@ -47,9 +68,64 @@ type IndexMapping struct { DefaultDateTimeParser string `json:"default_datetime_parser"` DefaultField string `json:"default_field"` ByteArrayConverter string `json:"byte_array_converter"` + CustomAnalysis *customAnalysis `json:"analysis"` cache *registry.Cache `json:"_"` } +func (i *IndexMapping) AddCustomCharFilter(name string, config map[string]interface{}) error { + _, err := i.cache.DefineCharFilter(name, config) + if err != nil { + return err + } + i.CustomAnalysis.CharFilters[name] = config + return nil +} + +func (i *IndexMapping) AddCustomTokenizer(name string, config map[string]interface{}) error { + _, err := i.cache.DefineTokenizer(name, config) + if err != nil { + return err + } + i.CustomAnalysis.Tokenizers[name] = config + return nil +} + +func (i *IndexMapping) AddCustomTokenMap(name string, config map[string]interface{}) error { + _, err := i.cache.DefineTokenMap(name, config) + if err != nil { + return err + } + i.CustomAnalysis.TokenMaps[name] = config + return nil +} + +func (i *IndexMapping) AddCustomTokenFilter(name string, config map[string]interface{}) error { + _, err := i.cache.DefineTokenFilter(name, config) + if err != nil { + return err + } + i.CustomAnalysis.TokenFilters[name] = config + return nil +} + +func (i *IndexMapping) AddCustomAnalyzer(name string, config map[string]interface{}) error { + _, err := i.cache.DefineAnalyzer(name, config) + if err != nil { + return err + } + i.CustomAnalysis.Analyzers[name] = config + return nil +} + +func (i *IndexMapping) AddCustomDateTimeParser(name string, config map[string]interface{}) 
error { + _, err := i.cache.DefineDateTimeParser(name, config) + if err != nil { + return err + } + i.CustomAnalysis.DateTimeParsers[name] = config + return nil +} + func NewIndexMapping() *IndexMapping { return &IndexMapping{ TypeMapping: make(map[string]*DocumentMapping), @@ -60,6 +136,7 @@ func NewIndexMapping() *IndexMapping { DefaultDateTimeParser: defaultDateTimeParser, DefaultField: defaultField, ByteArrayConverter: defaultByteArrayConverter, + CustomAnalysis: newCustomAnalysis(), cache: registry.NewCache(), } } @@ -104,6 +181,7 @@ func (im *IndexMapping) mappingForType(docType string) *DocumentMapping { } func (im *IndexMapping) UnmarshalJSON(data []byte) error { + im.CustomAnalysis = newCustomAnalysis() var tmp struct { TypeMapping map[string]*DocumentMapping `json:"types"` DefaultMapping *DocumentMapping `json:"default_mapping"` diff --git a/registry/registry.go b/registry/registry.go index 72b47327..c978cdba 100644 --- a/registry/registry.go +++ b/registry/registry.go @@ -59,11 +59,23 @@ func NewCache() *Cache { } } +func typeFromConfig(config map[string]interface{}) (string, error) { + typ, ok := config["type"].(string) + if ok { + return typ, nil + } + return "", fmt.Errorf("unable to determine type") +} + func (c *Cache) CharFilterNamed(name string) (analysis.CharFilter, error) { return c.CharFilters.CharFilterNamed(name, c) } -func (c *Cache) DefineCharFilter(name string, typ string, config map[string]interface{}) (analysis.CharFilter, error) { +func (c *Cache) DefineCharFilter(name string, config map[string]interface{}) (analysis.CharFilter, error) { + typ, err := typeFromConfig(config) + if err != nil { + return nil, err + } return c.CharFilters.DefineCharFilter(name, typ, config, c) } @@ -71,7 +83,11 @@ func (c *Cache) TokenizerNamed(name string) (analysis.Tokenizer, error) { return c.Tokenizers.TokenizerNamed(name, c) } -func (c *Cache) DefineTokenizer(name string, typ string, config map[string]interface{}) (analysis.Tokenizer, error) { +func (c 
*Cache) DefineTokenizer(name string, config map[string]interface{}) (analysis.Tokenizer, error) { + typ, err := typeFromConfig(config) + if err != nil { + return nil, err + } return c.Tokenizers.DefineTokenizer(name, typ, config, c) } @@ -79,7 +95,11 @@ func (c *Cache) TokenMapNamed(name string) (analysis.TokenMap, error) { return c.TokenMaps.TokenMapNamed(name, c) } -func (c *Cache) DefineTokenMap(name string, typ string, config map[string]interface{}) (analysis.TokenMap, error) { +func (c *Cache) DefineTokenMap(name string, config map[string]interface{}) (analysis.TokenMap, error) { + typ, err := typeFromConfig(config) + if err != nil { + return nil, err + } return c.TokenMaps.DefineTokenMap(name, typ, config, c) } @@ -87,7 +107,11 @@ func (c *Cache) TokenFilterNamed(name string) (analysis.TokenFilter, error) { return c.TokenFilters.TokenFilterNamed(name, c) } -func (c *Cache) DefineTokenFilter(name string, typ string, config map[string]interface{}) (analysis.TokenFilter, error) { +func (c *Cache) DefineTokenFilter(name string, config map[string]interface{}) (analysis.TokenFilter, error) { + typ, err := typeFromConfig(config) + if err != nil { + return nil, err + } return c.TokenFilters.DefineTokenFilter(name, typ, config, c) } @@ -95,7 +119,11 @@ func (c *Cache) AnalyzerNamed(name string) (*analysis.Analyzer, error) { return c.Analyzers.AnalyzerNamed(name, c) } -func (c *Cache) DefineAnalyzer(name string, typ string, config map[string]interface{}) (*analysis.Analyzer, error) { +func (c *Cache) DefineAnalyzer(name string, config map[string]interface{}) (*analysis.Analyzer, error) { + typ, err := typeFromConfig(config) + if err != nil { + return nil, err + } return c.Analyzers.DefineAnalyzer(name, typ, config, c) } @@ -103,7 +131,11 @@ func (c *Cache) DateTimeParserNamed(name string) (analysis.DateTimeParser, error return c.DateTimeParsers.DateTimeParserNamed(name, c) } -func (c *Cache) DefineDateTimeParser(name string, typ string, config map[string]interface{}) 
(analysis.DateTimeParser, error) { +func (c *Cache) DefineDateTimeParser(name string, config map[string]interface{}) (analysis.DateTimeParser, error) { + typ, err := typeFromConfig(config) + if err != nil { + return nil, err + } return c.DateTimeParsers.DefineDateTimeParser(name, typ, config, c) } @@ -111,7 +143,11 @@ func (c *Cache) FragmentFormatterNamed(name string) (highlight.FragmentFormatter return c.FragmentFormatters.FragmentFormatterNamed(name, c) } -func (c *Cache) DefineFragmentFormatter(name string, typ string, config map[string]interface{}) (highlight.FragmentFormatter, error) { +func (c *Cache) DefineFragmentFormatter(name string, config map[string]interface{}) (highlight.FragmentFormatter, error) { + typ, err := typeFromConfig(config) + if err != nil { + return nil, err + } return c.FragmentFormatters.DefineFragmentFormatter(name, typ, config, c) } @@ -119,7 +155,11 @@ func (c *Cache) FragmenterNamed(name string) (highlight.Fragmenter, error) { return c.Fragmenters.FragmenterNamed(name, c) } -func (c *Cache) DefineFragmenter(name string, typ string, config map[string]interface{}) (highlight.Fragmenter, error) { +func (c *Cache) DefineFragmenter(name string, config map[string]interface{}) (highlight.Fragmenter, error) { + typ, err := typeFromConfig(config) + if err != nil { + return nil, err + } return c.Fragmenters.DefineFragmenter(name, typ, config, c) } @@ -127,7 +167,11 @@ func (c *Cache) HighlighterNamed(name string) (highlight.Highlighter, error) { return c.Highlighters.HighlighterNamed(name, c) } -func (c *Cache) DefineHighlighter(name string, typ string, config map[string]interface{}) (highlight.Highlighter, error) { +func (c *Cache) DefineHighlighter(name string, config map[string]interface{}) (highlight.Highlighter, error) { + typ, err := typeFromConfig(config) + if err != nil { + return nil, err + } return c.Highlighters.DefineHighlighter(name, typ, config, c) }