From 498e4a0de79a39389d31dab1e1f1f6ad0a2215a5 Mon Sep 17 00:00:00 2001 From: Patrick Mezard Date: Fri, 2 Oct 2015 15:35:48 +0200 Subject: [PATCH 1/3] simplify FieldMapping.analyzerForField() I stumbled onto that while trying to understand how analyzers are resolved. The new code looks simpler to me and removes useless calls to DocumentMapping.defaultAnalyzerName() when an analyzer is set at FieldMapping level. The slight change to TestStoredFieldPreserved avoids a stacktrace when the test fails. --- index_test.go | 2 +- mapping_field.go | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/index_test.go b/index_test.go index 5488adde..0f4d4850 100644 --- a/index_test.go +++ b/index_test.go @@ -433,7 +433,7 @@ func TestStoredFieldPreserved(t *testing.T) { } if len(res.Hits) != 1 { - t.Errorf("expected 1 hit, got %d", len(res.Hits)) + t.Fatalf("expected 1 hit, got %d", len(res.Hits)) } if res.Hits[0].Fields["name"] != "Marty" { diff --git a/mapping_field.go b/mapping_field.go index 3ff4461b..c5178cf5 100644 --- a/mapping_field.go +++ b/mapping_field.go @@ -132,12 +132,12 @@ func (fm *FieldMapping) processTime(propertyValueTime time.Time, pathString stri } func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) *analysis.Analyzer { - analyzerName := context.dm.defaultAnalyzerName(path) + analyzerName := fm.Analyzer if analyzerName == "" { - analyzerName = context.im.DefaultAnalyzer - } - if fm.Analyzer != "" { - analyzerName = fm.Analyzer + analyzerName = context.dm.defaultAnalyzerName(path) + if analyzerName == "" { + analyzerName = context.im.DefaultAnalyzer + } } return context.im.analyzerNamed(analyzerName) } From ed1bdbf599a4e7461322480d997d782f9a5f415e Mon Sep 17 00:00:00 2001 From: Patrick Mezard Date: Fri, 2 Oct 2015 17:00:45 +0200 Subject: [PATCH 2/3] doc: document field analyzer resolution --- index.go | 5 ++++- mapping_field.go | 12 +++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git 
a/index.go b/index.go index c2d7f48f..73c49d8b 100644 --- a/index.go +++ b/index.go @@ -127,7 +127,10 @@ func (b *Batch) Reset() { // mapping is found and the root DocumentMapping is dynamic, default mappings // are used based on value type and IndexMapping default configurations. // -// Finally, mapped values are analyzed, indexed or stored. Examples: +// Finally, mapped values are analyzed, indexed or stored. See +// FieldMapping.Analyzer to know how an analyzer is resolved for a given field. +// +// Examples: // // type Date struct { // Day string `json:"day"` diff --git a/mapping_field.go b/mapping_field.go index c5178cf5..0cc10082 100644 --- a/mapping_field.go +++ b/mapping_field.go @@ -19,9 +19,15 @@ import ( // A FieldMapping describes how a specific item // should be put into the index. type FieldMapping struct { - Name string `json:"name,omitempty"` - Type string `json:"type,omitempty"` - Analyzer string `json:"analyzer,omitempty"` + Name string `json:"name,omitempty"` + Type string `json:"type,omitempty"` + + // Analyzer specifies the name of the analyzer to use for this field. If + // Analyzer is empty, traverse the DocumentMapping tree toward the root and + // pick the first non-empty DefaultAnalyzer found. If there is none, use + // the IndexMapping.DefaultAnalyzer. 
+ Analyzer string `json:"analyzer,omitempty"` + Store bool `json:"store,omitempty"` Index bool `json:"index,omitempty"` IncludeTermVectors bool `json:"include_term_vectors,omitempty"` From 2f48c16c848308e3f066e735ba49afbe97acf89a Mon Sep 17 00:00:00 2001 From: Patrick Mezard Date: Fri, 2 Oct 2015 17:38:07 +0200 Subject: [PATCH 3/3] doc: document IndexMapping.AddCustomAnalyzer --- mapping_index.go | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/mapping_index.go b/mapping_index.go index 62130d10..21a40082 100644 --- a/mapping_index.go +++ b/mapping_index.go @@ -171,7 +171,35 @@ func (im *IndexMapping) AddCustomTokenFilter(name string, config map[string]inte return nil } -// AddCustomAnalyzer defines a custom analyzer for use in this mapping +// AddCustomAnalyzer defines a custom analyzer for use in this mapping. The +// config map must have a "type" string entry to resolve the analyzer +// constructor. The constructor is invoked with the remaining entries and +// the returned analyzer is registered in the IndexMapping. +// +// bleve comes with predefined analyzers, like +// github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer. They are +// available only if their package is imported by client code. To achieve this, +// use their metadata to fill configuration entries: +// +// import ( +// "github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer" +// "github.com/blevesearch/bleve/analysis/char_filters/html_char_filter" +// "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" +// "github.com/blevesearch/bleve/analysis/tokenizers/unicode" +// ) +// +// m := bleve.NewIndexMapping() +// err := m.AddCustomAnalyzer("html", map[string]interface{}{ +// "type": custom_analyzer.Name, +// "char_filters": []string{ +// html_char_filter.Name, +// }, +// "tokenizer": unicode.Name, +// "token_filters": []string{ +// lower_case_filter.Name, +// ... 
+// }, +// }) func (im *IndexMapping) AddCustomAnalyzer(name string, config map[string]interface{}) error { _, err := im.cache.DefineAnalyzer(name, config) if err != nil {