diff --git a/index.go b/index.go index c2d7f48f..73c49d8b 100644 --- a/index.go +++ b/index.go @@ -127,7 +127,10 @@ func (b *Batch) Reset() { // mapping is found and the root DocumentMapping is dynamic, default mappings // are used based on value type and IndexMapping default configurations. // -// Finally, mapped values are analyzed, indexed or stored. Examples: +// Finally, mapped values are analyzed, indexed or stored. See +// FieldMapping.Analyzer for how an analyzer is resolved for a given field. +// +// Examples: // // type Date struct { // Day string `json:"day"` diff --git a/index_test.go b/index_test.go index 5488adde..0f4d4850 100644 --- a/index_test.go +++ b/index_test.go @@ -433,7 +433,7 @@ func TestStoredFieldPreserved(t *testing.T) { } if len(res.Hits) != 1 { - t.Errorf("expected 1 hit, got %d", len(res.Hits)) + t.Fatalf("expected 1 hit, got %d", len(res.Hits)) } if res.Hits[0].Fields["name"] != "Marty" { diff --git a/mapping_field.go b/mapping_field.go index 3ff4461b..0cc10082 100644 --- a/mapping_field.go +++ b/mapping_field.go @@ -19,9 +19,15 @@ import ( // A FieldMapping describes how a specific item // should be put into the index. type FieldMapping struct { - Name string `json:"name,omitempty"` - Type string `json:"type,omitempty"` - Analyzer string `json:"analyzer,omitempty"` + Name string `json:"name,omitempty"` + Type string `json:"type,omitempty"` + + // Analyzer specifies the name of the analyzer to use for this field. If + // Analyzer is empty, the DocumentMapping tree is traversed toward the root + // and the first non-empty DefaultAnalyzer found is used. If there is none, + // IndexMapping.DefaultAnalyzer is used. 
+ Analyzer string `json:"analyzer,omitempty"` + Store bool `json:"store,omitempty"` Index bool `json:"index,omitempty"` IncludeTermVectors bool `json:"include_term_vectors,omitempty"` @@ -132,12 +138,12 @@ func (fm *FieldMapping) processTime(propertyValueTime time.Time, pathString stri } func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) *analysis.Analyzer { - analyzerName := context.dm.defaultAnalyzerName(path) + analyzerName := fm.Analyzer if analyzerName == "" { - analyzerName = context.im.DefaultAnalyzer - } - if fm.Analyzer != "" { - analyzerName = fm.Analyzer + analyzerName = context.dm.defaultAnalyzerName(path) + if analyzerName == "" { + analyzerName = context.im.DefaultAnalyzer + } } return context.im.analyzerNamed(analyzerName) } diff --git a/mapping_index.go b/mapping_index.go index 62130d10..21a40082 100644 --- a/mapping_index.go +++ b/mapping_index.go @@ -171,7 +171,35 @@ func (im *IndexMapping) AddCustomTokenFilter(name string, config map[string]inte return nil } -// AddCustomAnalyzer defines a custom analyzer for use in this mapping +// AddCustomAnalyzer defines a custom analyzer for use in this mapping. The +// config map must have a "type" string entry to resolve the analyzer +// constructor. The constructor is invoked with the remaining entries and +// the returned analyzer is registered in the IndexMapping. +// +// bleve comes with predefined analyzers, like +// github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer. They are +// available only if their package is imported by client code. 
To achieve this, +// use their metadata to fill configuration entries: +// +// import ( +// "github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer" +// "github.com/blevesearch/bleve/analysis/char_filters/html_char_filter" +// "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" +// "github.com/blevesearch/bleve/analysis/tokenizers/unicode" +// ) +// +// m := bleve.NewIndexMapping() +// err := m.AddCustomAnalyzer("html", map[string]interface{}{ +// "type": custom_analyzer.Name, +// "char_filters": []string{ +// html_char_filter.Name, +// }, +// "tokenizer": unicode.Name, +// "token_filters": []string{ +// lower_case_filter.Name, +// ... +// }, +// }) func (im *IndexMapping) AddCustomAnalyzer(name string, config map[string]interface{}) error { _, err := im.cache.DefineAnalyzer(name, config) if err != nil {