2014-07-30 18:30:38 +02:00
|
|
|
// Copyright (c) 2014 Couchbase, Inc.
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
|
|
|
// except in compliance with the License. You may obtain a copy of the License at
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the
|
|
|
|
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
|
|
|
// either express or implied. See the License for the specific language governing permissions
|
|
|
|
// and limitations under the License.
|
2014-08-29 20:18:36 +02:00
|
|
|
|
2014-07-30 18:30:38 +02:00
|
|
|
package bleve
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
2016-01-29 19:18:54 +01:00
|
|
|
"fmt"
|
2014-07-30 18:30:38 +02:00
|
|
|
|
2014-08-28 21:38:57 +02:00
|
|
|
"github.com/blevesearch/bleve/analysis"
|
2015-09-16 23:10:59 +02:00
|
|
|
"github.com/blevesearch/bleve/analysis/analyzers/standard_analyzer"
|
|
|
|
"github.com/blevesearch/bleve/analysis/datetime_parsers/datetime_optional"
|
2014-08-28 21:38:57 +02:00
|
|
|
"github.com/blevesearch/bleve/document"
|
|
|
|
"github.com/blevesearch/bleve/registry"
|
2014-07-30 18:30:38 +02:00
|
|
|
)
|
|
|
|
|
2016-01-29 19:18:54 +01:00
|
|
|
var MappingJSONStrict = false
|
|
|
|
|
2014-08-29 20:43:06 +02:00
|
|
|
const defaultTypeField = "_type"
|
|
|
|
const defaultType = "_default"
|
|
|
|
const defaultField = "_all"
|
2015-09-16 23:10:59 +02:00
|
|
|
const defaultAnalyzer = standard_analyzer.Name
|
|
|
|
const defaultDateTimeParser = datetime_optional.Name
|
2014-07-30 18:30:38 +02:00
|
|
|
|
2014-09-01 19:55:23 +02:00
|
|
|
type customAnalysis struct {
|
2014-09-01 20:16:31 +02:00
|
|
|
CharFilters map[string]map[string]interface{} `json:"char_filters,omitempty"`
|
|
|
|
Tokenizers map[string]map[string]interface{} `json:"tokenizers,omitempty"`
|
|
|
|
TokenMaps map[string]map[string]interface{} `json:"token_maps,omitempty"`
|
|
|
|
TokenFilters map[string]map[string]interface{} `json:"token_filters,omitempty"`
|
|
|
|
Analyzers map[string]map[string]interface{} `json:"analyzers,omitempty"`
|
|
|
|
DateTimeParsers map[string]map[string]interface{} `json:"date_time_parsers,omitempty"`
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *customAnalysis) registerAll(i *IndexMapping) error {
|
|
|
|
for name, config := range c.CharFilters {
|
|
|
|
_, err := i.cache.DefineCharFilter(name, config)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2015-05-14 15:50:10 +02:00
|
|
|
|
|
|
|
if len(c.Tokenizers) > 0 {
|
|
|
|
// put all the names in map tracking work to do
|
|
|
|
todo := map[string]struct{}{}
|
2016-04-03 03:54:33 +02:00
|
|
|
for name := range c.Tokenizers {
|
2015-05-14 15:50:10 +02:00
|
|
|
todo[name] = struct{}{}
|
|
|
|
}
|
|
|
|
registered := 1
|
|
|
|
errs := []error{}
|
|
|
|
// as long as we keep making progress, keep going
|
|
|
|
for len(todo) > 0 && registered > 0 {
|
|
|
|
registered = 0
|
|
|
|
errs = []error{}
|
2016-04-03 03:54:33 +02:00
|
|
|
for name := range todo {
|
2015-05-14 15:50:10 +02:00
|
|
|
config := c.Tokenizers[name]
|
|
|
|
_, err := i.cache.DefineTokenizer(name, config)
|
|
|
|
if err != nil {
|
|
|
|
errs = append(errs, err)
|
|
|
|
} else {
|
|
|
|
delete(todo, name)
|
|
|
|
registered++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(errs) > 0 {
|
|
|
|
return errs[0]
|
2014-09-01 20:16:31 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
for name, config := range c.TokenMaps {
|
|
|
|
_, err := i.cache.DefineTokenMap(name, config)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for name, config := range c.TokenFilters {
|
|
|
|
_, err := i.cache.DefineTokenFilter(name, config)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for name, config := range c.Analyzers {
|
|
|
|
_, err := i.cache.DefineAnalyzer(name, config)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for name, config := range c.DateTimeParsers {
|
|
|
|
_, err := i.cache.DefineDateTimeParser(name, config)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
2014-09-01 19:55:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func newCustomAnalysis() *customAnalysis {
|
|
|
|
rv := customAnalysis{
|
2014-09-01 20:16:31 +02:00
|
|
|
CharFilters: make(map[string]map[string]interface{}),
|
|
|
|
Tokenizers: make(map[string]map[string]interface{}),
|
|
|
|
TokenMaps: make(map[string]map[string]interface{}),
|
|
|
|
TokenFilters: make(map[string]map[string]interface{}),
|
|
|
|
Analyzers: make(map[string]map[string]interface{}),
|
|
|
|
DateTimeParsers: make(map[string]map[string]interface{}),
|
2014-09-01 19:55:23 +02:00
|
|
|
}
|
|
|
|
return &rv
|
|
|
|
}
|
|
|
|
|
2014-12-18 18:43:12 +01:00
|
|
|
// An IndexMapping controls how objects are placed
// into an index.
// First the type of the object is determined.
// Once the type is known, the appropriate
// DocumentMapping is selected by the type.
// If no mapping was determined for that type,
// a DefaultMapping will be used.
type IndexMapping struct {
	// TypeMapping maps a document type name to the DocumentMapping
	// used for documents of that type.
	TypeMapping map[string]*DocumentMapping `json:"types,omitempty"`
	// DefaultMapping is applied when a document's type has no entry
	// in TypeMapping.
	DefaultMapping *DocumentMapping `json:"default_mapping"`
	// TypeField names the document property consulted to determine
	// the document type (see determineType).
	TypeField string `json:"type_field"`
	// DefaultType is the type assigned when none can be determined.
	DefaultType string `json:"default_type"`
	// DefaultAnalyzer names the analyzer used for fields with no
	// explicit analyzer configuration.
	DefaultAnalyzer string `json:"default_analyzer"`
	// DefaultDateTimeParser names the date time parser used for fields
	// with no explicit configuration.
	DefaultDateTimeParser string `json:"default_datetime_parser"`
	StoreDynamic          bool   `json:"store_dynamic"`
	IndexDynamic          bool   `json:"index_dynamic"`
	// DefaultField is the name of the composite field (typically "_all").
	DefaultField string `json:"default_field"`
	// CustomAnalysis holds user-defined analysis components serialized
	// with the mapping.
	CustomAnalysis *customAnalysis `json:"analysis,omitempty"`
	// cache resolves named analysis components at runtime; rebuilt on
	// unmarshal, never serialized.
	cache *registry.Cache
}
|
|
|
|
|
2014-12-18 18:43:12 +01:00
|
|
|
// AddCustomCharFilter defines a custom char filter for use in this mapping
|
2014-09-02 23:40:46 +02:00
|
|
|
func (im *IndexMapping) AddCustomCharFilter(name string, config map[string]interface{}) error {
|
|
|
|
_, err := im.cache.DefineCharFilter(name, config)
|
2014-09-01 19:55:23 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2014-09-02 23:40:46 +02:00
|
|
|
im.CustomAnalysis.CharFilters[name] = config
|
2014-09-01 19:55:23 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2014-09-02 23:40:46 +02:00
|
|
|
// AddCustomTokenizer defines a custom tokenizer for use in this mapping
|
|
|
|
func (im *IndexMapping) AddCustomTokenizer(name string, config map[string]interface{}) error {
|
|
|
|
_, err := im.cache.DefineTokenizer(name, config)
|
2014-09-01 19:55:23 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2014-09-02 23:40:46 +02:00
|
|
|
im.CustomAnalysis.Tokenizers[name] = config
|
2014-09-01 19:55:23 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2014-09-02 23:40:46 +02:00
|
|
|
// AddCustomTokenMap defines a custom token map for use in this mapping
|
|
|
|
func (im *IndexMapping) AddCustomTokenMap(name string, config map[string]interface{}) error {
|
|
|
|
_, err := im.cache.DefineTokenMap(name, config)
|
2014-09-01 19:55:23 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2014-09-02 23:40:46 +02:00
|
|
|
im.CustomAnalysis.TokenMaps[name] = config
|
2014-09-01 19:55:23 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2014-09-02 23:40:46 +02:00
|
|
|
// AddCustomTokenFilter defines a custom token filter for use in this mapping
|
|
|
|
func (im *IndexMapping) AddCustomTokenFilter(name string, config map[string]interface{}) error {
|
|
|
|
_, err := im.cache.DefineTokenFilter(name, config)
|
2014-09-01 19:55:23 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2014-09-02 23:40:46 +02:00
|
|
|
im.CustomAnalysis.TokenFilters[name] = config
|
2014-09-01 19:55:23 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-10-02 17:38:07 +02:00
|
|
|
// AddCustomAnalyzer defines a custom analyzer for use in this mapping. The
|
|
|
|
// config map must have a "type" string entry to resolve the analyzer
|
|
|
|
// constructor. The constructor is invoked with the remaining entries and
|
|
|
|
// returned analyzer is registered in the IndexMapping.
|
|
|
|
//
|
|
|
|
// bleve comes with predefined analyzers, like
|
|
|
|
// github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer. They are
|
|
|
|
// available only if their package is imported by client code. To achieve this,
|
|
|
|
// use their metadata to fill configuration entries:
|
|
|
|
//
|
|
|
|
// import (
|
|
|
|
// "github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer"
|
|
|
|
// "github.com/blevesearch/bleve/analysis/char_filters/html_char_filter"
|
|
|
|
// "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
|
|
|
|
// "github.com/blevesearch/bleve/analysis/tokenizers/unicode"
|
|
|
|
// )
|
|
|
|
//
|
|
|
|
// m := bleve.NewIndexMapping()
|
|
|
|
// err := m.AddCustomAnalyzer("html", map[string]interface{}{
|
|
|
|
// "type": custom_analyzer.Name,
|
|
|
|
// "char_filters": []string{
|
|
|
|
// html_char_filter.Name,
|
|
|
|
// },
|
|
|
|
// "tokenizer": unicode.Name,
|
|
|
|
// "token_filters": []string{
|
|
|
|
// lower_case_filter.Name,
|
|
|
|
// ...
|
|
|
|
// },
|
|
|
|
// })
|
2014-09-02 23:40:46 +02:00
|
|
|
func (im *IndexMapping) AddCustomAnalyzer(name string, config map[string]interface{}) error {
|
|
|
|
_, err := im.cache.DefineAnalyzer(name, config)
|
2014-09-01 19:55:23 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2014-09-02 23:40:46 +02:00
|
|
|
im.CustomAnalysis.Analyzers[name] = config
|
2014-09-01 19:55:23 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2014-09-02 23:40:46 +02:00
|
|
|
// AddCustomDateTimeParser defines a custom date time parser for use in this mapping
|
|
|
|
func (im *IndexMapping) AddCustomDateTimeParser(name string, config map[string]interface{}) error {
|
|
|
|
_, err := im.cache.DefineDateTimeParser(name, config)
|
2014-09-01 19:55:23 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2014-09-02 23:40:46 +02:00
|
|
|
im.CustomAnalysis.DateTimeParsers[name] = config
|
2014-09-01 19:55:23 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2014-09-02 23:40:46 +02:00
|
|
|
// NewIndexMapping creates a new IndexMapping that will use all the default indexing rules
|
2014-07-30 18:30:38 +02:00
|
|
|
func NewIndexMapping() *IndexMapping {
|
|
|
|
return &IndexMapping{
|
2014-08-14 03:14:47 +02:00
|
|
|
TypeMapping: make(map[string]*DocumentMapping),
|
|
|
|
DefaultMapping: NewDocumentMapping(),
|
2014-08-29 20:43:06 +02:00
|
|
|
TypeField: defaultTypeField,
|
|
|
|
DefaultType: defaultType,
|
|
|
|
DefaultAnalyzer: defaultAnalyzer,
|
|
|
|
DefaultDateTimeParser: defaultDateTimeParser,
|
|
|
|
DefaultField: defaultField,
|
2016-03-08 13:58:29 +01:00
|
|
|
IndexDynamic: IndexDynamic,
|
|
|
|
StoreDynamic: StoreDynamic,
|
2014-09-01 19:55:23 +02:00
|
|
|
CustomAnalysis: newCustomAnalysis(),
|
2014-08-14 03:14:47 +02:00
|
|
|
cache: registry.NewCache(),
|
2014-07-30 18:30:38 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-14 03:14:47 +02:00
|
|
|
// Validate will walk the entire structure ensuring the following
|
|
|
|
// explicitly named and default analyzers can be built
|
2016-03-28 23:14:41 +02:00
|
|
|
func (im *IndexMapping) Validate() error {
|
2014-08-14 03:14:47 +02:00
|
|
|
_, err := im.cache.AnalyzerNamed(im.DefaultAnalyzer)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
_, err = im.cache.DateTimeParserNamed(im.DefaultDateTimeParser)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-03-28 23:14:41 +02:00
|
|
|
err = im.DefaultMapping.Validate(im.cache)
|
2014-08-14 03:14:47 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
for _, docMapping := range im.TypeMapping {
|
2016-03-28 23:14:41 +02:00
|
|
|
err = docMapping.Validate(im.cache)
|
2014-08-14 03:14:47 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2014-09-02 23:40:46 +02:00
|
|
|
// AddDocumentMapping sets a custom document mapping for the specified type.
// Documents whose type resolves to doctype will be mapped with dm instead
// of the DefaultMapping.
func (im *IndexMapping) AddDocumentMapping(doctype string, dm *DocumentMapping) {
	im.TypeMapping[doctype] = dm
}
|
|
|
|
|
2014-08-30 05:50:47 +02:00
|
|
|
func (im *IndexMapping) mappingForType(docType string) *DocumentMapping {
|
2014-07-30 18:30:38 +02:00
|
|
|
docMapping := im.TypeMapping[docType]
|
|
|
|
if docMapping == nil {
|
|
|
|
docMapping = im.DefaultMapping
|
|
|
|
}
|
|
|
|
return docMapping
|
|
|
|
}
|
|
|
|
|
2016-01-29 19:18:54 +01:00
|
|
|
// UnmarshalJSON offers custom unmarshaling with optional strict validation
|
2014-07-30 18:30:38 +02:00
|
|
|
func (im *IndexMapping) UnmarshalJSON(data []byte) error {
|
2016-01-29 19:18:54 +01:00
|
|
|
|
|
|
|
var tmp map[string]json.RawMessage
|
2014-07-30 18:30:38 +02:00
|
|
|
err := json.Unmarshal(data, &tmp)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2016-01-29 19:18:54 +01:00
|
|
|
// set defaults for fields which might have been omitted
|
2014-08-14 03:14:47 +02:00
|
|
|
im.cache = registry.NewCache()
|
2014-09-01 20:16:31 +02:00
|
|
|
im.CustomAnalysis = newCustomAnalysis()
|
2014-08-29 20:43:06 +02:00
|
|
|
im.TypeField = defaultTypeField
|
|
|
|
im.DefaultType = defaultType
|
|
|
|
im.DefaultAnalyzer = defaultAnalyzer
|
|
|
|
im.DefaultDateTimeParser = defaultDateTimeParser
|
|
|
|
im.DefaultField = defaultField
|
2014-08-14 03:14:47 +02:00
|
|
|
im.DefaultMapping = NewDocumentMapping()
|
2016-01-29 19:18:54 +01:00
|
|
|
im.TypeMapping = make(map[string]*DocumentMapping)
|
2016-03-08 13:58:29 +01:00
|
|
|
im.StoreDynamic = StoreDynamic
|
|
|
|
im.IndexDynamic = IndexDynamic
|
2016-01-29 19:18:54 +01:00
|
|
|
|
|
|
|
var invalidKeys []string
|
|
|
|
for k, v := range tmp {
|
|
|
|
switch k {
|
|
|
|
case "analysis":
|
|
|
|
err := json.Unmarshal(v, &im.CustomAnalysis)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
case "type_field":
|
|
|
|
err := json.Unmarshal(v, &im.TypeField)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
case "default_type":
|
|
|
|
err := json.Unmarshal(v, &im.DefaultType)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
case "default_analyzer":
|
|
|
|
err := json.Unmarshal(v, &im.DefaultAnalyzer)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
case "default_datetime_parser":
|
|
|
|
err := json.Unmarshal(v, &im.DefaultDateTimeParser)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
case "default_field":
|
|
|
|
err := json.Unmarshal(v, &im.DefaultField)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
case "default_mapping":
|
|
|
|
err := json.Unmarshal(v, &im.DefaultMapping)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
case "types":
|
|
|
|
err := json.Unmarshal(v, &im.TypeMapping)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-03-08 13:58:29 +01:00
|
|
|
case "store_dynamic":
|
|
|
|
err := json.Unmarshal(v, &im.StoreDynamic)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
case "index_dynamic":
|
|
|
|
err := json.Unmarshal(v, &im.IndexDynamic)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-01-29 19:18:54 +01:00
|
|
|
default:
|
|
|
|
invalidKeys = append(invalidKeys, k)
|
|
|
|
}
|
2014-08-14 03:14:47 +02:00
|
|
|
}
|
|
|
|
|
2016-01-29 19:18:54 +01:00
|
|
|
if MappingJSONStrict && len(invalidKeys) > 0 {
|
|
|
|
return fmt.Errorf("index mapping contains invalid keys: %v", invalidKeys)
|
2014-07-30 18:30:38 +02:00
|
|
|
}
|
2014-09-01 20:16:31 +02:00
|
|
|
|
|
|
|
err = im.CustomAnalysis.registerAll(im)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2014-07-30 18:30:38 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2014-08-14 03:14:47 +02:00
|
|
|
func (im *IndexMapping) determineType(data interface{}) string {
|
2016-01-22 18:04:24 +01:00
|
|
|
// first see if the object implements Classifier
|
2014-07-30 18:30:38 +02:00
|
|
|
classifier, ok := data.(Classifier)
|
|
|
|
if ok {
|
2014-08-14 03:14:47 +02:00
|
|
|
return classifier.Type()
|
2014-07-30 18:30:38 +02:00
|
|
|
}
|
|
|
|
|
2014-12-18 18:43:12 +01:00
|
|
|
// now see if we can find a type using the mapping
|
2014-08-14 03:14:47 +02:00
|
|
|
typ, ok := mustString(lookupPropertyPath(data, im.TypeField))
|
|
|
|
if ok {
|
|
|
|
return typ
|
2014-07-30 18:30:38 +02:00
|
|
|
}
|
|
|
|
|
2014-08-14 03:14:47 +02:00
|
|
|
return im.DefaultType
|
2014-07-30 18:30:38 +02:00
|
|
|
}
|
|
|
|
|
2014-08-30 06:06:16 +02:00
|
|
|
func (im *IndexMapping) mapDocument(doc *document.Document, data interface{}) error {
|
2014-08-14 03:14:47 +02:00
|
|
|
docType := im.determineType(data)
|
2014-08-30 05:50:47 +02:00
|
|
|
docMapping := im.mappingForType(docType)
|
2014-09-03 19:02:10 +02:00
|
|
|
walkContext := im.newWalkContext(doc, docMapping)
|
2016-01-22 01:16:16 +01:00
|
|
|
if docMapping.Enabled {
|
|
|
|
docMapping.walkDocument(data, []string{}, []uint64{}, walkContext)
|
|
|
|
|
|
|
|
// see if the _all field was disabled
|
|
|
|
allMapping := docMapping.documentMappingForPath("_all")
|
|
|
|
if allMapping == nil || (allMapping.Enabled != false) {
|
|
|
|
field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, document.IndexField|document.IncludeTermVectors)
|
|
|
|
doc.AddField(field)
|
|
|
|
}
|
2014-07-30 18:30:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// walkContext carries the state shared across a single document walk:
// the destination document, the owning index mapping, the document
// mapping being applied, and the names of fields excluded from the
// composite "_all" field.
type walkContext struct {
	doc             *document.Document
	im              *IndexMapping
	dm              *DocumentMapping
	excludedFromAll []string
}
|
|
|
|
|
2014-09-03 19:02:10 +02:00
|
|
|
func (im *IndexMapping) newWalkContext(doc *document.Document, dm *DocumentMapping) *walkContext {
|
2014-07-30 18:30:38 +02:00
|
|
|
return &walkContext{
|
|
|
|
doc: doc,
|
2014-09-03 16:51:21 +02:00
|
|
|
im: im,
|
2014-09-03 19:02:10 +02:00
|
|
|
dm: dm,
|
2014-07-30 18:30:38 +02:00
|
|
|
excludedFromAll: []string{},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// attempts to find the best analyzer to use with only a field name
|
|
|
|
// will walk all the document types, look for field mappings at the
|
|
|
|
// provided path, if one exists and it has an explicit analyzer
|
|
|
|
// that is returned
|
|
|
|
// nil should be an acceptable return value meaning we don't know
|
2014-08-14 03:14:47 +02:00
|
|
|
func (im *IndexMapping) analyzerNameForPath(path string) string {
|
2014-07-30 18:30:38 +02:00
|
|
|
// first we look for explicit mapping on the field
|
|
|
|
for _, docMapping := range im.TypeMapping {
|
2015-02-13 14:45:47 +01:00
|
|
|
analyzerName := docMapping.analyzerNameForPath(path)
|
|
|
|
if analyzerName != "" {
|
|
|
|
return analyzerName
|
2014-07-30 18:30:38 +02:00
|
|
|
}
|
|
|
|
}
|
2015-01-22 15:54:32 +01:00
|
|
|
// now try the default mapping
|
|
|
|
pathMapping := im.DefaultMapping.documentMappingForPath(path)
|
|
|
|
if pathMapping != nil {
|
|
|
|
if len(pathMapping.Fields) > 0 {
|
|
|
|
if pathMapping.Fields[0].Analyzer != "" {
|
|
|
|
return pathMapping.Fields[0].Analyzer
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-07-30 18:30:38 +02:00
|
|
|
|
|
|
|
// next we will try default analyzers for the path
|
2014-08-14 03:14:47 +02:00
|
|
|
pathDecoded := decodePath(path)
|
2014-07-30 18:30:38 +02:00
|
|
|
for _, docMapping := range im.TypeMapping {
|
2014-08-14 03:14:47 +02:00
|
|
|
rv := docMapping.defaultAnalyzerName(pathDecoded)
|
|
|
|
if rv != "" {
|
2014-07-30 18:30:38 +02:00
|
|
|
return rv
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-14 03:14:47 +02:00
|
|
|
return im.DefaultAnalyzer
|
|
|
|
}
|
|
|
|
|
2014-08-30 05:50:47 +02:00
|
|
|
func (im *IndexMapping) analyzerNamed(name string) *analysis.Analyzer {
|
2014-08-14 03:14:47 +02:00
|
|
|
analyzer, err := im.cache.AnalyzerNamed(name)
|
|
|
|
if err != nil {
|
2014-12-28 21:14:48 +01:00
|
|
|
logger.Printf("error using analyzer named: %s", name)
|
2014-08-14 03:14:47 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return analyzer
|
|
|
|
}
|
|
|
|
|
2014-08-30 05:50:47 +02:00
|
|
|
func (im *IndexMapping) dateTimeParserNamed(name string) analysis.DateTimeParser {
|
2014-08-14 03:14:47 +02:00
|
|
|
dateTimeParser, err := im.cache.DateTimeParserNamed(name)
|
|
|
|
if err != nil {
|
2014-12-28 21:14:48 +01:00
|
|
|
logger.Printf("error using datetime parser named: %s", name)
|
2014-08-14 03:14:47 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return dateTimeParser
|
2014-07-30 18:30:38 +02:00
|
|
|
}
|
2014-08-03 23:19:04 +02:00
|
|
|
|
2014-08-14 03:14:47 +02:00
|
|
|
func (im *IndexMapping) datetimeParserNameForPath(path string) string {
|
2014-08-03 23:19:04 +02:00
|
|
|
|
|
|
|
// first we look for explicit mapping on the field
|
|
|
|
for _, docMapping := range im.TypeMapping {
|
2014-08-30 06:13:46 +02:00
|
|
|
pathMapping := docMapping.documentMappingForPath(path)
|
2014-08-03 23:19:04 +02:00
|
|
|
if pathMapping != nil {
|
|
|
|
if len(pathMapping.Fields) > 0 {
|
2014-09-03 22:40:10 +02:00
|
|
|
if pathMapping.Fields[0].Analyzer != "" {
|
|
|
|
return pathMapping.Fields[0].Analyzer
|
2014-08-03 23:19:04 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-14 03:14:47 +02:00
|
|
|
return im.DefaultDateTimeParser
|
2014-08-06 14:23:29 +02:00
|
|
|
}
|
2014-09-10 00:15:14 +02:00
|
|
|
|
|
|
|
func (im *IndexMapping) AnalyzeText(analyzerName string, text []byte) (analysis.TokenStream, error) {
|
|
|
|
analyzer, err := im.cache.AnalyzerNamed(analyzerName)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return analyzer.Analyze(text), nil
|
|
|
|
}
|
2015-11-19 11:13:29 +01:00
|
|
|
|
|
|
|
// FieldAnalyzer returns the name of the analyzer used on a field.
// It is a public wrapper around analyzerNameForPath.
func (im *IndexMapping) FieldAnalyzer(field string) string {
	return im.analyzerNameForPath(field)
}
|