d95c9c1063
In strict parsing mode, unrecognized keys in the mapping will cause errors. This is partial at the moment and does not include the custom analysis components.
398 lines
11 KiB
Go
398 lines
11 KiB
Go
// Copyright (c) 2014 Couchbase, Inc.
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
|
// except in compliance with the License. You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the
|
|
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
|
// either express or implied. See the License for the specific language governing permissions
|
|
// and limitations under the License.
|
|
|
|
package bleve
|
|
|
|
import (
	"encoding/json"
	"fmt"
	"reflect"
	"sort"
	"time"

	"github.com/blevesearch/bleve/registry"
)
|
|
|
|
// A DocumentMapping describes how a type of document
|
|
// should be indexed.
|
|
// As documents can be hierarchical, named sub-sections
|
|
// of documents are mapped using the same structure in
|
|
// the Properties field.
|
|
// Each value inside a document can be indexed 0 or more
|
|
// ways. These index entries are called fields and
|
|
// are stored in the Fields field.
|
|
// Entire sections of a document can be ignored or
|
|
// excluded by setting Enabled to false.
|
|
// If not explicitly mapped, default mapping operations
|
|
// are used. To disable this automatic handling, set
|
|
// Dynamic to false.
|
|
type DocumentMapping struct {
|
|
Enabled bool `json:"enabled"`
|
|
Dynamic bool `json:"dynamic"`
|
|
Properties map[string]*DocumentMapping `json:"properties,omitempty"`
|
|
Fields []*FieldMapping `json:"fields,omitempty"`
|
|
DefaultAnalyzer string `json:"default_analyzer"`
|
|
}
|
|
|
|
func (dm *DocumentMapping) validate(cache *registry.Cache) error {
|
|
var err error
|
|
if dm.DefaultAnalyzer != "" {
|
|
_, err := cache.AnalyzerNamed(dm.DefaultAnalyzer)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, property := range dm.Properties {
|
|
err = property.validate(cache)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, field := range dm.Fields {
|
|
if field.Analyzer != "" {
|
|
_, err = cache.AnalyzerNamed(field.Analyzer)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if field.DateFormat != "" {
|
|
_, err = cache.DateTimeParserNamed(field.DateFormat)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
switch field.Type {
|
|
case "text", "datetime", "number", "boolean":
|
|
default:
|
|
return fmt.Errorf("unknown field type: '%s'", field.Type)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (dm *DocumentMapping) analyzerNameForPath(path string) string {
|
|
pathElements := decodePath(path)
|
|
last := false
|
|
current := dm
|
|
OUTER:
|
|
for i, pathElement := range pathElements {
|
|
if i == len(pathElements)-1 {
|
|
last = true
|
|
}
|
|
for name, subDocMapping := range current.Properties {
|
|
for _, field := range subDocMapping.Fields {
|
|
if field.Name == "" && name == pathElement {
|
|
if last {
|
|
return field.Analyzer
|
|
}
|
|
current = subDocMapping
|
|
continue OUTER
|
|
} else if field.Name == pathElement {
|
|
if last {
|
|
return field.Analyzer
|
|
}
|
|
current = subDocMapping
|
|
continue OUTER
|
|
}
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
|
|
pathElements := decodePath(path)
|
|
current := dm
|
|
OUTER:
|
|
for _, pathElement := range pathElements {
|
|
for name, subDocMapping := range current.Properties {
|
|
if name == pathElement {
|
|
current = subDocMapping
|
|
continue OUTER
|
|
}
|
|
}
|
|
for _, field := range current.Fields {
|
|
if field.Name == pathElement {
|
|
continue OUTER
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
return current
|
|
}
|
|
|
|
// NewDocumentMapping returns a new document mapping
|
|
// with all the default values.
|
|
func NewDocumentMapping() *DocumentMapping {
|
|
return &DocumentMapping{
|
|
Enabled: true,
|
|
Dynamic: true,
|
|
}
|
|
}
|
|
|
|
// NewDocumentStaticMapping returns a new document
|
|
// mapping that will not automatically index parts
|
|
// of a document without an explicit mapping.
|
|
func NewDocumentStaticMapping() *DocumentMapping {
|
|
return &DocumentMapping{
|
|
Enabled: true,
|
|
}
|
|
}
|
|
|
|
// NewDocumentDisabledMapping returns a new document
|
|
// mapping that will not perform any indexing.
|
|
func NewDocumentDisabledMapping() *DocumentMapping {
|
|
return &DocumentMapping{}
|
|
}
|
|
|
|
// AddSubDocumentMapping adds the provided DocumentMapping as a sub-mapping
|
|
// for the specified named subsection.
|
|
func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) {
|
|
if dm.Properties == nil {
|
|
dm.Properties = make(map[string]*DocumentMapping)
|
|
}
|
|
dm.Properties[property] = sdm
|
|
}
|
|
|
|
// AddFieldMappingsAt adds one or more FieldMappings
|
|
// at the named sub-document. If the named sub-document
|
|
// doesn't yet exist it is created for you.
|
|
// This is a convenience function to make most common
|
|
// mappings more concise.
|
|
// Otherwise, you would:
|
|
// subMapping := NewDocumentMapping()
|
|
// subMapping.AddFieldMapping(fieldMapping)
|
|
// parentMapping.AddSubDocumentMapping(property, subMapping)
|
|
func (dm *DocumentMapping) AddFieldMappingsAt(property string, fms ...*FieldMapping) {
|
|
if dm.Properties == nil {
|
|
dm.Properties = make(map[string]*DocumentMapping)
|
|
}
|
|
sdm, ok := dm.Properties[property]
|
|
if !ok {
|
|
sdm = NewDocumentMapping()
|
|
}
|
|
for _, fm := range fms {
|
|
sdm.AddFieldMapping(fm)
|
|
}
|
|
dm.Properties[property] = sdm
|
|
}
|
|
|
|
// AddFieldMapping adds the provided FieldMapping for this section
|
|
// of the document.
|
|
func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) {
|
|
if dm.Fields == nil {
|
|
dm.Fields = make([]*FieldMapping, 0)
|
|
}
|
|
dm.Fields = append(dm.Fields, fm)
|
|
}
|
|
|
|
// UnmarshalJSON offers custom unmarshaling with optional strict validation
|
|
func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
|
|
|
|
var tmp map[string]json.RawMessage
|
|
err := json.Unmarshal(data, &tmp)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// set defaults for fields which might have been omitted
|
|
dm.Enabled = true
|
|
dm.Dynamic = true
|
|
|
|
var invalidKeys []string
|
|
for k, v := range tmp {
|
|
switch k {
|
|
case "enabled":
|
|
err := json.Unmarshal(v, &dm.Enabled)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
case "dynamic":
|
|
err := json.Unmarshal(v, &dm.Dynamic)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
case "default_analyzer":
|
|
err := json.Unmarshal(v, &dm.DefaultAnalyzer)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
case "properties":
|
|
err := json.Unmarshal(v, &dm.Properties)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
case "fields":
|
|
err := json.Unmarshal(v, &dm.Fields)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
default:
|
|
invalidKeys = append(invalidKeys, k)
|
|
}
|
|
}
|
|
|
|
if MappingJSONStrict && len(invalidKeys) > 0 {
|
|
return fmt.Errorf("document mapping contains invalid keys: %v", invalidKeys)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
|
|
rv := ""
|
|
current := dm
|
|
for _, pathElement := range path {
|
|
var ok bool
|
|
current, ok = current.Properties[pathElement]
|
|
if !ok {
|
|
break
|
|
}
|
|
if current.DefaultAnalyzer != "" {
|
|
rv = current.DefaultAnalyzer
|
|
}
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
|
|
val := reflect.ValueOf(data)
|
|
typ := val.Type()
|
|
switch typ.Kind() {
|
|
case reflect.Map:
|
|
// FIXME can add support for other map keys in the future
|
|
if typ.Key().Kind() == reflect.String {
|
|
for _, key := range val.MapKeys() {
|
|
fieldName := key.String()
|
|
fieldVal := val.MapIndex(key).Interface()
|
|
dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
|
|
}
|
|
}
|
|
case reflect.Struct:
|
|
for i := 0; i < val.NumField(); i++ {
|
|
field := typ.Field(i)
|
|
fieldName := field.Name
|
|
|
|
// if the field has a JSON name, prefer that
|
|
jsonTag := field.Tag.Get("json")
|
|
jsonFieldName := parseJSONTagName(jsonTag)
|
|
if jsonFieldName == "-" {
|
|
continue
|
|
}
|
|
if jsonFieldName != "" {
|
|
fieldName = jsonFieldName
|
|
}
|
|
|
|
if val.Field(i).CanInterface() {
|
|
fieldVal := val.Field(i).Interface()
|
|
dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
|
|
}
|
|
}
|
|
case reflect.Slice, reflect.Array:
|
|
for i := 0; i < val.Len(); i++ {
|
|
if val.Index(i).CanInterface() {
|
|
fieldVal := val.Index(i).Interface()
|
|
dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
|
|
}
|
|
}
|
|
case reflect.Ptr:
|
|
ptrElem := val.Elem()
|
|
if ptrElem.IsValid() && ptrElem.CanInterface() {
|
|
dm.processProperty(ptrElem.Interface(), path, indexes, context)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
|
|
pathString := encodePath(path)
|
|
// look to see if there is a mapping for this field
|
|
subDocMapping := dm.documentMappingForPath(pathString)
|
|
|
|
// check to see if we even need to do further processing
|
|
if subDocMapping != nil && !subDocMapping.Enabled {
|
|
return
|
|
}
|
|
|
|
propertyValue := reflect.ValueOf(property)
|
|
if !propertyValue.IsValid() {
|
|
// cannot do anything with the zero value
|
|
return
|
|
}
|
|
propertyType := propertyValue.Type()
|
|
switch propertyType.Kind() {
|
|
case reflect.String:
|
|
propertyValueString := propertyValue.String()
|
|
if subDocMapping != nil {
|
|
// index by explicit mapping
|
|
for _, fieldMapping := range subDocMapping.Fields {
|
|
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
|
|
}
|
|
} else if dm.Dynamic {
|
|
// automatic indexing behavior
|
|
|
|
// first see if it can be parsed by the default date parser
|
|
dateTimeParser := context.im.dateTimeParserNamed(context.im.DefaultDateTimeParser)
|
|
if dateTimeParser != nil {
|
|
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
|
|
if err != nil {
|
|
// index as text
|
|
fieldMapping := newTextFieldMappingDynamic()
|
|
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
|
|
} else {
|
|
// index as datetime
|
|
fieldMapping := newDateTimeFieldMappingDynamic()
|
|
fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)
|
|
}
|
|
}
|
|
}
|
|
case reflect.Float64:
|
|
propertyValFloat := propertyValue.Float()
|
|
if subDocMapping != nil {
|
|
// index by explicit mapping
|
|
for _, fieldMapping := range subDocMapping.Fields {
|
|
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
|
|
}
|
|
} else if dm.Dynamic {
|
|
// automatic indexing behavior
|
|
fieldMapping := newNumericFieldMappingDynamic()
|
|
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
|
|
}
|
|
case reflect.Bool:
|
|
propertyValBool := propertyValue.Bool()
|
|
if subDocMapping != nil {
|
|
// index by explicit mapping
|
|
for _, fieldMapping := range subDocMapping.Fields {
|
|
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
|
|
}
|
|
} else if dm.Dynamic {
|
|
// automatic indexing behavior
|
|
fieldMapping := newBooleanFieldMappingDynamic()
|
|
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
|
|
}
|
|
case reflect.Struct:
|
|
switch property := property.(type) {
|
|
case time.Time:
|
|
// don't descend into the time struct
|
|
if subDocMapping != nil {
|
|
// index by explicit mapping
|
|
for _, fieldMapping := range subDocMapping.Fields {
|
|
fieldMapping.processTime(property, pathString, path, indexes, context)
|
|
}
|
|
} else if dm.Dynamic {
|
|
fieldMapping := newDateTimeFieldMappingDynamic()
|
|
fieldMapping.processTime(property, pathString, path, indexes, context)
|
|
}
|
|
default:
|
|
dm.walkDocument(property, path, indexes, context)
|
|
}
|
|
default:
|
|
dm.walkDocument(property, path, indexes, context)
|
|
}
|
|
}
|