0
0
bleve/mapping_document.go
Marty Schoch d95c9c1063 optionally set MappingJSONStrict to true for strict parsing
in strict parsing mode, unrecognized keys in the mapping will
cause errors.  this is partial at the moment and does not
include the custom analysis components.
2016-01-29 13:18:54 -05:00

398 lines
11 KiB
Go

// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"encoding/json"
"fmt"
"reflect"
"time"
"github.com/blevesearch/bleve/registry"
)
// A DocumentMapping describes how a type of document
// should be indexed.
// As documents can be hierarchical, named sub-sections
// of documents are mapped using the same structure in
// the Properties field.
// Each value inside a document can be indexed 0 or more
// ways. These index entries are called fields and
// are stored in the Fields field.
// Entire sections of a document can be ignored or
// excluded by setting Enabled to false.
// If not explicitly mapped, default mapping operations
// are used. To disable this automatic handling, set
// Dynamic to false.
type DocumentMapping struct {
Enabled bool `json:"enabled"`
Dynamic bool `json:"dynamic"`
Properties map[string]*DocumentMapping `json:"properties,omitempty"`
Fields []*FieldMapping `json:"fields,omitempty"`
DefaultAnalyzer string `json:"default_analyzer"`
}
func (dm *DocumentMapping) validate(cache *registry.Cache) error {
var err error
if dm.DefaultAnalyzer != "" {
_, err := cache.AnalyzerNamed(dm.DefaultAnalyzer)
if err != nil {
return err
}
}
for _, property := range dm.Properties {
err = property.validate(cache)
if err != nil {
return err
}
}
for _, field := range dm.Fields {
if field.Analyzer != "" {
_, err = cache.AnalyzerNamed(field.Analyzer)
if err != nil {
return err
}
}
if field.DateFormat != "" {
_, err = cache.DateTimeParserNamed(field.DateFormat)
if err != nil {
return err
}
}
switch field.Type {
case "text", "datetime", "number", "boolean":
default:
return fmt.Errorf("unknown field type: '%s'", field.Type)
}
}
return nil
}
func (dm *DocumentMapping) analyzerNameForPath(path string) string {
pathElements := decodePath(path)
last := false
current := dm
OUTER:
for i, pathElement := range pathElements {
if i == len(pathElements)-1 {
last = true
}
for name, subDocMapping := range current.Properties {
for _, field := range subDocMapping.Fields {
if field.Name == "" && name == pathElement {
if last {
return field.Analyzer
}
current = subDocMapping
continue OUTER
} else if field.Name == pathElement {
if last {
return field.Analyzer
}
current = subDocMapping
continue OUTER
}
}
}
return ""
}
return ""
}
func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
pathElements := decodePath(path)
current := dm
OUTER:
for _, pathElement := range pathElements {
for name, subDocMapping := range current.Properties {
if name == pathElement {
current = subDocMapping
continue OUTER
}
}
for _, field := range current.Fields {
if field.Name == pathElement {
continue OUTER
}
}
return nil
}
return current
}
// NewDocumentMapping returns a new document mapping
// with all the default values.
func NewDocumentMapping() *DocumentMapping {
return &DocumentMapping{
Enabled: true,
Dynamic: true,
}
}
// NewDocumentStaticMapping returns a new document
// mapping that will not automatically index parts
// of a document without an explicit mapping.
func NewDocumentStaticMapping() *DocumentMapping {
return &DocumentMapping{
Enabled: true,
}
}
// NewDocumentDisabledMapping returns a new document
// mapping that will not perform any indexing.
func NewDocumentDisabledMapping() *DocumentMapping {
return &DocumentMapping{}
}
// AddSubDocumentMapping adds the provided DocumentMapping as a sub-mapping
// for the specified named subsection.
func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) {
if dm.Properties == nil {
dm.Properties = make(map[string]*DocumentMapping)
}
dm.Properties[property] = sdm
}
// AddFieldMappingsAt adds one or more FieldMappings
// at the named sub-document. If the named sub-document
// doesn't yet exist it is created for you.
// This is a convenience function to make most common
// mappings more concise.
// Otherwise, you would:
// subMapping := NewDocumentMapping()
// subMapping.AddFieldMapping(fieldMapping)
// parentMapping.AddSubDocumentMapping(property, subMapping)
func (dm *DocumentMapping) AddFieldMappingsAt(property string, fms ...*FieldMapping) {
if dm.Properties == nil {
dm.Properties = make(map[string]*DocumentMapping)
}
sdm, ok := dm.Properties[property]
if !ok {
sdm = NewDocumentMapping()
}
for _, fm := range fms {
sdm.AddFieldMapping(fm)
}
dm.Properties[property] = sdm
}
// AddFieldMapping adds the provided FieldMapping for this section
// of the document.
func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) {
if dm.Fields == nil {
dm.Fields = make([]*FieldMapping, 0)
}
dm.Fields = append(dm.Fields, fm)
}
// UnmarshalJSON offers custom unmarshaling with optional strict validation
func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
var tmp map[string]json.RawMessage
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
// set defaults for fields which might have been omitted
dm.Enabled = true
dm.Dynamic = true
var invalidKeys []string
for k, v := range tmp {
switch k {
case "enabled":
err := json.Unmarshal(v, &dm.Enabled)
if err != nil {
return err
}
case "dynamic":
err := json.Unmarshal(v, &dm.Dynamic)
if err != nil {
return err
}
case "default_analyzer":
err := json.Unmarshal(v, &dm.DefaultAnalyzer)
if err != nil {
return err
}
case "properties":
err := json.Unmarshal(v, &dm.Properties)
if err != nil {
return err
}
case "fields":
err := json.Unmarshal(v, &dm.Fields)
if err != nil {
return err
}
default:
invalidKeys = append(invalidKeys, k)
}
}
if MappingJSONStrict && len(invalidKeys) > 0 {
return fmt.Errorf("document mapping contains invalid keys: %v", invalidKeys)
}
return nil
}
func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
rv := ""
current := dm
for _, pathElement := range path {
var ok bool
current, ok = current.Properties[pathElement]
if !ok {
break
}
if current.DefaultAnalyzer != "" {
rv = current.DefaultAnalyzer
}
}
return rv
}
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
val := reflect.ValueOf(data)
typ := val.Type()
switch typ.Kind() {
case reflect.Map:
// FIXME can add support for other map keys in the future
if typ.Key().Kind() == reflect.String {
for _, key := range val.MapKeys() {
fieldName := key.String()
fieldVal := val.MapIndex(key).Interface()
dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
}
}
case reflect.Struct:
for i := 0; i < val.NumField(); i++ {
field := typ.Field(i)
fieldName := field.Name
// if the field has a JSON name, prefer that
jsonTag := field.Tag.Get("json")
jsonFieldName := parseJSONTagName(jsonTag)
if jsonFieldName == "-" {
continue
}
if jsonFieldName != "" {
fieldName = jsonFieldName
}
if val.Field(i).CanInterface() {
fieldVal := val.Field(i).Interface()
dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
}
}
case reflect.Slice, reflect.Array:
for i := 0; i < val.Len(); i++ {
if val.Index(i).CanInterface() {
fieldVal := val.Index(i).Interface()
dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
}
}
case reflect.Ptr:
ptrElem := val.Elem()
if ptrElem.IsValid() && ptrElem.CanInterface() {
dm.processProperty(ptrElem.Interface(), path, indexes, context)
}
}
}
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
pathString := encodePath(path)
// look to see if there is a mapping for this field
subDocMapping := dm.documentMappingForPath(pathString)
// check to see if we even need to do further processing
if subDocMapping != nil && !subDocMapping.Enabled {
return
}
propertyValue := reflect.ValueOf(property)
if !propertyValue.IsValid() {
// cannot do anything with the zero value
return
}
propertyType := propertyValue.Type()
switch propertyType.Kind() {
case reflect.String:
propertyValueString := propertyValue.String()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
}
} else if dm.Dynamic {
// automatic indexing behavior
// first see if it can be parsed by the default date parser
dateTimeParser := context.im.dateTimeParserNamed(context.im.DefaultDateTimeParser)
if dateTimeParser != nil {
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
if err != nil {
// index as text
fieldMapping := newTextFieldMappingDynamic()
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
} else {
// index as datetime
fieldMapping := newDateTimeFieldMappingDynamic()
fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)
}
}
}
case reflect.Float64:
propertyValFloat := propertyValue.Float()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
} else if dm.Dynamic {
// automatic indexing behavior
fieldMapping := newNumericFieldMappingDynamic()
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
case reflect.Bool:
propertyValBool := propertyValue.Bool()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
} else if dm.Dynamic {
// automatic indexing behavior
fieldMapping := newBooleanFieldMappingDynamic()
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
case reflect.Struct:
switch property := property.(type) {
case time.Time:
// don't descend into the time struct
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processTime(property, pathString, path, indexes, context)
}
} else if dm.Dynamic {
fieldMapping := newDateTimeFieldMappingDynamic()
fieldMapping.processTime(property, pathString, path, indexes, context)
}
default:
dm.walkDocument(property, path, indexes, context)
}
default:
dm.walkDocument(property, path, indexes, context)
}
}