0
0
bleve/mapping/document.go
Marty Schoch a16efa5e78 add experimental support for indexing/query geo points
New field type GeoPointField, or "geopoint" in mapping JSON.

Currently structs and maps are considered when a mapping explicitly
marks a field as type "geopoint".  Several variants of "lon", "lng", and "lat"
are looked for in map keys, struct field names, or method names.

New query type GeoBoundingBoxQuery searches for documents which have a
GeoPointField indexed with a value that is inside the specified bounding box.

New query type GeoDistanceQuery searches for documents which have a
GeoPointField indexed with a value that is less than or equal to the
specified distance from the specified location.

New sort by method "geo_distance".  Hits can be sorted by their distance
from the specified location.

New geo utility package with all routines ported from Lucene.

New FilteringSearcher, which wraps an existing Searcher, but filters
all hits with a user-provided callback.
2017-03-24 17:22:21 -07:00

507 lines
15 KiB
Go

// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"encoding/json"
"fmt"
"reflect"
"time"
"github.com/blevesearch/bleve/registry"
)
// A DocumentMapping describes how a type of document
// should be indexed.
// As documents can be hierarchical, named sub-sections
// of documents are mapped using the same structure in
// the Properties field.
// Each value inside a document can be indexed 0 or more
// ways. These index entries are called fields and
// are stored in the Fields field.
// Entire sections of a document can be ignored or
// excluded by setting Enabled to false.
// If not explicitly mapped, default mapping operations
// are used. To disable this automatic handling, set
// Dynamic to false.
type DocumentMapping struct {
Enabled bool `json:"enabled"`
Dynamic bool `json:"dynamic"`
Properties map[string]*DocumentMapping `json:"properties,omitempty"`
Fields []*FieldMapping `json:"fields,omitempty"`
DefaultAnalyzer string `json:"default_analyzer"`
// StructTagKey overrides "json" when looking for field names in struct tags
StructTagKey string `json:"struct_tag_key,omitempty"`
}
func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
var err error
if dm.DefaultAnalyzer != "" {
_, err := cache.AnalyzerNamed(dm.DefaultAnalyzer)
if err != nil {
return err
}
}
for _, property := range dm.Properties {
err = property.Validate(cache)
if err != nil {
return err
}
}
for _, field := range dm.Fields {
if field.Analyzer != "" {
_, err = cache.AnalyzerNamed(field.Analyzer)
if err != nil {
return err
}
}
if field.DateFormat != "" {
_, err = cache.DateTimeParserNamed(field.DateFormat)
if err != nil {
return err
}
}
switch field.Type {
case "text", "datetime", "number", "boolean", "geopoint":
default:
return fmt.Errorf("unknown field type: '%s'", field.Type)
}
}
return nil
}
// analyzerNameForPath attempts to first find the field
// described by this path, then returns the analyzer
// configured for that field
func (dm *DocumentMapping) analyzerNameForPath(path string) string {
field := dm.fieldDescribedByPath(path)
if field != nil {
return field.Analyzer
}
return ""
}
func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
pathElements := decodePath(path)
if len(pathElements) > 1 {
// easy case, there is more than 1 path element remaining
// the next path element must match a property name
// at this level
for propName, subDocMapping := range dm.Properties {
if propName == pathElements[0] {
return subDocMapping.fieldDescribedByPath(encodePath(pathElements[1:]))
}
}
} else {
// just 1 path elememnt
// first look for property name with empty field
for propName, subDocMapping := range dm.Properties {
if propName == pathElements[0] {
// found property name match, now look at its fields
for _, field := range subDocMapping.Fields {
if field.Name == "" || field.Name == pathElements[0] {
// match
return field
}
}
}
}
// next, walk the properties again, looking for field overriding the name
for propName, subDocMapping := range dm.Properties {
if propName != pathElements[0] {
// property name isn't a match, but field name could override it
for _, field := range subDocMapping.Fields {
if field.Name == pathElements[0] {
return field
}
}
}
}
}
return nil
}
// documentMappingForPath only returns EXACT matches for a sub document
// or for an explicitly mapped field, if you want to find the
// closest document mapping to a field not explicitly mapped
// use closestDocMapping
func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
pathElements := decodePath(path)
current := dm
OUTER:
for i, pathElement := range pathElements {
for name, subDocMapping := range current.Properties {
if name == pathElement {
current = subDocMapping
continue OUTER
}
}
// no subDocMapping matches this pathElement
// only if this is the last element check for field name
if i == len(pathElements)-1 {
for _, field := range current.Fields {
if field.Name == pathElement {
break
}
}
}
return nil
}
return current
}
// closestDocMapping findest the most specific document mapping that matches
// part of the provided path
func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping {
pathElements := decodePath(path)
current := dm
OUTER:
for _, pathElement := range pathElements {
for name, subDocMapping := range current.Properties {
if name == pathElement {
current = subDocMapping
continue OUTER
}
}
}
return current
}
// NewDocumentMapping returns a new document mapping
// with all the default values.
func NewDocumentMapping() *DocumentMapping {
return &DocumentMapping{
Enabled: true,
Dynamic: true,
}
}
// NewDocumentStaticMapping returns a new document
// mapping that will not automatically index parts
// of a document without an explicit mapping.
func NewDocumentStaticMapping() *DocumentMapping {
return &DocumentMapping{
Enabled: true,
}
}
// NewDocumentDisabledMapping returns a new document
// mapping that will not perform any indexing.
func NewDocumentDisabledMapping() *DocumentMapping {
return &DocumentMapping{}
}
// AddSubDocumentMapping adds the provided DocumentMapping as a sub-mapping
// for the specified named subsection.
func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) {
if dm.Properties == nil {
dm.Properties = make(map[string]*DocumentMapping)
}
dm.Properties[property] = sdm
}
// AddFieldMappingsAt adds one or more FieldMappings
// at the named sub-document. If the named sub-document
// doesn't yet exist it is created for you.
// This is a convenience function to make most common
// mappings more concise.
// Otherwise, you would:
// subMapping := NewDocumentMapping()
// subMapping.AddFieldMapping(fieldMapping)
// parentMapping.AddSubDocumentMapping(property, subMapping)
func (dm *DocumentMapping) AddFieldMappingsAt(property string, fms ...*FieldMapping) {
if dm.Properties == nil {
dm.Properties = make(map[string]*DocumentMapping)
}
sdm, ok := dm.Properties[property]
if !ok {
sdm = NewDocumentMapping()
}
for _, fm := range fms {
sdm.AddFieldMapping(fm)
}
dm.Properties[property] = sdm
}
// AddFieldMapping adds the provided FieldMapping for this section
// of the document.
func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) {
if dm.Fields == nil {
dm.Fields = make([]*FieldMapping, 0)
}
dm.Fields = append(dm.Fields, fm)
}
// UnmarshalJSON offers custom unmarshaling with optional strict validation
func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
var tmp map[string]json.RawMessage
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
// set defaults for fields which might have been omitted
dm.Enabled = true
dm.Dynamic = true
var invalidKeys []string
for k, v := range tmp {
switch k {
case "enabled":
err := json.Unmarshal(v, &dm.Enabled)
if err != nil {
return err
}
case "dynamic":
err := json.Unmarshal(v, &dm.Dynamic)
if err != nil {
return err
}
case "default_analyzer":
err := json.Unmarshal(v, &dm.DefaultAnalyzer)
if err != nil {
return err
}
case "properties":
err := json.Unmarshal(v, &dm.Properties)
if err != nil {
return err
}
case "fields":
err := json.Unmarshal(v, &dm.Fields)
if err != nil {
return err
}
case "struct_tag_key":
err := json.Unmarshal(v, &dm.StructTagKey)
if err != nil {
return err
}
default:
invalidKeys = append(invalidKeys, k)
}
}
if MappingJSONStrict && len(invalidKeys) > 0 {
return fmt.Errorf("document mapping contains invalid keys: %v", invalidKeys)
}
return nil
}
func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
rv := ""
current := dm
for _, pathElement := range path {
var ok bool
current, ok = current.Properties[pathElement]
if !ok {
break
}
if current.DefaultAnalyzer != "" {
rv = current.DefaultAnalyzer
}
}
return rv
}
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
// allow default "json" tag to be overriden
structTagKey := dm.StructTagKey
if structTagKey == "" {
structTagKey = "json"
}
val := reflect.ValueOf(data)
typ := val.Type()
switch typ.Kind() {
case reflect.Map:
// FIXME can add support for other map keys in the future
if typ.Key().Kind() == reflect.String {
for _, key := range val.MapKeys() {
fieldName := key.String()
fieldVal := val.MapIndex(key).Interface()
dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
}
}
case reflect.Struct:
for i := 0; i < val.NumField(); i++ {
field := typ.Field(i)
fieldName := field.Name
// anonymous fields of type struct can elide the type name
if field.Anonymous && field.Type.Kind() == reflect.Struct {
fieldName = ""
}
// if the field has a name under the specified tag, prefer that
tag := field.Tag.Get(structTagKey)
tagFieldName := parseTagName(tag)
if tagFieldName == "-" {
continue
}
// allow tag to set field name to empty, only if anonymous
if field.Tag != "" && (tagFieldName != "" || field.Anonymous) {
fieldName = tagFieldName
}
if val.Field(i).CanInterface() {
fieldVal := val.Field(i).Interface()
newpath := path
if fieldName != "" {
newpath = append(path, fieldName)
}
dm.processProperty(fieldVal, newpath, indexes, context)
}
}
case reflect.Slice, reflect.Array:
for i := 0; i < val.Len(); i++ {
if val.Index(i).CanInterface() {
fieldVal := val.Index(i).Interface()
dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
}
}
case reflect.Ptr:
ptrElem := val.Elem()
if ptrElem.IsValid() && ptrElem.CanInterface() {
dm.processProperty(ptrElem.Interface(), path, indexes, context)
}
case reflect.String:
dm.processProperty(val.String(), path, indexes, context)
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
dm.processProperty(float64(val.Int()), path, indexes, context)
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
dm.processProperty(float64(val.Uint()), path, indexes, context)
case reflect.Float32, reflect.Float64:
dm.processProperty(float64(val.Float()), path, indexes, context)
case reflect.Bool:
dm.processProperty(val.Bool(), path, indexes, context)
}
}
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
pathString := encodePath(path)
// look to see if there is a mapping for this field
subDocMapping := dm.documentMappingForPath(pathString)
closestDocMapping := dm.closestDocMapping(pathString)
// check to see if we even need to do further processing
if subDocMapping != nil && !subDocMapping.Enabled {
return
}
propertyValue := reflect.ValueOf(property)
if !propertyValue.IsValid() {
// cannot do anything with the zero value
return
}
propertyType := propertyValue.Type()
switch propertyType.Kind() {
case reflect.String:
propertyValueString := propertyValue.String()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
// first see if it can be parsed by the default date parser
dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)
if dateTimeParser != nil {
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
if err != nil {
// index as text
fieldMapping := newTextFieldMappingDynamic(context.im)
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
} else {
// index as datetime
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)
}
}
}
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
return
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)
return
case reflect.Float64, reflect.Float32:
propertyValFloat := propertyValue.Float()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newNumericFieldMappingDynamic(context.im)
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
case reflect.Bool:
propertyValBool := propertyValue.Bool()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newBooleanFieldMappingDynamic(context.im)
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
case reflect.Struct:
switch property := property.(type) {
case time.Time:
// don't descend into the time struct
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processTime(property, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(property, pathString, path, indexes, context)
}
default:
if subDocMapping != nil {
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "geopoint" {
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
}
}
}
dm.walkDocument(property, path, indexes, context)
}
case reflect.Map:
if subDocMapping != nil {
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "geopoint" {
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
}
}
}
dm.walkDocument(property, path, indexes, context)
default:
dm.walkDocument(property, path, indexes, context)
}
}