0
0
Fork 0

added support for date range fields and queries

closes #9 and closes #11
This commit is contained in:
Marty Schoch 2014-08-03 17:19:04 -04:00
parent 65b2faeaa2
commit 00d6f9700b
9 changed files with 446 additions and 37 deletions

View File

@ -0,0 +1,35 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package flexible_go
import (
"time"
"github.com/couchbaselabs/bleve/analysis"
)
// FlexibleGoDateTimeParser parses date/time strings using a list of Go
// reference-time layouts.
type FlexibleGoDateTimeParser struct {
	// layouts holds the layouts handed to the constructor, in caller order.
	layouts []string
}

// NewFlexibleGoDateTimeParser returns a parser configured with the supplied
// layouts. The slice is stored as given; it is not copied.
func NewFlexibleGoDateTimeParser(layouts []string) *FlexibleGoDateTimeParser {
	parser := new(FlexibleGoDateTimeParser)
	parser.layouts = layouts
	return parser
}
// ParseDateTime tries each configured layout against input, in the order the
// layouts were supplied, and returns the first successful result. When no
// layout matches it returns the zero time.Time and analysis.INVALID_DATETIME.
func (p *FlexibleGoDateTimeParser) ParseDateTime(input string) (time.Time, error) {
	for i := 0; i < len(p.layouts); i++ {
		if parsed, err := time.Parse(p.layouts[i], input); err == nil {
			return parsed, nil
		}
	}
	var zero time.Time
	return zero, analysis.INVALID_DATETIME
}

View File

@ -0,0 +1,83 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package flexible_go
import (
"reflect"
"testing"
"time"
"github.com/couchbaselabs/bleve/analysis"
)
func TestFlexibleDateTimeParser(t *testing.T) {
testLocation := time.FixedZone("", -8*60*60)
tests := []struct {
input string
expectedTime time.Time
expectedError error
}{
{
input: "2014-08-03",
expectedTime: time.Date(2014, 8, 3, 0, 0, 0, 0, time.UTC),
expectedError: nil,
},
{
input: "2014-08-03T15:59:30",
expectedTime: time.Date(2014, 8, 3, 15, 59, 30, 0, time.UTC),
expectedError: nil,
},
{
input: "2014-08-03 15:59:30",
expectedTime: time.Date(2014, 8, 3, 15, 59, 30, 0, time.UTC),
expectedError: nil,
},
{
input: "2014-08-03T15:59:30-08:00",
expectedTime: time.Date(2014, 8, 3, 15, 59, 30, 0, testLocation),
expectedError: nil,
},
{
input: "2014-08-03T15:59:30.999999999-08:00",
expectedTime: time.Date(2014, 8, 3, 15, 59, 30, 999999999, testLocation),
expectedError: nil,
},
{
input: "not a date time",
expectedTime: time.Time{},
expectedError: analysis.INVALID_DATETIME,
},
}
rfc3339NoTimezone := "2006-01-02T15:04:05"
rfc3339NoTimezoneNoT := "2006-01-02 15:04:05"
rfc3339NoTime := "2006-01-02"
dateOptionalTimeParser := NewFlexibleGoDateTimeParser(
[]string{
time.RFC3339Nano,
time.RFC3339,
rfc3339NoTimezone,
rfc3339NoTimezoneNoT,
rfc3339NoTime,
})
for _, test := range tests {
actualTime, actualErr := dateOptionalTimeParser.ParseDateTime(test.input)
if actualErr != test.expectedError {
t.Errorf("expected error %#v, got %#v", test.expectedError, actualErr)
continue
}
if !reflect.DeepEqual(actualTime, test.expectedTime) {
t.Errorf("expected time %#v, got %#v", test.expectedTime, actualTime)
t.Errorf("expected location %#v,\n got %#v", test.expectedTime.Location(), actualTime.Location())
}
}
}

View File

@ -10,6 +10,7 @@ package analysis
import (
"fmt"
"time"
)
type CharFilter interface {
@ -21,6 +22,7 @@ type TokenType int
// Token types that can be assigned to a token's Type.
const (
// AlphaNumeric is the first TokenType value (iota starts at 0).
AlphaNumeric TokenType = iota
// Numeric is the TokenType that follows AlphaNumeric.
Numeric
// DateTime is the TokenType set on tokens emitted for date/time field values.
DateTime
)
type Token struct {
@ -65,3 +67,9 @@ func (a *Analyzer) Analyze(input []byte) TokenStream {
}
return tokens
}
// INVALID_DATETIME is the error value returned by a DateTimeParser when the
// input could not be parsed with any of its layouts. It is a single shared
// value, so callers can compare a returned error against it with ==.
var INVALID_DATETIME = fmt.Errorf("unable to parse datetime with any of the layouts")
// DateTimeParser converts the textual form of a date/time into a time.Time.
type DateTimeParser interface {
// ParseDateTime parses the given string and returns the resulting time, or
// a non-nil error when the string cannot be parsed.
ParseDateTime(string) (time.Time, error)
}

View File

@ -11,9 +11,12 @@ package bleve
import (
"fmt"
"regexp"
"time"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/analysis/datetime_parsers/flexible_go"
"github.com/couchbaselabs/bleve/analysis/char_filters/regexp_char_filter"
"github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer"
@ -30,11 +33,12 @@ import (
)
type AnalysisConfig struct {
StopTokenMaps map[string]stop_words_filter.StopWordsMap
CharFilters map[string]analysis.CharFilter
Tokenizers map[string]analysis.Tokenizer
TokenFilters map[string]analysis.TokenFilter
Analyzers map[string]*analysis.Analyzer
StopTokenMaps map[string]stop_words_filter.StopWordsMap
CharFilters map[string]analysis.CharFilter
Tokenizers map[string]analysis.Tokenizer
TokenFilters map[string]analysis.TokenFilter
Analyzers map[string]*analysis.Analyzer
DateTimeParsers map[string]analysis.DateTimeParser
}
type HighlightConfig struct {
@ -42,11 +46,12 @@ type HighlightConfig struct {
}
type Configuration struct {
Analysis *AnalysisConfig
DefaultAnalyzer *string
Highlight *HighlightConfig
DefaultHighlighter *string
CreateIfMissing bool
Analysis *AnalysisConfig
DefaultAnalyzer *string
Highlight *HighlightConfig
DefaultHighlighter *string
CreateIfMissing bool
DefaultDateTimeFormat *string
}
func (c *Configuration) BuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) (*analysis.Analyzer, error) {
@ -98,11 +103,12 @@ func (c *Configuration) MustLoadStopWords(stopWordsBytes []byte) stop_words_filt
func NewConfiguration() *Configuration {
return &Configuration{
Analysis: &AnalysisConfig{
StopTokenMaps: make(map[string]stop_words_filter.StopWordsMap),
CharFilters: make(map[string]analysis.CharFilter),
Tokenizers: make(map[string]analysis.Tokenizer),
TokenFilters: make(map[string]analysis.TokenFilter),
Analyzers: make(map[string]*analysis.Analyzer),
StopTokenMaps: make(map[string]stop_words_filter.StopWordsMap),
CharFilters: make(map[string]analysis.CharFilter),
Tokenizers: make(map[string]analysis.Tokenizer),
TokenFilters: make(map[string]analysis.TokenFilter),
Analyzers: make(map[string]*analysis.Analyzer),
DateTimeParsers: make(map[string]analysis.DateTimeParser),
},
Highlight: &HighlightConfig{
Highlighters: make(map[string]search.Highlighter),
@ -301,4 +307,21 @@ func init() {
// default CreateIfMissing to true
Config.CreateIfMissing = true
// set up the built-in date time formats
rfc3339NoTimezone := "2006-01-02T15:04:05"
rfc3339NoTimezoneNoT := "2006-01-02 15:04:05"
rfc3339NoTime := "2006-01-02"
Config.Analysis.DateTimeParsers["dateTimeOptional"] = flexible_go.NewFlexibleGoDateTimeParser(
[]string{
time.RFC3339Nano,
time.RFC3339,
rfc3339NoTimezone,
rfc3339NoTimezoneNoT,
rfc3339NoTime,
})
dateTimeOptionalName := "dateTimeOptional"
Config.DefaultDateTimeFormat = &dateTimeOptionalName
}

View File

@ -0,0 +1,93 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package document
import (
"fmt"
"time"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/numeric_util"
)
// DEFAULT_DATETIME_INDEXING_OPTIONS is the set of indexing options given to
// datetime fields created through NewDateTimeField.
const DEFAULT_DATETIME_INDEXING_OPTIONS = INDEX_FIELD

// DEFAULT_DATETIME_PRECISION_STEP is the number of bits the shift grows by for
// each additional, lower-precision term that Analyze emits.
const DEFAULT_DATETIME_PRECISION_STEP uint = 4

// DateTimeField is a document field holding a point in time. The value is
// kept as a prefix-coded int64 of nanoseconds since the Unix epoch.
type DateTimeField struct {
	name    string
	options IndexingOptions
	value   numeric_util.PrefixCoded
}

// Name returns the name of the field.
func (n *DateTimeField) Name() string {
	return n.name
}

// Options returns the indexing options the field was created with.
func (n *DateTimeField) Options() IndexingOptions {
	return n.options
}

// Analyze produces the index terms for the field. The first token is the
// stored value itself (encoded with shift 0 by the constructors); it is
// followed by one token per re-encoding of the same value at shifts of
// DEFAULT_DATETIME_PRECISION_STEP, 2*DEFAULT_DATETIME_PRECISION_STEP, ... while
// the shift stays below 64. It returns the number of tokens as the field
// length together with the token frequencies.
func (n *DateTimeField) Analyze() (int, analysis.TokenFrequencies) {
	tokens := make(analysis.TokenStream, 0)
	tokens = append(tokens, &analysis.Token{
		Start:    0,
		End:      len(n.value),
		Term:     n.value,
		Position: 1,
		Type:     analysis.DateTime,
	})

	// if the stored value cannot be decoded, only the token above is indexed
	original, err := n.value.Int64()
	if err == nil {
		// use the datetime-specific step declared in this file rather than
		// the numeric field's constant
		shift := DEFAULT_DATETIME_PRECISION_STEP
		for shift < 64 {
			shiftEncoded, err := numeric_util.NewPrefixCodedInt64(original, shift)
			if err != nil {
				break
			}
			tokens = append(tokens, &analysis.Token{
				Start:    0,
				End:      len(shiftEncoded),
				Term:     shiftEncoded,
				Position: 1,
				Type:     analysis.DateTime,
			})
			shift += DEFAULT_DATETIME_PRECISION_STEP
		}
	}

	fieldLength := len(tokens)
	tokenFreqs := analysis.TokenFrequency(tokens)
	return fieldLength, tokenFreqs
}

// Value returns the prefix-coded bytes of the field's value.
func (n *DateTimeField) Value() []byte {
	return n.value
}

// GoString returns a Go-syntax-like description of the field, naming the
// type as it is actually declared (DateTimeField).
func (n *DateTimeField) GoString() string {
	return fmt.Sprintf("&document.DateTimeField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}

// NewDateTimeField creates a datetime field with the given name and value
// using DEFAULT_DATETIME_INDEXING_OPTIONS.
func NewDateTimeField(name string, dt time.Time) *DateTimeField {
	return NewDateTimeFieldWithIndexingOptions(name, dt, DEFAULT_DATETIME_INDEXING_OPTIONS)
}

// NewDateTimeFieldWithIndexingOptions creates a datetime field with the given
// name, value and indexing options. The time is converted with UnixNano and
// prefix-coded with shift 0.
//
// NOTE(review): time.Time.UnixNano is only defined for instants representable
// as int64 nanoseconds (roughly years 1678 through 2262); dates outside that
// range are not validated here.
func NewDateTimeFieldWithIndexingOptions(name string, dt time.Time, options IndexingOptions) *DateTimeField {
	dtInt64 := dt.UnixNano()
	prefixCoded := numeric_util.MustNewPrefixCodedInt64(dtInt64, 0)
	return &DateTimeField{
		name:    name,
		value:   prefixCoded,
		options: options,
	}
}

View File

@ -22,6 +22,7 @@ type FieldMapping struct {
Index *bool `json:"index"`
IncludeTermVectors *bool `json:"include_term_vectors"`
IncludeInAll *bool `json:"include_in_all"`
// DateFormat names the registered datetime parser to use for this field;
// when nil the configured default datetime format is used.
DateFormat *string `json:"date_format"`
}
func NewFieldMapping(name, typ, analyzer string, store, index bool, includeTermVectors bool, includeInAll bool) *FieldMapping {

View File

@ -12,6 +12,7 @@ import (
"encoding/json"
"fmt"
"reflect"
"time"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/document"
@ -236,16 +237,8 @@ func (im *IndexMapping) processProperty(property interface{}, path []string, con
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldName := getFieldName(pathString, path, fieldMapping)
if *fieldMapping.Type == "text" {
fieldName := pathString
if fieldMapping.Name != nil && *fieldMapping.Name != "" {
parentName := ""
if len(path) > 1 {
parentName = encodePath(path[:len(path)-1]) + PATH_SEPARATOR
}
fieldName = parentName + *fieldMapping.Name
}
options := fieldMapping.Options()
analyzer := Config.Analysis.Analyzers[*fieldMapping.Analyzer]
if analyzer != nil {
@ -256,30 +249,46 @@ func (im *IndexMapping) processProperty(property interface{}, path []string, con
context.excludedFromAll = append(context.excludedFromAll, fieldName)
}
}
} else if *fieldMapping.Type == "datetime" {
options := fieldMapping.Options()
// the field mapping may override the system-wide default datetime format
dateTimeFormat := *Config.DefaultDateTimeFormat
if fieldMapping.DateFormat != nil {
	dateTimeFormat = *fieldMapping.DateFormat
}
// a format name that was never registered yields a nil parser; skip the
// field in that case instead of calling a method on a nil interface
dateTimeParser := Config.Analysis.DateTimeParsers[dateTimeFormat]
if dateTimeParser != nil {
	parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
	// only index the value when it actually parsed as a datetime
	if err == nil {
		field := document.NewDateTimeFieldWithIndexingOptions(fieldName, parsedDateTime, options)
		context.doc.AddField(field)
	}
}
}
}
} else {
// automatic indexing behavior
options := document.STORE_FIELD | document.INDEX_FIELD | document.INCLUDE_TERM_VECTORS
analyzer := im.defaultAnalyzer(context.dm, path)
field := document.NewTextFieldCustom(pathString, []byte(propertyValueString), options, analyzer)
context.doc.AddField(field)
// first see if it can be parsed by the default date parser
// FIXME add support for index mapping overriding defaults
dateTimeParser := Config.Analysis.DateTimeParsers[*Config.DefaultDateTimeFormat]
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
if err != nil {
// index as plain text
options := document.STORE_FIELD | document.INDEX_FIELD | document.INCLUDE_TERM_VECTORS
analyzer := im.defaultAnalyzer(context.dm, path)
field := document.NewTextFieldCustom(pathString, []byte(propertyValueString), options, analyzer)
context.doc.AddField(field)
} else {
// index as datetime
field := document.NewDateTimeField(pathString, parsedDateTime)
context.doc.AddField(field)
}
}
case reflect.Float64:
propertyValFloat := propertyValue.Float()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldName := getFieldName(pathString, path, fieldMapping)
if *fieldMapping.Type == "number" {
fieldName := pathString
if fieldMapping.Name != nil && *fieldMapping.Name != "" {
parentName := ""
if len(path) > 1 {
parentName = encodePath(path[:len(path)-1]) + PATH_SEPARATOR
}
fieldName = parentName + *fieldMapping.Name
}
options := fieldMapping.Options()
field := document.NewNumericFieldWithIndexingOptions(fieldName, propertyValFloat, options)
context.doc.AddField(field)
@ -290,7 +299,29 @@ func (im *IndexMapping) processProperty(property interface{}, path []string, con
field := document.NewNumericField(pathString, propertyValFloat)
context.doc.AddField(field)
}
case reflect.Struct:
switch property := property.(type) {
case time.Time:
// don't descend into the time struct
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldName := getFieldName(pathString, path, fieldMapping)
if *fieldMapping.Type == "datetime" {
options := fieldMapping.Options()
field := document.NewDateTimeFieldWithIndexingOptions(fieldName, property, options)
context.doc.AddField(field)
}
}
} else {
// automatic indexing behavior
field := document.NewDateTimeField(pathString, property)
context.doc.AddField(field)
}
default:
im.walkDocument(property, path, context)
}
default:
im.walkDocument(property, path, context)
}
@ -339,3 +370,36 @@ func (im *IndexMapping) analyzerForPath(path string) *analysis.Analyzer {
// finally just return the system-wide default analyzer
return Config.Analysis.Analyzers[*Config.DefaultAnalyzer]
}
// datetimeParserForPath returns the datetime parser to use for the field at
// path. If a document mapping for the path has at least one field mapping and
// the first of them sets DateFormat, the parser registered under that name is
// returned (nil when no such parser is registered). Otherwise the parser
// registered under the system-wide default datetime format is returned.
func (im *IndexMapping) datetimeParserForPath(path string) analysis.DateTimeParser {
	// first we look for an explicit date format on the field mapping
	for _, docMapping := range im.TypeMapping {
		pathMapping := docMapping.DocumentMappingForPath(path)
		if pathMapping == nil || len(pathMapping.Fields) == 0 {
			continue
		}
		// check DateFormat itself (not Analyzer): it is the pointer that is
		// dereferenced, and it may be set independently of any analyzer
		if dateFormat := pathMapping.Fields[0].DateFormat; dateFormat != nil {
			return Config.Analysis.DateTimeParsers[*dateFormat]
		}
	}

	// FIXME introduce default datetime parsers at the mapping levels

	// finally just return the system-wide default datetime parser
	return Config.Analysis.DateTimeParsers[*Config.DefaultDateTimeFormat]
}
// getFieldName returns the name under which a mapped field is indexed. When
// the field mapping carries no name (nil or empty) this is pathString.
// Otherwise it is the mapping's name, prefixed with the encoded parent path
// and PATH_SEPARATOR when path has more than one element.
func getFieldName(pathString string, path []string, fieldMapping *FieldMapping) string {
	if fieldMapping.Name == nil || *fieldMapping.Name == "" {
		return pathString
	}
	prefix := ""
	if len(path) > 1 {
		// everything except the last path element identifies the parent
		prefix = encodePath(path[:len(path)-1]) + PATH_SEPARATOR
	}
	return prefix + *fieldMapping.Name
}

View File

@ -96,5 +96,15 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
_, hasStart := tmp["start"]
_, hasEnd := tmp["end"]
if hasStart || hasEnd {
var rv DateRangeQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
return nil, fmt.Errorf("Unrecognized query")
}

92
query_date_range.go Normal file
View File

@ -0,0 +1,92 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"fmt"
"math"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/numeric_util"
"github.com/couchbaselabs/bleve/search"
)
// DateRangeQuery describes a query over a datetime field bounded by optional
// start and end date strings.
type DateRangeQuery struct {
	// Start and End are the textual range endpoints; either may be nil.
	Start *string `json:"start,omitempty"`
	End   *string `json:"end,omitempty"`
	// FieldVal is the name of the field being queried.
	FieldVal string `json:"field,omitempty"`
	// BoostVal is the boost applied to the query.
	BoostVal float64 `json:"boost,omitempty"`
	// DateTimeParser optionally names the registered parser used to
	// interpret Start and End.
	DateTimeParser *string `json:"datetime_parser,omitempty"`
}

// NewDateRangeQuery builds a query for the given endpoints with a boost of
// 1.0. The field and datetime parser are left unset.
func NewDateRangeQuery(start, end *string) *DateRangeQuery {
	query := new(DateRangeQuery)
	query.Start = start
	query.End = end
	query.BoostVal = 1.0
	return query
}

// Boost reports the boost currently set on the query.
func (q *DateRangeQuery) Boost() float64 {
	return q.BoostVal
}

// SetBoost stores b as the query's boost and returns the query so calls can
// be chained.
func (q *DateRangeQuery) SetBoost(b float64) *DateRangeQuery {
	q.BoostVal = b
	return q
}

// Field reports the name of the field the query targets.
func (q *DateRangeQuery) Field() string {
	return q.FieldVal
}

// SetField stores f as the query's target field and returns the query so
// calls can be chained.
func (q *DateRangeQuery) SetField(f string) *DateRangeQuery {
	q.FieldVal = f
	return q
}
// Searcher builds the searcher that executes this query against index i.
//
// The endpoints are parsed with the parser named by q.DateTimeParser when it
// is set, otherwise with the parser the index mapping selects for the queried
// field. A nil or empty endpoint leaves that side of the range unbounded
// (negative or positive infinity). Parsed endpoints are converted from Unix
// nanoseconds with numeric_util.Int64ToFloat64 and handed to a numeric range
// searcher.
//
// It returns an error when no parser can be resolved or when a non-empty
// endpoint fails to parse.
func (q *DateRangeQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
	var dateTimeParser analysis.DateTimeParser
	if q.DateTimeParser != nil {
		dateTimeParser = Config.Analysis.DateTimeParsers[*q.DateTimeParser]
		if dateTimeParser == nil {
			// dereference the name; formatting the *string itself with %s
			// would print a pointer instead of the parser name
			return nil, fmt.Errorf("no datetime parser named '%s' registered", *q.DateTimeParser)
		}
	} else {
		dateTimeParser = i.m.datetimeParserForPath(q.FieldVal)
		if dateTimeParser == nil {
			return nil, fmt.Errorf("no datetime parser registered for field '%s'", q.FieldVal)
		}
	}

	// now parse the endpoints
	min := math.Inf(-1)
	max := math.Inf(1)
	if q.Start != nil && *q.Start != "" {
		startTime, err := dateTimeParser.ParseDateTime(*q.Start)
		if err != nil {
			return nil, err
		}
		min = numeric_util.Int64ToFloat64(startTime.UnixNano())
	}
	if q.End != nil && *q.End != "" {
		endTime, err := dateTimeParser.ParseDateTime(*q.End)
		if err != nil {
			return nil, err
		}
		max = numeric_util.Int64ToFloat64(endTime.UnixNano())
	}

	return search.NewNumericRangeSearcher(i.i, &min, &max, q.FieldVal, q.BoostVal, explain)
}
// Validate reports an error when neither endpoint of the range is set; a
// query with at least one non-nil endpoint is accepted.
func (q *DateRangeQuery) Validate() error {
	if q.Start != nil || q.End != nil {
		return nil
	}
	return fmt.Errorf("must specify start or end")
}