0
0
Fork 0

added initial support for indexing and querying numeric values

closes #8 and closes #10
This commit is contained in:
Marty Schoch 2014-08-02 19:05:58 -04:00
parent 07eb6311a8
commit 78465ca686
7 changed files with 451 additions and 0 deletions

92
document/field_numeric.go Normal file
View File

@ -0,0 +1,92 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package document
import (
"fmt"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/numeric_util"
)
const (
	// DEFAULT_NUMERIC_INDEXING_OPTIONS is the set of indexing options that
	// NewNumericField applies when the caller does not supply any.
	DEFAULT_NUMERIC_INDEXING_OPTIONS = INDEX_FIELD

	// DEFAULT_PRECISION_STEP is the number of bits by which the shift grows
	// between successive lower-precision terms emitted by Analyze.
	DEFAULT_PRECISION_STEP uint = 4
)
// NumericField is a document field that stores one numeric value in
// prefix-coded form.
type NumericField struct {
// name is the field name reported by Name.
name string
// options are the indexing options reported by Options.
options IndexingOptions
// value is the prefix-coded encoding of the number; the constructors in
// this file build it with a shift of 0.
value numeric_util.PrefixCoded
}
// Name returns the name this field was created with.
func (n *NumericField) Name() string {
return n.name
}
// Options returns the indexing options this field was created with.
func (n *NumericField) Options() IndexingOptions {
return n.options
}
// Analyze builds the terms to index for this field and returns how many
// tokens were produced together with their frequencies.
//
// The first token is always the stored full-precision value. If that value
// decodes back to an int64, one additional token is emitted for every shift
// DEFAULT_PRECISION_STEP, 2*DEFAULT_PRECISION_STEP, ... below 64; generation
// stops early at the first shift that fails to encode. Every token is
// numeric and sits at position 1.
func (n *NumericField) Analyze() (int, analysis.TokenFrequencies) {
	// the full-precision term is always present
	tokens := analysis.TokenStream{
		&analysis.Token{
			Start:    0,
			End:      len(n.value),
			Term:     n.value,
			Position: 1,
			Type:     analysis.Numeric,
		},
	}

	// lower-precision terms are only derived when the stored value decodes
	if original, decodeErr := n.value.Int64(); decodeErr == nil {
		for shift := DEFAULT_PRECISION_STEP; shift < 64; shift += DEFAULT_PRECISION_STEP {
			shiftEncoded, encodeErr := numeric_util.NewPrefixCodedInt64(original, shift)
			if encodeErr != nil {
				break
			}
			tokens = append(tokens, &analysis.Token{
				Start:    0,
				End:      len(shiftEncoded),
				Term:     shiftEncoded,
				Position: 1,
				Type:     analysis.Numeric,
			})
		}
	}

	return len(tokens), analysis.TokenFrequency(tokens)
}
// Value returns the field's prefix-coded value as a byte slice.
func (n *NumericField) Value() []byte {
return n.value
}
// GoString renders the field's name, options and encoded value in a
// Go-syntax-like form; it is the method fmt uses for the %#v verb.
func (n *NumericField) GoString() string {
	const layout = "&document.NumericField{Name:%s, Options: %s, Value: %s}"
	return fmt.Sprintf(layout, n.name, n.options, n.value)
}
// NewNumericField creates a numeric field called name holding number,
// indexed with DEFAULT_NUMERIC_INDEXING_OPTIONS.
func NewNumericField(name string, number float64) *NumericField {
return NewNumericFieldWithIndexingOptions(name, number, DEFAULT_NUMERIC_INDEXING_OPTIONS)
}
// NewNumericFieldWithIndexingOptions creates a numeric field called name
// holding number, indexed with the supplied options.
//
// The number is converted with numeric_util.Float64ToInt64 and the result is
// prefix-coded with a shift of 0 before being stored.
func NewNumericFieldWithIndexingOptions(name string, number float64, options IndexingOptions) *NumericField {
	encoded := numeric_util.MustNewPrefixCodedInt64(numeric_util.Float64ToInt64(number), 0)
	field := NumericField{
		name:    name,
		options: options,
		value:   encoded,
	}
	return &field
}

View File

@ -0,0 +1,24 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package document
import (
"testing"
)
// TestNumericField checks that analyzing a numeric field yields the expected
// number of terms: one full-precision term plus one for each shift
// 4, 8, ..., 60, i.e. 16 in total.
func TestNumericField(t *testing.T) {
	nf := NewNumericField("age", 3.4)
	numTokens, tokenFreqs := nf.Analyze()
	// report the observed counts so a failure can be diagnosed from the log
	if numTokens != 16 {
		t.Errorf("expected 16 tokens, got %d", numTokens)
	}
	if len(tokenFreqs) != 16 {
		t.Errorf("expected 16 token freqs, got %d", len(tokenFreqs))
	}
}

View File

@ -266,6 +266,31 @@ func (im *IndexMapping) processProperty(property interface{}, path []string, con
field := document.NewTextFieldCustom(pathString, []byte(propertyValueString), options, analyzer)
context.doc.AddField(field)
}
case reflect.Float64:
propertyValFloat := propertyValue.Float()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
if *fieldMapping.Type == "number" {
fieldName := pathString
if fieldMapping.Name != nil && *fieldMapping.Name != "" {
parentName := ""
if len(path) > 1 {
parentName = encodePath(path[:len(path)-1]) + PATH_SEPARATOR
}
fieldName = parentName + *fieldMapping.Name
}
options := fieldMapping.Options()
field := document.NewNumericFieldWithIndexingOptions(fieldName, propertyValFloat, options)
context.doc.AddField(field)
}
}
} else {
// automatic indexing behavior
field := document.NewNumericField(pathString, propertyValFloat)
context.doc.AddField(field)
}
default:
im.walkDocument(property, path, context)
}

View File

@ -86,5 +86,15 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
_, hasMin := tmp["min"]
_, hasMax := tmp["max"]
if hasMin || hasMax {
var rv NumericRangeQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
return nil, fmt.Errorf("Unrecognized query")
}

59
query_numeric_range.go Normal file
View File

@ -0,0 +1,59 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"fmt"
"github.com/couchbaselabs/bleve/search"
)
// NumericRangeQuery describes a search for documents whose numeric field
// value falls within a range. At least one of Min and Max must be set for
// Validate to succeed.
type NumericRangeQuery struct {
// Min is the lower bound of the range; nil leaves the lower side unset.
Min *float64 `json:"min,omitempty"`
// Max is the upper bound of the range; nil leaves the upper side unset.
Max *float64 `json:"max,omitempty"`
// FieldVal is the name of the field to search.
FieldVal string `json:"field,omitempty"`
// BoostVal is the boost reported by Boost and handed to the searcher.
BoostVal float64 `json:"boost,omitempty"`
}
// NewNumericRangeQuery creates a range query with the given bounds and a
// boost of 1.0. Either bound may be nil; the field name is left empty and
// can be set with SetField.
func NewNumericRangeQuery(min, max *float64) *NumericRangeQuery {
	q := new(NumericRangeQuery)
	q.Min, q.Max = min, max
	q.BoostVal = 1.0
	return q
}
// Boost returns the boost currently set on the query.
func (q *NumericRangeQuery) Boost() float64 {
return q.BoostVal
}
// SetBoost sets the query's boost to b and returns the same query so that
// calls can be chained.
func (q *NumericRangeQuery) SetBoost(b float64) *NumericRangeQuery {
q.BoostVal = b
return q
}
// Field returns the name of the field the query searches.
func (q *NumericRangeQuery) Field() string {
return q.FieldVal
}
// SetField sets the name of the field to search to f and returns the same
// query so that calls can be chained.
func (q *NumericRangeQuery) SetField(f string) *NumericRangeQuery {
q.FieldVal = f
return q
}
// Searcher builds the numeric range searcher for this query against the
// index held by i, passing through the query's bounds, field name and boost
// along with the explain flag.
func (q *NumericRangeQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
return search.NewNumericRangeSearcher(i.i, q.Min, q.Max, q.FieldVal, q.BoostVal, explain)
}
// Validate reports an error when the query has neither a lower nor an upper
// bound; a query with at least one bound is valid.
func (q *NumericRangeQuery) Validate() error {
	if q.Min != nil || q.Max != nil {
		return nil
	}
	return fmt.Errorf("must specify min or max")
}

View File

@ -0,0 +1,195 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"bytes"
"math"
"github.com/couchbaselabs/bleve/index"
"github.com/couchbaselabs/bleve/numeric_util"
)
// NumericRangeSearcher finds documents whose numeric field value lies within
// a range. It does so by wrapping a disjunction over the individual terms
// that cover the range; every search method delegates to that disjunction.
type NumericRangeSearcher struct {
// index is the index the searcher was created against.
index index.Index
// min is the lower bound; the constructor stores -Inf when none was given.
min *float64
// max is the upper bound; the constructor stores +Inf when none was given.
max *float64
// field is the name of the field being searched.
field string
// explain is the explain flag the searcher was created with.
explain bool
// searcher is the disjunction over all terms covering the range.
searcher *TermDisjunctionSearcher
}
// NewNumericRangeSearcher creates a searcher matching documents whose value
// in field lies between min and max. A nil min is replaced with -Inf and a
// nil max with +Inf.
//
// The bounds are converted with numeric_util.Float64ToInt64, split into
// prefix-coded term ranges, and every term in those ranges gets its own term
// searcher; the result wraps a disjunction over all of them. An error from
// creating any term searcher or the disjunction is returned as-is.
//
// NOTE(review): boost is accepted but never referenced in the body; each
// term searcher is created with a fixed boost of 1.0.
func NewNumericRangeSearcher(index index.Index, min *float64, max *float64, field string, boost float64, explain bool) (*NumericRangeSearcher, error) {
	// a missing bound leaves the range open on that side
	if min == nil {
		lowest := math.Inf(-1)
		min = &lowest
	}
	if max == nil {
		highest := math.Inf(1)
		max = &highest
	}

	// FIXME hard-coded precision, should match field declaration
	terms := splitInt64Range(
		numeric_util.Float64ToInt64(*min),
		numeric_util.Float64ToInt64(*max),
		4,
	).Enumerate()

	// one term searcher per enumerated term
	qsearchers := make([]Searcher, 0, len(terms))
	for _, term := range terms {
		termSearcher, err := NewTermSearcher(index, string(term), field, 1.0, explain)
		if err != nil {
			return nil, err
		}
		qsearchers = append(qsearchers, termSearcher)
	}

	// a document matches if any of the terms matches
	disjunction, err := NewTermDisjunctionSearcher(index, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	rv := NumericRangeSearcher{
		index:    index,
		min:      min,
		max:      max,
		field:    field,
		explain:  explain,
		searcher: disjunction,
	}
	return &rv, nil
}
// Count returns the count reported by the underlying disjunction searcher.
func (s *NumericRangeSearcher) Count() uint64 {
return s.searcher.Count()
}
// Weight returns the weight reported by the underlying disjunction searcher.
func (s *NumericRangeSearcher) Weight() float64 {
return s.searcher.Weight()
}
// SetQueryNorm forwards the query norm to the underlying disjunction searcher.
func (s *NumericRangeSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
// Next returns the next match produced by the underlying disjunction
// searcher, together with any error it reports.
func (s *NumericRangeSearcher) Next() (*DocumentMatch, error) {
return s.searcher.Next()
}
// Advance moves the underlying disjunction searcher forward to the document
// identified by ID and returns the match it reports, together with any error.
//
// The target ID must be forwarded: calling Next here would ignore it and
// hand back whatever match happens to come next, which breaks callers that
// rely on Advance to skip ahead.
func (s *NumericRangeSearcher) Advance(ID string) (*DocumentMatch, error) {
	return s.searcher.Advance(ID)
}
// Close closes the underlying disjunction searcher.
func (s *NumericRangeSearcher) Close() {
s.searcher.Close()
}
// termRange is a closed interval of byte-string terms, ordered as by
// bytes.Compare.
type termRange struct {
	// startTerm is the first term of the interval.
	startTerm []byte
	// endTerm is the last term of the interval.
	endTerm []byte
}

// Enumerate lists the terms of the range: it starts at startTerm and keeps
// taking the byte-wise successor (see incrementBytes) for as long as the
// term does not compare greater than endTerm. All returned terms have the
// length of startTerm. An inverted range yields an empty, non-nil slice.
func (t *termRange) Enumerate() [][]byte {
	rv := make([][]byte, 0)
	next := t.startTerm
	for bytes.Compare(next, t.endTerm) <= 0 {
		rv = append(rv, next)
		succ := incrementBytes(next)
		if bytes.Compare(succ, next) <= 0 {
			// The successor wrapped around (next was all 0xFF) or next is
			// empty, so there is no larger term of this length. Without this
			// check the loop would restart from zero and never terminate.
			break
		}
		next = succ
	}
	return rv
}

// incrementBytes returns a new slice holding the value of in, read as a
// big-endian unsigned integer, plus one. The input is not modified. A slice
// consisting only of 0xFF bytes wraps around to all zeros; an empty slice
// yields an empty slice.
func incrementBytes(in []byte) []byte {
	rv := make([]byte, len(in))
	copy(rv, in)
	for i := len(rv) - 1; i >= 0; i-- {
		rv[i]++
		if rv[i] != 0 {
			// didn't overflow, so no carry into the next byte
			break
		}
	}
	return rv
}
// termRanges is an ordered collection of term ranges.
type termRanges []*termRange

// Enumerate returns the terms of every range in the collection, concatenated
// in collection order. The result is non-nil even when there are no terms.
func (tr termRanges) Enumerate() [][]byte {
	all := make([][]byte, 0)
	for i := range tr {
		all = append(all, tr[i].Enumerate()...)
	}
	return all
}
// splitInt64Range covers the range minBound..maxBound with a list of term
// ranges at increasing shifts (0, precisionStep, 2*precisionStep, ...).
// At each shift it peels off the parts at the lower and upper edge that are
// not aligned to the next shift and emits them as ranges at the current
// shift, then continues with the remaining aligned middle part at the next
// shift. It returns an empty list when minBound is greater than maxBound.
//
// NOTE(review): with precisionStep == 0 the shift never advances and none of
// the exit conditions appear reachable for minBound <= maxBound — confirm
// callers always pass a positive step.
func splitInt64Range(minBound, maxBound int64, precisionStep uint) termRanges {
rv := make(termRanges, 0)
if minBound > maxBound {
return rv
}
for shift := uint(0); ; shift += precisionStep {
// diff is one unit at the next shift level; mask selects the
// precisionStep bits that start at bit position shift
diff := int64(1) << (shift + precisionStep)
mask := ((int64(1) << precisionStep) - int64(1)) << shift
// hasLower: minBound has some of the masked bits set, so it is not
// aligned to the next level and needs its own range at this level
hasLower := (minBound & mask) != int64(0)
// hasUpper: maxBound does not have all masked bits set, so it is not
// at the end of a next-level block and needs its own range
hasUpper := (maxBound & mask) != mask
// bounds of the aligned middle part, with the masked bits cleared
var nextMinBound int64
if hasLower {
nextMinBound = (minBound + diff) &^ mask
} else {
nextMinBound = minBound &^ mask
}
var nextMaxBound int64
if hasUpper {
nextMaxBound = (maxBound - diff) &^ mask
} else {
nextMaxBound = maxBound &^ mask
}
// detect int64 overflow/underflow in the computations above
lowerWrapped := nextMinBound < minBound
upperWrapped := nextMaxBound > maxBound
if shift+precisionStep >= 64 || nextMinBound > nextMaxBound || lowerWrapped || upperWrapped {
// We are in the lowest precision or the next precision is not available.
rv = append(rv, newRange(minBound, maxBound, shift))
// exit the split recursion loop
break
}
// emit the unaligned edge pieces at the current shift
if hasLower {
rv = append(rv, newRange(minBound, minBound|mask, shift))
}
if hasUpper {
rv = append(rv, newRange(maxBound&^mask, maxBound, shift))
}
// recurse to next precision
minBound = nextMinBound
maxBound = nextMaxBound
}
return rv
}
// newRange builds the term range for minBound..maxBound at the given shift.
// All bits of maxBound below shift are set before encoding; both bounds are
// then prefix-coded with that shift.
func newRange(minBound, maxBound int64, shift uint) *termRange {
	lowBits := (int64(1) << shift) - int64(1)
	return newRangeBytes(
		numeric_util.MustNewPrefixCodedInt64(minBound, shift),
		numeric_util.MustNewPrefixCodedInt64(maxBound|lowBits, shift),
	)
}
// newRangeBytes wraps two already-encoded terms in a termRange that starts
// at minBytes and ends at maxBytes. The slices are stored without copying.
func newRangeBytes(minBytes, maxBytes []byte) *termRange {
	r := new(termRange)
	r.startTerm, r.endTerm = minBytes, maxBytes
	return r
}

View File

@ -0,0 +1,46 @@
package search
import (
"reflect"
"testing"
"github.com/couchbaselabs/bleve/numeric_util"
)
// TestSplitRange checks how many terms are enumerated when the range from
// 1.0 to 5.0 is split with a precision step of 4.
func TestSplitRange(t *testing.T) {
	const wantTerms = 135

	lo := numeric_util.Float64ToInt64(1.0)
	hi := numeric_util.Float64ToInt64(5.0)

	got := len(splitInt64Range(lo, hi, 4).Enumerate())
	if got != wantTerms {
		t.Errorf("expected 135 terms, got %d", got)
	}
}
// TestIncrementBytes checks incrementBytes on a single byte, on a two-byte
// value without carry, and on a two-byte value where the low byte overflows
// and carries into the high byte.
func TestIncrementBytes(t *testing.T) {
	type testCase struct {
		in  []byte
		out []byte
	}
	cases := []testCase{
		{in: []byte{0}, out: []byte{1}},
		{in: []byte{0, 0}, out: []byte{0, 1}},
		{in: []byte{0, 255}, out: []byte{1, 0}},
	}

	for i := range cases {
		got := incrementBytes(cases[i].in)
		if reflect.DeepEqual(got, cases[i].out) {
			continue
		}
		t.Errorf("expected %#v, got %#v", cases[i].out, got)
	}
}