added initial support for indexing and querying numeric values
closes #8 and closes #10
This commit is contained in:
parent
07eb6311a8
commit
78465ca686
|
@ -0,0 +1,92 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/numeric_util"
|
||||
)
|
||||
|
||||
const DEFAULT_NUMERIC_INDEXING_OPTIONS = INDEX_FIELD
|
||||
|
||||
const DEFAULT_PRECISION_STEP uint = 4
|
||||
|
||||
type NumericField struct {
|
||||
name string
|
||||
options IndexingOptions
|
||||
value numeric_util.PrefixCoded
|
||||
}
|
||||
|
||||
func (n *NumericField) Name() string {
|
||||
return n.name
|
||||
}
|
||||
|
||||
func (n *NumericField) Options() IndexingOptions {
|
||||
return n.options
|
||||
}
|
||||
|
||||
func (n *NumericField) Analyze() (int, analysis.TokenFrequencies) {
|
||||
tokens := make(analysis.TokenStream, 0)
|
||||
tokens = append(tokens, &analysis.Token{
|
||||
Start: 0,
|
||||
End: len(n.value),
|
||||
Term: n.value,
|
||||
Position: 1,
|
||||
Type: analysis.Numeric,
|
||||
})
|
||||
|
||||
original, err := n.value.Int64()
|
||||
if err == nil {
|
||||
|
||||
shift := DEFAULT_PRECISION_STEP
|
||||
for shift < 64 {
|
||||
shiftEncoded, err := numeric_util.NewPrefixCodedInt64(original, shift)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
token := analysis.Token{
|
||||
Start: 0,
|
||||
End: len(shiftEncoded),
|
||||
Term: shiftEncoded,
|
||||
Position: 1,
|
||||
Type: analysis.Numeric,
|
||||
}
|
||||
tokens = append(tokens, &token)
|
||||
shift += DEFAULT_PRECISION_STEP
|
||||
}
|
||||
}
|
||||
|
||||
fieldLength := len(tokens)
|
||||
tokenFreqs := analysis.TokenFrequency(tokens)
|
||||
return fieldLength, tokenFreqs
|
||||
}
|
||||
|
||||
func (n *NumericField) Value() []byte {
|
||||
return n.value
|
||||
}
|
||||
|
||||
func (n *NumericField) GoString() string {
|
||||
return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
|
||||
}
|
||||
|
||||
func NewNumericField(name string, number float64) *NumericField {
|
||||
return NewNumericFieldWithIndexingOptions(name, number, DEFAULT_NUMERIC_INDEXING_OPTIONS)
|
||||
}
|
||||
|
||||
func NewNumericFieldWithIndexingOptions(name string, number float64, options IndexingOptions) *NumericField {
|
||||
numberInt64 := numeric_util.Float64ToInt64(number)
|
||||
prefixCoded := numeric_util.MustNewPrefixCodedInt64(numberInt64, 0)
|
||||
return &NumericField{
|
||||
name: name,
|
||||
value: prefixCoded,
|
||||
options: options,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package document
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNumericField(t *testing.T) {
|
||||
nf := NewNumericField("age", 3.4)
|
||||
numTokens, tokenFreqs := nf.Analyze()
|
||||
if numTokens != 16 {
|
||||
t.Errorf("expected 16 tokens")
|
||||
}
|
||||
if len(tokenFreqs) != 16 {
|
||||
t.Errorf("expected 16 token freqs")
|
||||
}
|
||||
}
|
|
@ -266,6 +266,31 @@ func (im *IndexMapping) processProperty(property interface{}, path []string, con
|
|||
field := document.NewTextFieldCustom(pathString, []byte(propertyValueString), options, analyzer)
|
||||
context.doc.AddField(field)
|
||||
}
|
||||
case reflect.Float64:
|
||||
propertyValFloat := propertyValue.Float()
|
||||
if subDocMapping != nil {
|
||||
// index by explicit mapping
|
||||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
if *fieldMapping.Type == "number" {
|
||||
fieldName := pathString
|
||||
if fieldMapping.Name != nil && *fieldMapping.Name != "" {
|
||||
parentName := ""
|
||||
if len(path) > 1 {
|
||||
parentName = encodePath(path[:len(path)-1]) + PATH_SEPARATOR
|
||||
}
|
||||
fieldName = parentName + *fieldMapping.Name
|
||||
}
|
||||
options := fieldMapping.Options()
|
||||
field := document.NewNumericFieldWithIndexingOptions(fieldName, propertyValFloat, options)
|
||||
context.doc.AddField(field)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// automatic indexing behavior
|
||||
field := document.NewNumericField(pathString, propertyValFloat)
|
||||
context.doc.AddField(field)
|
||||
}
|
||||
|
||||
default:
|
||||
im.walkDocument(property, path, context)
|
||||
}
|
||||
|
|
10
query.go
10
query.go
|
@ -86,5 +86,15 @@ func ParseQuery(input []byte) (Query, error) {
|
|||
}
|
||||
return &rv, nil
|
||||
}
|
||||
_, hasMin := tmp["min"]
|
||||
_, hasMax := tmp["max"]
|
||||
if hasMin || hasMax {
|
||||
var rv NumericRangeQuery
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
return nil, fmt.Errorf("Unrecognized query")
|
||||
}
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type NumericRangeQuery struct {
|
||||
Min *float64 `json:"min,omitempty"`
|
||||
Max *float64 `json:"max,omitempty"`
|
||||
FieldVal string `json:"field,omitempty"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
}
|
||||
|
||||
func NewNumericRangeQuery(min, max *float64) *NumericRangeQuery {
|
||||
return &NumericRangeQuery{
|
||||
Min: min,
|
||||
Max: max,
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *NumericRangeQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *NumericRangeQuery) SetBoost(b float64) *NumericRangeQuery {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *NumericRangeQuery) Field() string {
|
||||
return q.FieldVal
|
||||
}
|
||||
|
||||
func (q *NumericRangeQuery) SetField(f string) *NumericRangeQuery {
|
||||
q.FieldVal = f
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *NumericRangeQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
|
||||
return search.NewNumericRangeSearcher(i.i, q.Min, q.Max, q.FieldVal, q.BoostVal, explain)
|
||||
}
|
||||
|
||||
func (q *NumericRangeQuery) Validate() error {
|
||||
if q.Min == nil && q.Min == q.Max {
|
||||
return fmt.Errorf("must specify min or max")
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,195 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
"github.com/couchbaselabs/bleve/numeric_util"
|
||||
)
|
||||
|
||||
type NumericRangeSearcher struct {
|
||||
index index.Index
|
||||
min *float64
|
||||
max *float64
|
||||
field string
|
||||
explain bool
|
||||
searcher *TermDisjunctionSearcher
|
||||
}
|
||||
|
||||
func NewNumericRangeSearcher(index index.Index, min *float64, max *float64, field string, boost float64, explain bool) (*NumericRangeSearcher, error) {
|
||||
// account for unbounded edges
|
||||
if min == nil {
|
||||
negInf := math.Inf(-1)
|
||||
min = &negInf
|
||||
}
|
||||
if max == nil {
|
||||
Inf := math.Inf(1)
|
||||
max = &Inf
|
||||
}
|
||||
// find all the ranges
|
||||
minInt64 := numeric_util.Float64ToInt64(*min)
|
||||
maxInt64 := numeric_util.Float64ToInt64(*max)
|
||||
// FIXME hard-coded precion, should match field declaration
|
||||
termRanges := splitInt64Range(minInt64, maxInt64, 4)
|
||||
terms := termRanges.Enumerate()
|
||||
// enumerate all the terms in the range
|
||||
qsearchers := make([]Searcher, len(terms))
|
||||
for i, term := range terms {
|
||||
var err error
|
||||
qsearchers[i], err = NewTermSearcher(index, string(term), field, 1.0, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
// build disjunction searcher of these ranges
|
||||
searcher, err := NewTermDisjunctionSearcher(index, qsearchers, 0, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &NumericRangeSearcher{
|
||||
index: index,
|
||||
min: min,
|
||||
max: max,
|
||||
field: field,
|
||||
explain: explain,
|
||||
searcher: searcher,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *NumericRangeSearcher) Count() uint64 {
|
||||
return s.searcher.Count()
|
||||
}
|
||||
|
||||
func (s *NumericRangeSearcher) Weight() float64 {
|
||||
return s.searcher.Weight()
|
||||
}
|
||||
|
||||
func (s *NumericRangeSearcher) SetQueryNorm(qnorm float64) {
|
||||
s.searcher.SetQueryNorm(qnorm)
|
||||
}
|
||||
|
||||
func (s *NumericRangeSearcher) Next() (*DocumentMatch, error) {
|
||||
return s.searcher.Next()
|
||||
|
||||
}
|
||||
|
||||
func (s *NumericRangeSearcher) Advance(ID string) (*DocumentMatch, error) {
|
||||
return s.searcher.Next()
|
||||
}
|
||||
|
||||
func (s *NumericRangeSearcher) Close() {
|
||||
s.searcher.Close()
|
||||
}
|
||||
|
||||
type termRange struct {
|
||||
startTerm []byte
|
||||
endTerm []byte
|
||||
}
|
||||
|
||||
func (t *termRange) Enumerate() [][]byte {
|
||||
rv := make([][]byte, 0)
|
||||
next := t.startTerm
|
||||
for bytes.Compare(next, t.endTerm) <= 0 {
|
||||
rv = append(rv, next)
|
||||
next = incrementBytes(next)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func incrementBytes(in []byte) []byte {
|
||||
rv := make([]byte, len(in))
|
||||
copy(rv, in)
|
||||
for i := len(rv) - 1; i >= 0; i-- {
|
||||
rv[i] = rv[i] + 1
|
||||
if rv[i] != 0 {
|
||||
// didnt' overflow, so stop
|
||||
break
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
type termRanges []*termRange
|
||||
|
||||
func (tr termRanges) Enumerate() [][]byte {
|
||||
rv := make([][]byte, 0)
|
||||
for _, tri := range tr {
|
||||
trie := tri.Enumerate()
|
||||
rv = append(rv, trie...)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func splitInt64Range(minBound, maxBound int64, precisionStep uint) termRanges {
|
||||
rv := make(termRanges, 0)
|
||||
if minBound > maxBound {
|
||||
return rv
|
||||
}
|
||||
|
||||
for shift := uint(0); ; shift += precisionStep {
|
||||
|
||||
diff := int64(1) << (shift + precisionStep)
|
||||
mask := ((int64(1) << precisionStep) - int64(1)) << shift
|
||||
hasLower := (minBound & mask) != int64(0)
|
||||
hasUpper := (maxBound & mask) != mask
|
||||
|
||||
var nextMinBound int64
|
||||
if hasLower {
|
||||
nextMinBound = (minBound + diff) &^ mask
|
||||
} else {
|
||||
nextMinBound = minBound &^ mask
|
||||
}
|
||||
var nextMaxBound int64
|
||||
if hasUpper {
|
||||
nextMaxBound = (maxBound - diff) &^ mask
|
||||
} else {
|
||||
nextMaxBound = maxBound &^ mask
|
||||
}
|
||||
|
||||
lowerWrapped := nextMinBound < minBound
|
||||
upperWrapped := nextMaxBound > maxBound
|
||||
|
||||
if shift+precisionStep >= 64 || nextMinBound > nextMaxBound || lowerWrapped || upperWrapped {
|
||||
// We are in the lowest precision or the next precision is not available.
|
||||
rv = append(rv, newRange(minBound, maxBound, shift))
|
||||
// exit the split recursion loop
|
||||
break
|
||||
}
|
||||
|
||||
if hasLower {
|
||||
rv = append(rv, newRange(minBound, minBound|mask, shift))
|
||||
}
|
||||
if hasUpper {
|
||||
rv = append(rv, newRange(maxBound&^mask, maxBound, shift))
|
||||
}
|
||||
|
||||
// recurse to next precision
|
||||
minBound = nextMinBound
|
||||
maxBound = nextMaxBound
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func newRange(minBound, maxBound int64, shift uint) *termRange {
|
||||
maxBound |= (int64(1) << shift) - int64(1)
|
||||
minBytes := numeric_util.MustNewPrefixCodedInt64(minBound, shift)
|
||||
maxBytes := numeric_util.MustNewPrefixCodedInt64(maxBound, shift)
|
||||
return newRangeBytes(minBytes, maxBytes)
|
||||
}
|
||||
|
||||
func newRangeBytes(minBytes, maxBytes []byte) *termRange {
|
||||
return &termRange{
|
||||
startTerm: minBytes,
|
||||
endTerm: maxBytes,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/numeric_util"
|
||||
)
|
||||
|
||||
func TestSplitRange(t *testing.T) {
|
||||
min := numeric_util.Float64ToInt64(1.0)
|
||||
max := numeric_util.Float64ToInt64(5.0)
|
||||
ranges := splitInt64Range(min, max, 4)
|
||||
enumerated := ranges.Enumerate()
|
||||
if len(enumerated) != 135 {
|
||||
t.Errorf("expected 135 terms, got %d", len(enumerated))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestIncrementBytes(t *testing.T) {
|
||||
tests := []struct {
|
||||
in []byte
|
||||
out []byte
|
||||
}{
|
||||
{
|
||||
in: []byte{0},
|
||||
out: []byte{1},
|
||||
},
|
||||
{
|
||||
in: []byte{0, 0},
|
||||
out: []byte{0, 1},
|
||||
},
|
||||
{
|
||||
in: []byte{0, 255},
|
||||
out: []byte{1, 0},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
actual := incrementBytes(test.in)
|
||||
if !reflect.DeepEqual(actual, test.out) {
|
||||
t.Errorf("expected %#v, got %#v", test.out, actual)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue