0
0
Fork 0

introduce new query TermRange

The term range query is not often used in full-text queries, but
can be useful when filtering on keyword indexed text terms in
the index.

The JSON syntax for a TermRange query is the same as for
NumericRange, except that the min/max values must be strings
rather than float64 values.
This commit is contained in:
Marty Schoch 2017-03-31 20:29:24 -04:00
parent 4d00d863af
commit 1eba5541f2
7 changed files with 424 additions and 2 deletions

View File

@ -139,6 +139,23 @@ func NewNumericRangeInclusiveQuery(min, max *float64, minInclusive, maxInclusive
return query.NewNumericRangeInclusiveQuery(min, max, minInclusive, maxInclusive)
}
// NewTermRangeQuery creates a new Query matching a range of text terms.
// It delegates to query.NewTermRangeQuery.
// Either endpoint, but not both, may be the empty string "", which
// leaves that side of the range open.
// The minimum value is inclusive.
// The maximum value is exclusive.
func NewTermRangeQuery(min, max string) *query.TermRangeQuery {
return query.NewTermRangeQuery(min, max)
}
// NewTermRangeInclusiveQuery creates a new Query matching a range of
// text terms. It delegates to query.NewTermRangeInclusiveQuery.
// Either endpoint, but not both, may be the empty string "", which
// leaves that side of the range open.
// Control endpoint inclusion with minInclusive and maxInclusive.
func NewTermRangeInclusiveQuery(min, max string, minInclusive, maxInclusive *bool) *query.TermRangeQuery {
return query.NewTermRangeInclusiveQuery(min, max, minInclusive, maxInclusive)
}
// NewPhraseQuery creates a new Query for finding
// exact term phrases in the index.
// The provided terms must exist in the correct

View File

@ -161,8 +161,8 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
_, hasMin := tmp["min"]
_, hasMax := tmp["max"]
_, hasMin := tmp["min"].(float64)
_, hasMax := tmp["max"].(float64)
if hasMin || hasMax {
var rv NumericRangeQuery
err := json.Unmarshal(input, &rv)
@ -171,6 +171,16 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
_, hasMinStr := tmp["min"].(string)
_, hasMaxStr := tmp["max"].(string)
if hasMinStr || hasMaxStr {
var rv TermRangeQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
_, hasStart := tmp["start"]
_, hasEnd := tmp["end"]
if hasStart || hasEnd {

View File

@ -25,6 +25,8 @@ import (
var minNum = 5.1
var maxNum = 7.1
var minTerm = "bob"
var maxTerm = "cat"
var startDateStr = "2011-01-01T00:00:00Z"
var endDateStr = "2012-01-01T00:00:00Z"
var startDate time.Time
@ -142,6 +144,14 @@ func TestParseQuery(t *testing.T) {
return q
}(),
},
{
input: []byte(`{"min":"bob","max":"cat","field":"desc"}`),
output: func() Query {
q := NewTermRangeQuery(minTerm, maxTerm)
q.SetField("desc")
return q
}(),
},
{
input: []byte(`{"start":"` + startDateStr + `","end":"` + endDateStr + `","field":"desc"}`),
output: func() Query {

View File

@ -0,0 +1,95 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// TermRangeQuery is a query for documents containing terms that fall
// within a range of text terms in a field.
type TermRangeQuery struct {
// Min is the lower endpoint of the range; "" means no lower endpoint.
Min string `json:"min,omitempty"`
// Max is the upper endpoint of the range; "" means no upper endpoint.
Max string `json:"max,omitempty"`
// InclusiveMin, when non-nil, says whether a term equal to Min matches.
InclusiveMin *bool `json:"inclusive_min,omitempty"`
// InclusiveMax, when non-nil, says whether a term equal to Max matches.
InclusiveMax *bool `json:"inclusive_max,omitempty"`
// FieldVal names the field to search; when empty, Searcher falls back
// to the mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost, set through SetBoost.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewTermRangeQuery creates a new Query for ranges
// of text term values.
// Either endpoint, but not both, can be the empty string "".
// Both inclusion flags are passed as nil, which the term range
// searcher treats as: minimum inclusive, maximum exclusive.
func NewTermRangeQuery(min, max string) *TermRangeQuery {
return NewTermRangeInclusiveQuery(min, max, nil, nil)
}
// NewTermRangeInclusiveQuery builds a TermRangeQuery over text terms
// running from min to max.
// Either endpoint, but not both, can be the empty string "".
// minInclusive and maxInclusive are stored unchanged and decide whether
// a term equal to the corresponding endpoint matches; nil is allowed.
func NewTermRangeInclusiveQuery(min, max string, minInclusive, maxInclusive *bool) *TermRangeQuery {
	rv := new(TermRangeQuery)
	rv.Min, rv.Max = min, max
	rv.InclusiveMin, rv.InclusiveMax = minInclusive, maxInclusive
	return rv
}
// SetBoost records b as the boost for this query.
func (q *TermRangeQuery) SetBoost(b float64) {
	stored := new(Boost)
	*stored = Boost(b)
	q.BoostVal = stored
}
// Boost returns the boost of this query, as reported by BoostVal.Value().
// NOTE(review): BoostVal is nil until SetBoost is called (or a boost is
// decoded); this relies on Value tolerating a nil receiver — confirm.
func (q *TermRangeQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the name of the field this query searches.
func (q *TermRangeQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the name of the field this query searches, or "" if
// none was set.
func (q *TermRangeQuery) Field() string {
return q.FieldVal
}
// Searcher builds the term range searcher that executes this query
// against the index reader i.
//
// When no field was set, the mapping's default search field is used.
// An empty Min or Max is handed to the searcher as a nil endpoint.
func (q *TermRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	// fall back to the mapping's default field only when none was given
	targetField := q.FieldVal
	if targetField == "" {
		targetField = m.DefaultSearchField()
	}

	// empty endpoints stay nil rather than becoming empty byte slices
	var lower, upper []byte
	if len(q.Min) > 0 {
		lower = []byte(q.Min)
	}
	if len(q.Max) > 0 {
		upper = []byte(q.Max)
	}

	boost := q.BoostVal.Value()
	return searcher.NewTermRangeSearcher(i, lower, upper, q.InclusiveMin, q.InclusiveMax, targetField, boost, options)
}
// Validate reports an error when neither endpoint of the range is set;
// a query with at least one non-empty endpoint is valid.
func (q *TermRangeQuery) Validate() error {
	if q.Min != "" || q.Max != "" {
		return nil
	}
	return fmt.Errorf("term range query must specify min or max")
}

View File

@ -0,0 +1,75 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// NewTermRangeSearcher builds a searcher matching documents that contain,
// in the named field, any term in the range from min to max.
//
// A nil min or max leaves that side of the range unbounded. inclusiveMin
// and inclusiveMax decide whether a term exactly equal to the matching
// endpoint is kept; when nil they default to true and false respectively.
// The inclusion flags have no effect on an unbounded side.
//
// If no term remains in the range, a match-none searcher is returned.
// Errors from reading the field dictionary are returned unchanged.
// The caller's min and max slices are never written to.
func NewTermRangeSearcher(indexReader index.IndexReader,
	min, max []byte, inclusiveMin, inclusiveMax *bool, field string,
	boost float64, options search.SearcherOptions) (search.Searcher, error) {

	if inclusiveMin == nil {
		defaultInclusiveMin := true
		inclusiveMin = &defaultInclusiveMin
	}
	if inclusiveMax == nil {
		defaultInclusiveMax := false
		inclusiveMax = &defaultInclusiveMax
	}

	// remember whether a lower bound was supplied before min is
	// normalized, so an unbounded range never has its first term trimmed
	hasMin := min != nil
	if min == nil {
		min = []byte{}
	}

	var rangeMax []byte
	if max != nil {
		// the term dictionary range end has an unfortunate implementation,
		// so the end passed to it is max followed by a zero byte; build it
		// in a fresh slice so the caller's backing array is not modified
		rangeMax = make([]byte, len(max), len(max)+1)
		copy(rangeMax, max)
		rangeMax = append(rangeMax, 0)
	}

	// collect the terms in this range
	fieldDict, err := indexReader.FieldDictRange(field, min, rangeMax)
	if err != nil {
		return nil, err
	}

	var terms []string
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		terms = append(terms, tfd.Term)
		tfd, err = fieldDict.Next()
	}
	if err != nil {
		return nil, err
	}

	if len(terms) < 1 {
		return NewMatchNoneSearcher(indexReader)
	}

	// if our term list included the min, it would be the first item
	if !*inclusiveMin && hasMin && string(min) == terms[0] {
		terms = terms[1:]
	}

	// if our term list included the max, it would be the last item;
	// the list may have just become empty, so check before indexing
	if n := len(terms); n > 0 && !*inclusiveMax && max != nil && string(max) == terms[n-1] {
		terms = terms[:n-1]
	}

	// trimming the exclusive endpoints may have removed every term
	if len(terms) < 1 {
		return NewMatchNoneSearcher(indexReader)
	}

	return NewMultiTermSearcher(indexReader, terms, field, boost, options)
}

View File

@ -0,0 +1,192 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/search"
)
// TestTermRangeSearch runs NewTermRangeSearcher over the "name" field of
// the shared twoDocIndex fixture for a table of min/max/inclusion
// combinations and compares the internal IDs of the matched documents,
// in the order returned, against the expected list.
func TestTermRangeSearch(t *testing.T) {

	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can run without a reader
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	tests := []struct {
		min          []byte
		max          []byte
		inclusiveMin bool
		inclusiveMax bool
		field        string
		want         []string
	}{
		{
			min:          []byte("marty"),
			max:          []byte("marty"),
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: true,
			want:         []string{"1"},
		},
		{
			min:          []byte("marty"),
			max:          []byte("ravi"),
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: true,
			want:         []string{"1", "4"},
		},
		// inclusive max false should exclude ravi
		{
			min:          []byte("marty"),
			max:          []byte("ravi"),
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: false,
			want:         []string{"1"},
		},
		// inclusive max false should remove last/only item
		{
			min:          []byte("martz"),
			max:          []byte("ravi"),
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: false,
			want:         nil,
		},
		// inclusive min false should remove marty
		{
			min:          []byte("marty"),
			max:          []byte("ravi"),
			field:        "name",
			inclusiveMin: false,
			inclusiveMax: true,
			want:         []string{"4"},
		},
		// inclusive min false should remove first/only item
		{
			min:          []byte("marty"),
			max:          []byte("rav"),
			field:        "name",
			inclusiveMin: false,
			inclusiveMax: true,
			want:         nil,
		},
		// max nil sees everything after marty
		{
			min:          []byte("marty"),
			max:          nil,
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: true,
			want:         []string{"1", "2", "4"},
		},
		// min nil sees everything before ravi
		{
			min:          nil,
			max:          []byte("ravi"),
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: true,
			want:         []string{"1", "3", "4", "5"},
		},
		// min and max nil sees everything
		{
			min:          nil,
			max:          nil,
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: true,
			want:         []string{"1", "2", "3", "4", "5"},
		},
		// min and max nil sees everything, even with inclusiveMin false
		{
			min:          nil,
			max:          nil,
			field:        "name",
			inclusiveMin: false,
			inclusiveMax: true,
			want:         []string{"1", "2", "3", "4", "5"},
		},
		// min and max nil sees everything, even with inclusiveMax false
		{
			min:          nil,
			max:          nil,
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: false,
			want:         []string{"1", "2", "3", "4", "5"},
		},
		// min and max nil sees everything, even with both false
		{
			min:          nil,
			max:          nil,
			field:        "name",
			inclusiveMin: false,
			inclusiveMax: false,
			want:         []string{"1", "2", "3", "4", "5"},
		},
		// min and max non-nil, but match 0 terms
		{
			min:          []byte("martz"),
			max:          []byte("rav"),
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: true,
			want:         nil,
		},
	}

	for _, test := range tests {

		searcher, err := NewTermRangeSearcher(twoDocIndexReader, test.min, test.max,
			&test.inclusiveMin, &test.inclusiveMax, test.field, 1.0, search.SearcherOptions{Explain: true})
		if err != nil {
			t.Fatal(err)
		}

		// drain the searcher, collecting the internal ID of every hit
		var got []string
		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(
				searcher.DocumentMatchPoolSize(), 0),
		}
		next, err := searcher.Next(ctx)
		for err == nil && next != nil {
			got = append(got, string(next.IndexInternalID))
			// hand the match back to the pool before asking for the next one
			ctx.DocumentMatchPool.Put(next)
			next, err = searcher.Next(ctx)
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v", err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("expected: %v, got %v for test %#v", test.want, got, test)
		}

	}
}

View File

@ -801,5 +801,28 @@
}
]
}
},
{
"comment": "test term range",
"search": {
"from": 0,
"size": 10,
"query": {
"field": "title",
"max": "miz",
"min": "mis"
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "a"
},
{
"id": "b"
}
]
}
}
]