change sort field impl to use indexed values not stored values
This commit is contained in:
parent
0d873916f0
commit
750e0ac16c
|
@ -36,21 +36,6 @@ func (d *Document) AddField(f Field) *Document {
|
|||
return d
|
||||
}
|
||||
|
||||
func (d *Document) FieldNamed(field string) Field {
|
||||
for _, f := range d.Fields {
|
||||
if f.Name() == field {
|
||||
return f
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Document) CompareFieldsNamed(other *Document, field string, descending bool) int {
|
||||
fieldi := d.FieldNamed(field)
|
||||
fieldj := other.FieldNamed(field)
|
||||
return CompareFieldValues(fieldi, fieldj, descending)
|
||||
}
|
||||
|
||||
func (d *Document) GoString() string {
|
||||
fields := ""
|
||||
for i, field := range d.Fields {
|
||||
|
|
|
@ -32,86 +32,3 @@ type Field interface {
|
|||
// the rate of indexing
|
||||
NumPlainTextBytes() uint64
|
||||
}
|
||||
|
||||
// CompareFieldValues provides ordering amongst stored field values
|
||||
// when trying compare field values of different types,
|
||||
// we impose the following order:
|
||||
// - nil (missing field)
|
||||
// - boolean
|
||||
// - number
|
||||
// - text
|
||||
// - date
|
||||
func CompareFieldValues(i, j Field, descending bool) int {
|
||||
|
||||
lower := func() int {
|
||||
if descending {
|
||||
return 1
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
higher := func() int {
|
||||
if descending {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
switch i := i.(type) {
|
||||
case nil:
|
||||
switch j.(type) {
|
||||
case nil:
|
||||
return 0
|
||||
default:
|
||||
return lower()
|
||||
}
|
||||
case *BooleanField:
|
||||
switch j := j.(type) {
|
||||
case nil:
|
||||
return higher()
|
||||
case *BooleanField:
|
||||
return i.Compare(j, descending)
|
||||
default:
|
||||
return lower()
|
||||
}
|
||||
case *NumericField:
|
||||
switch j := j.(type) {
|
||||
case nil:
|
||||
return higher()
|
||||
case *BooleanField:
|
||||
return higher()
|
||||
case *NumericField:
|
||||
return i.Compare(j, descending)
|
||||
default:
|
||||
return lower()
|
||||
}
|
||||
case *TextField:
|
||||
switch j := j.(type) {
|
||||
case nil:
|
||||
return higher()
|
||||
case *BooleanField:
|
||||
return higher()
|
||||
case *NumericField:
|
||||
return higher()
|
||||
case *TextField:
|
||||
return i.Compare(j, descending)
|
||||
default:
|
||||
return lower()
|
||||
}
|
||||
case *DateTimeField:
|
||||
switch j := j.(type) {
|
||||
case nil:
|
||||
return higher()
|
||||
case *BooleanField:
|
||||
return higher()
|
||||
case *NumericField:
|
||||
return higher()
|
||||
case *TextField:
|
||||
return higher()
|
||||
case *DateTimeField:
|
||||
return i.Compare(j, descending)
|
||||
}
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
|
|
@ -71,17 +71,6 @@ func (b *BooleanField) NumPlainTextBytes() uint64 {
|
|||
return b.numPlainTextBytes
|
||||
}
|
||||
|
||||
func (b *BooleanField) Compare(other *BooleanField, descending bool) int {
|
||||
bv, _ := b.Boolean()
|
||||
otherbv, _ := other.Boolean()
|
||||
if bv == otherbv {
|
||||
return 0
|
||||
} else if (otherbv && !descending) || (bv && descending) {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField {
|
||||
return &BooleanField{
|
||||
name: name,
|
||||
|
|
|
@ -100,17 +100,6 @@ func (n *DateTimeField) NumPlainTextBytes() uint64 {
|
|||
return n.numPlainTextBytes
|
||||
}
|
||||
|
||||
func (n *DateTimeField) Compare(other *DateTimeField, descending bool) int {
|
||||
dt, _ := n.DateTime()
|
||||
otherdt, _ := other.DateTime()
|
||||
if dt.Equal(otherdt) {
|
||||
return 0
|
||||
} else if (dt.Before(otherdt) && !descending) || (otherdt.Before(dt) && descending) {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField {
|
||||
return &DateTimeField{
|
||||
name: name,
|
||||
|
|
|
@ -96,17 +96,6 @@ func (n *NumericField) NumPlainTextBytes() uint64 {
|
|||
return n.numPlainTextBytes
|
||||
}
|
||||
|
||||
func (n *NumericField) Compare(other *NumericField, descending bool) int {
|
||||
num, _ := n.Number()
|
||||
othernum, _ := other.Number()
|
||||
if num == othernum {
|
||||
return 0
|
||||
} else if (num < othernum && !descending) || (num > othernum && descending) {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
|
||||
return &NumericField{
|
||||
name: name,
|
||||
|
|
|
@ -1,383 +0,0 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestCompareFieldValues(t *testing.T) {
|
||||
|
||||
t1 := time.Now()
|
||||
t2 := t1.Add(1 * time.Hour)
|
||||
|
||||
dtf1, _ := NewDateTimeField("", nil, t1)
|
||||
dtf2, _ := NewDateTimeField("", nil, t2)
|
||||
|
||||
tests := []struct {
|
||||
l Field
|
||||
r Field
|
||||
desc bool
|
||||
res int
|
||||
}{
|
||||
// nil simple
|
||||
{
|
||||
l: nil,
|
||||
r: nil,
|
||||
res: 0,
|
||||
},
|
||||
// boolean simple
|
||||
{
|
||||
l: NewBooleanField("", nil, true),
|
||||
r: NewBooleanField("", nil, true),
|
||||
res: 0,
|
||||
},
|
||||
{
|
||||
l: NewBooleanField("", nil, true),
|
||||
r: NewBooleanField("", nil, false),
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: NewBooleanField("", nil, false),
|
||||
r: NewBooleanField("", nil, true),
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewBooleanField("", nil, true),
|
||||
r: NewBooleanField("", nil, false),
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewBooleanField("", nil, false),
|
||||
r: NewBooleanField("", nil, true),
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
// numeric simple
|
||||
{
|
||||
l: NewNumericField("", nil, 3.14),
|
||||
r: NewNumericField("", nil, 3.14),
|
||||
res: 0,
|
||||
},
|
||||
{
|
||||
l: NewNumericField("", nil, 5.14),
|
||||
r: NewNumericField("", nil, 3.14),
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: NewNumericField("", nil, 3.14),
|
||||
r: NewNumericField("", nil, 5.14),
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewNumericField("", nil, 5.14),
|
||||
r: NewNumericField("", nil, 3.14),
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewNumericField("", nil, 3.14),
|
||||
r: NewNumericField("", nil, 5.14),
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
// text simple
|
||||
{
|
||||
l: NewTextField("", nil, []byte("cat")),
|
||||
r: NewTextField("", nil, []byte("cat")),
|
||||
res: 0,
|
||||
},
|
||||
{
|
||||
l: NewTextField("", nil, []byte("dog")),
|
||||
r: NewTextField("", nil, []byte("cat")),
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: NewTextField("", nil, []byte("cat")),
|
||||
r: NewTextField("", nil, []byte("dog")),
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewTextField("", nil, []byte("dog")),
|
||||
r: NewTextField("", nil, []byte("cat")),
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewTextField("", nil, []byte("cat")),
|
||||
r: NewTextField("", nil, []byte("dog")),
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
// datetime simple
|
||||
{
|
||||
l: dtf1,
|
||||
r: dtf1,
|
||||
res: 0,
|
||||
},
|
||||
{
|
||||
l: dtf2,
|
||||
r: dtf1,
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: dtf1,
|
||||
r: dtf2,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: dtf2,
|
||||
r: dtf1,
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: dtf1,
|
||||
r: dtf2,
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
// mixed types, nil left
|
||||
{
|
||||
l: nil,
|
||||
r: NewBooleanField("", nil, true),
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: nil,
|
||||
r: NewBooleanField("", nil, true),
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: nil,
|
||||
r: NewNumericField("", nil, 3.14),
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: nil,
|
||||
r: NewNumericField("", nil, 3.14),
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: nil,
|
||||
r: NewTextField("", nil, []byte("cat")),
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: nil,
|
||||
r: NewTextField("", nil, []byte("cat")),
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: nil,
|
||||
r: dtf1,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: nil,
|
||||
r: dtf1,
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
// mixed types, boolean left
|
||||
{
|
||||
l: NewBooleanField("", nil, true),
|
||||
r: nil,
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: NewBooleanField("", nil, true),
|
||||
r: nil,
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewBooleanField("", nil, true),
|
||||
r: NewNumericField("", nil, 3.14),
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewBooleanField("", nil, true),
|
||||
r: NewNumericField("", nil, 3.14),
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: NewBooleanField("", nil, true),
|
||||
r: NewTextField("", nil, []byte("cat")),
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewBooleanField("", nil, true),
|
||||
r: NewTextField("", nil, []byte("cat")),
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: NewBooleanField("", nil, true),
|
||||
r: dtf1,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewBooleanField("", nil, true),
|
||||
r: dtf1,
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
// mixed types, number left
|
||||
{
|
||||
l: NewNumericField("", nil, 3.14),
|
||||
r: nil,
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: NewNumericField("", nil, 3.14),
|
||||
r: nil,
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewNumericField("", nil, 3.14),
|
||||
r: NewBooleanField("", nil, true),
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: NewNumericField("", nil, 3.14),
|
||||
r: NewBooleanField("", nil, true),
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewNumericField("", nil, 3.14),
|
||||
r: NewTextField("", nil, []byte("cat")),
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewNumericField("", nil, 3.14),
|
||||
r: NewTextField("", nil, []byte("cat")),
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: NewNumericField("", nil, 3.14),
|
||||
r: dtf1,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewNumericField("", nil, 3.14),
|
||||
r: dtf1,
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
// mixed types, text left
|
||||
{
|
||||
l: NewTextField("", nil, []byte("cat")),
|
||||
r: nil,
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: NewTextField("", nil, []byte("cat")),
|
||||
r: nil,
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewTextField("", nil, []byte("cat")),
|
||||
r: NewBooleanField("", nil, true),
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: NewTextField("", nil, []byte("cat")),
|
||||
r: NewBooleanField("", nil, true),
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewTextField("", nil, []byte("cat")),
|
||||
r: NewNumericField("", nil, 3.14),
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: NewTextField("", nil, []byte("cat")),
|
||||
r: NewNumericField("", nil, 3.14),
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewTextField("", nil, []byte("cat")),
|
||||
r: dtf1,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: NewTextField("", nil, []byte("cat")),
|
||||
r: dtf1,
|
||||
desc: true,
|
||||
res: 1,
|
||||
},
|
||||
// mixed types, datetimes left
|
||||
{
|
||||
l: dtf1,
|
||||
r: nil,
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: dtf1,
|
||||
r: nil,
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: dtf1,
|
||||
r: NewBooleanField("", nil, true),
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: dtf1,
|
||||
r: NewBooleanField("", nil, true),
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: dtf1,
|
||||
r: NewNumericField("", nil, 3.14),
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: dtf1,
|
||||
r: NewNumericField("", nil, 3.14),
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
{
|
||||
l: dtf1,
|
||||
r: NewTextField("", nil, []byte("cat")),
|
||||
res: 1,
|
||||
},
|
||||
{
|
||||
l: dtf1,
|
||||
r: NewTextField("", nil, []byte("cat")),
|
||||
desc: true,
|
||||
res: -1,
|
||||
},
|
||||
}
|
||||
|
||||
for i, test := range tests {
|
||||
actual := CompareFieldValues(test.l, test.r, test.desc)
|
||||
if actual != test.res {
|
||||
t.Errorf("expected %d, got %d for case %d", test.res, actual, i)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -11,7 +11,6 @@ package document
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
)
|
||||
|
@ -78,13 +77,6 @@ func (t *TextField) NumPlainTextBytes() uint64 {
|
|||
return t.numPlainTextBytes
|
||||
}
|
||||
|
||||
func (t *TextField) Compare(other *TextField, descending bool) int {
|
||||
if descending {
|
||||
return strings.Compare(string(other.value), string(t.value))
|
||||
}
|
||||
return strings.Compare(string(t.value), string(other.value))
|
||||
}
|
||||
|
||||
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
|
||||
return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions)
|
||||
}
|
||||
|
|
|
@ -79,8 +79,7 @@ type IndexReader interface {
|
|||
FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)
|
||||
|
||||
Document(id string) (*document.Document, error)
|
||||
DocumentFieldTerms(id IndexInternalID) (FieldTerms, error)
|
||||
DocumentFieldTermsForFields(id IndexInternalID, fields []string) (FieldTerms, error)
|
||||
DocumentFieldTerms(id IndexInternalID, fields []string) (FieldTerms, error)
|
||||
|
||||
Fields() ([]string, error)
|
||||
|
||||
|
@ -93,8 +92,29 @@ type IndexReader interface {
|
|||
Close() error
|
||||
}
|
||||
|
||||
// FieldTerms holds the terms used by a single document, keyed by field name.
type FieldTerms map[string][]string

// FieldsNotYetCached picks out, in their original order, the entries of
// fields that have no key in f. A field whose key is present counts as
// cached even if its term list is empty. The result is nil when nothing
// is missing.
func (f FieldTerms) FieldsNotYetCached(fields []string) []string {
	var missing []string
	for idx := range fields {
		name := fields[idx]
		if _, cached := f[name]; cached {
			continue
		}
		missing = append(missing, name)
	}
	return missing
}

// Merge copies every field of other into the receiver.
// Term lists are treated as complete, so they are never combined: when
// both sides know a field, the list from other overwrites the receiver's.
func (f FieldTerms) Merge(other FieldTerms) {
	for name := range other {
		f[name] = other[name]
	}
}
|
||||
|
||||
type TermFieldVector struct {
|
||||
Field string
|
||||
ArrayPositions []uint64
|
||||
|
|
|
@ -98,25 +98,7 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID) (index.FieldTerms, error) {
|
||||
back, err := i.index.backIndexRowForDoc(i.kvreader, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := make(index.FieldTerms, len(back.termEntries))
|
||||
for _, entry := range back.termEntries {
|
||||
fieldName := i.index.fieldCache.FieldIndexed(uint16(*entry.Field))
|
||||
terms, ok := rv[fieldName]
|
||||
if !ok {
|
||||
terms = make([]string, 0)
|
||||
}
|
||||
terms = append(terms, *entry.Term)
|
||||
rv[fieldName] = terms
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *IndexReader) DocumentFieldTermsForFields(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
|
||||
func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
|
||||
back, err := i.index.backIndexRowForDoc(i.kvreader, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
|
@ -1179,7 +1179,7 @@ func TestIndexDocumentFieldTerms(t *testing.T) {
|
|||
}
|
||||
}()
|
||||
|
||||
fieldTerms, err := indexReader.DocumentFieldTerms(index.IndexInternalID("1"))
|
||||
fieldTerms, err := indexReader.DocumentFieldTerms(index.IndexInternalID("1"), []string{"name", "title"})
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
|
|
@ -744,8 +744,8 @@ func TestSortMatchSearch(t *testing.T) {
|
|||
|
||||
req := NewSearchRequest(NewMatchQuery("One"))
|
||||
req.SortBy(search.SortOrder{
|
||||
&search.SortStoredField{Field: "Day"},
|
||||
&search.SortStoredField{Field: "Name"},
|
||||
&search.SortField{Field: "Day"},
|
||||
&search.SortField{Field: "Name"},
|
||||
})
|
||||
req.Fields = []string{"*"}
|
||||
sr, err := index.Search(req)
|
||||
|
|
|
@ -9,9 +9,7 @@
|
|||
|
||||
package numeric_util
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
import "fmt"
|
||||
|
||||
const ShiftStartInt64 byte = 0x20
|
||||
|
||||
|
@ -72,3 +70,18 @@ func (p PrefixCoded) Int64() (int64, error) {
|
|||
}
|
||||
return int64(uint64((sortableBits << shift)) ^ 0x8000000000000000), nil
|
||||
}
|
||||
|
||||
func ValidPrefixCodedTerm(p string) (bool, int) {
|
||||
if len(p) > 0 {
|
||||
if p[0] < ShiftStartInt64 || p[0] > ShiftStartInt64+63 {
|
||||
return false, 0
|
||||
}
|
||||
shift := p[0] - ShiftStartInt64
|
||||
nChars := ((63 - int(shift)) / 7) + 1
|
||||
if len(p) != nChars+1 {
|
||||
return false, 0
|
||||
}
|
||||
return true, int(shift)
|
||||
}
|
||||
return false, 0
|
||||
}
|
||||
|
|
|
@ -98,6 +98,45 @@ func TestPrefixCoded(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestPrefixCodedValid(t *testing.T) {
|
||||
// all of the shared tests should be valid
|
||||
for _, test := range tests {
|
||||
valid, _ := ValidPrefixCodedTerm(string(test.output))
|
||||
if !valid {
|
||||
t.Errorf("expected %s to be valid prefix coded, is not", string(test.output))
|
||||
}
|
||||
}
|
||||
|
||||
invalidTests := []struct {
|
||||
data PrefixCoded
|
||||
}{
|
||||
// first byte invalid skip (too low)
|
||||
{
|
||||
data: PrefixCoded{0x19, 'c', 'a', 't'},
|
||||
},
|
||||
// first byte invalid skip (too high)
|
||||
{
|
||||
data: PrefixCoded{0x20 + 64, 'c'},
|
||||
},
|
||||
// length of trailing bytes wrong (too long)
|
||||
{
|
||||
data: PrefixCoded{0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1},
|
||||
},
|
||||
// length of trailing bytes wrong (too short)
|
||||
{
|
||||
data: PrefixCoded{0x20 + 63},
|
||||
},
|
||||
}
|
||||
|
||||
// all of the invalid tests should be rejected
|
||||
for _, test := range invalidTests {
|
||||
valid, _ := ValidPrefixCodedTerm(string(test.data))
|
||||
if valid {
|
||||
t.Errorf("expected %s to be invalid prefix coded, it is", string(test.data))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkTestPrefixCoded(b *testing.B) {
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
|
|
|
@ -113,18 +113,19 @@ func (hc *HeapCollector) collectSingle(ctx *search.SearchContext, reader index.I
|
|||
}
|
||||
|
||||
// see if we need to load the stored fields
|
||||
if len(hc.sort.RequiredStoredFields()) > 0 {
|
||||
if d.ID == "" {
|
||||
// look up the id since we need it for lookup
|
||||
d.ID, err = reader.FinalizeDocID(d.IndexInternalID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
d.Document, err = reader.Document(d.ID)
|
||||
if len(hc.sort.RequiredFields()) > 0 {
|
||||
// find out which fields haven't been loaded yet
|
||||
fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.sort.RequiredFields())
|
||||
// look them up
|
||||
fieldTerms, err := reader.DocumentFieldTerms(d.IndexInternalID, fieldsToLoad)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// cache these as well
|
||||
if d.CachedFieldTerms == nil {
|
||||
d.CachedFieldTerms = make(map[string][]string)
|
||||
}
|
||||
d.CachedFieldTerms.Merge(fieldTerms)
|
||||
}
|
||||
|
||||
// optimization, we track lowest sorting hit already removed from heap
|
||||
|
|
|
@ -95,11 +95,7 @@ func (sr *stubReader) Document(id string) (*document.Document, error) {
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) DocumentFieldTerms(id index.IndexInternalID) (index.FieldTerms, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) DocumentFieldTermsForFields(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
|
||||
func (sr *stubReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error {
|
|||
for _, facetBuilder := range fb.facets {
|
||||
fields = append(fields, facetBuilder.Field())
|
||||
}
|
||||
fieldTerms, err := fb.indexReader.DocumentFieldTermsForFields(docMatch.IndexInternalID, fields)
|
||||
fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.IndexInternalID, fields)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -71,6 +71,10 @@ type DocumentMatch struct {
|
|||
// fields as float64s and date fields as time.RFC3339 formatted strings.
|
||||
Fields map[string]interface{} `json:"fields,omitempty"`
|
||||
|
||||
// as we learn field terms, we can cache important ones for later use
|
||||
// for example, sorting and building facets need these values
|
||||
CachedFieldTerms index.FieldTerms `json:"-"`
|
||||
|
||||
// if we load the document for this hit, remember it so we dont load again
|
||||
Document *document.Document `json:"-"`
|
||||
|
||||
|
|
151
search/sort.go
151
search/sort.go
|
@ -11,15 +11,21 @@ package search
|
|||
|
||||
import (
|
||||
"encoding/json"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/blevesearch/bleve/numeric_util"
|
||||
)
|
||||
|
||||
var HighTerm = strings.Repeat(string([]byte{0xff}), 10)
|
||||
var LowTerm = string([]byte{0x00})
|
||||
|
||||
type SearchSort interface {
|
||||
Compare(a, b *DocumentMatch) int
|
||||
|
||||
RequiresDocID() bool
|
||||
RequiresScoring() bool
|
||||
RequiresStoredFields() []string
|
||||
RequiresFields() []string
|
||||
}
|
||||
|
||||
func ParseSearchSort(input json.RawMessage) (SearchSort, error) {
|
||||
|
@ -42,7 +48,7 @@ func ParseSearchSort(input json.RawMessage) (SearchSort, error) {
|
|||
Descending: descending,
|
||||
}, nil
|
||||
}
|
||||
return &SortStoredField{
|
||||
return &SortField{
|
||||
Field: tmp,
|
||||
Descending: descending,
|
||||
}, nil
|
||||
|
@ -100,35 +106,146 @@ func (so SortOrder) RequiresDocID() bool {
|
|||
return rv
|
||||
}
|
||||
|
||||
func (so SortOrder) RequiredStoredFields() []string {
|
||||
func (so SortOrder) RequiredFields() []string {
|
||||
var rv []string
|
||||
for _, soi := range so {
|
||||
rv = append(rv, soi.RequiresStoredFields()...)
|
||||
rv = append(rv, soi.RequiresFields()...)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// SortStoredField will sort results by the value of a stored field
|
||||
type SortStoredField struct {
|
||||
// SortFieldType lets you control some internal sort behavior
|
||||
// normally leaving this to the zero-value of SortFieldAuto is fine
|
||||
type SortFieldType int
|
||||
|
||||
const (
|
||||
// SortFieldAuto applies heuristics in an attempt to automatically sort correctly
|
||||
SortFieldAuto SortFieldType = iota
|
||||
// SortFieldAsString forces sort as string (no prefix coded terms removed)
|
||||
SortFieldAsString
|
||||
// SortFieldAsNumber forces sort as number (prefix coded terms with shift > 0 removed)
|
||||
SortFieldAsNumber
|
||||
// SortFieldAsDate forces sort as date (prefix coded terms with shift > 0 removed)
|
||||
SortFieldAsDate
|
||||
)
|
||||
|
||||
// SortFieldMode describes the behavior if the field has multiple values
|
||||
type SortFieldMode int
|
||||
|
||||
const (
|
||||
// SortFieldFirst uses the first (or only) value, this is the default zero-value
|
||||
SortFieldFirst SortFieldMode = iota // FIXME name is confusing
|
||||
// SortFieldMin uses the minimum value
|
||||
SortFieldMin
|
||||
// SortFieldMax uses the maximum value
|
||||
SortFieldMax
|
||||
)
|
||||
|
||||
const SortFieldMissingLast = "_last"
|
||||
const SortFieldMissingFirst = "_first"
|
||||
|
||||
// SortField will sort results by the indexed terms of a field
|
||||
// Field is the name of the field
|
||||
// Descending reverse the sort order (default false)
|
||||
// Type allows forcing of string/number/date behavior (default auto)
|
||||
// Mode controls behavior for multi-values fields (default first)
|
||||
// Missing controls behavior of missing values (default last)
|
||||
type SortField struct {
|
||||
Field string
|
||||
Descending bool
|
||||
Type SortFieldType
|
||||
Mode SortFieldMode
|
||||
Missing string
|
||||
}
|
||||
|
||||
// Compare orders DocumentMatch instances by the indexed terms of the sort field
|
||||
func (s *SortStoredField) Compare(i, j *DocumentMatch) int {
|
||||
return i.Document.CompareFieldsNamed(j.Document, s.Field, s.Descending)
|
||||
func (s *SortField) Compare(i, j *DocumentMatch) int {
|
||||
iTerms := i.CachedFieldTerms[s.Field]
|
||||
iTerms = s.filterTermsByType(iTerms)
|
||||
iTerm := s.filterTermsByMode(iTerms)
|
||||
jTerms := j.CachedFieldTerms[s.Field]
|
||||
jTerms = s.filterTermsByType(jTerms)
|
||||
jTerm := s.filterTermsByMode(jTerms)
|
||||
rv := strings.Compare(iTerm, jTerm)
|
||||
if s.Descending {
|
||||
rv = -rv
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *SortField) filterTermsByMode(terms []string) string {
|
||||
if len(terms) == 1 || (len(terms) > 1 && s.Mode == SortFieldFirst) {
|
||||
return terms[0]
|
||||
} else if len(terms) > 1 {
|
||||
switch s.Mode {
|
||||
case SortFieldMin:
|
||||
sort.Strings(terms)
|
||||
return terms[0]
|
||||
case SortFieldMax:
|
||||
sort.Strings(terms)
|
||||
return terms[len(terms)-1]
|
||||
}
|
||||
}
|
||||
|
||||
// handle missing terms
|
||||
if s.Missing == "" || s.Missing == SortFieldMissingLast {
|
||||
if s.Descending {
|
||||
return LowTerm
|
||||
}
|
||||
return HighTerm
|
||||
} else if s.Missing == SortFieldMissingFirst {
|
||||
if s.Descending {
|
||||
return HighTerm
|
||||
}
|
||||
return LowTerm
|
||||
}
|
||||
return s.Missing
|
||||
}
|
||||
|
||||
// filterTermsByType attempts to make one pass on the terms
|
||||
// if we are in auto-mode AND all the terms look like prefix-coded numbers
|
||||
// return only the terms which had shift of 0
|
||||
// if we are in explicit number or date mode, return only valid
|
||||
// prefix coded numbers with shift of 0
|
||||
func (s *SortField) filterTermsByType(terms []string) []string {
|
||||
stype := s.Type
|
||||
if stype == SortFieldAuto {
|
||||
allTermsPrefixCoded := true
|
||||
var termsWithShiftZero []string
|
||||
for _, term := range terms {
|
||||
valid, shift := numeric_util.ValidPrefixCodedTerm(term)
|
||||
if valid && shift == 0 {
|
||||
termsWithShiftZero = append(termsWithShiftZero, term)
|
||||
} else if !valid {
|
||||
allTermsPrefixCoded = false
|
||||
}
|
||||
}
|
||||
if allTermsPrefixCoded {
|
||||
terms = termsWithShiftZero
|
||||
}
|
||||
} else if stype == SortFieldAsNumber || stype == SortFieldAsDate {
|
||||
var termsWithShiftZero []string
|
||||
for _, term := range terms {
|
||||
valid, shift := numeric_util.ValidPrefixCodedTerm(term)
|
||||
if valid && shift == 0 {
|
||||
termsWithShiftZero = append(termsWithShiftZero)
|
||||
}
|
||||
}
|
||||
terms = termsWithShiftZero
|
||||
}
|
||||
return terms
|
||||
}
|
||||
|
||||
// RequiresDocID says this SearchSort does not require the DocID be loaded
|
||||
func (s *SortStoredField) RequiresDocID() bool { return false }
|
||||
func (s *SortField) RequiresDocID() bool { return false }
|
||||
|
||||
// RequiresScoring says this SearchStore does not require scoring
|
||||
func (s *SortStoredField) RequiresScoring() bool { return false }
|
||||
func (s *SortField) RequiresScoring() bool { return false }
|
||||
|
||||
// RequiresStoredFields says this SearchStore requires the specified stored field
|
||||
func (s *SortStoredField) RequiresStoredFields() []string { return []string{s.Field} }
|
||||
// RequiresFields says this SearchSort requires the terms of the specified field
|
||||
func (s *SortField) RequiresFields() []string { return []string{s.Field} }
|
||||
|
||||
func (s *SortStoredField) MarshalJSON() ([]byte, error) {
|
||||
func (s *SortField) MarshalJSON() ([]byte, error) {
|
||||
if s.Descending {
|
||||
return json.Marshal("-" + s.Field)
|
||||
}
|
||||
|
@ -154,8 +271,8 @@ func (s *SortDocID) RequiresDocID() bool { return true }
|
|||
// RequiresScoring says this SearchStore does not require scoring
|
||||
func (s *SortDocID) RequiresScoring() bool { return false }
|
||||
|
||||
// RequiresStoredFields says this SearchStore does not require any stored fields
|
||||
func (s *SortDocID) RequiresStoredFields() []string { return nil }
|
||||
// RequiresFields says this SearchSort does not require any fields
|
||||
func (s *SortDocID) RequiresFields() []string { return nil }
|
||||
|
||||
func (s *SortDocID) MarshalJSON() ([]byte, error) {
|
||||
if s.Descending {
|
||||
|
@ -185,8 +302,8 @@ func (s *SortScore) RequiresDocID() bool { return false }
|
|||
// RequiresScoring says this SearchStore does require scoring
|
||||
func (s *SortScore) RequiresScoring() bool { return true }
|
||||
|
||||
// RequiresStoredFields says this SearchStore does not require any store fields
|
||||
func (s *SortScore) RequiresStoredFields() []string { return nil }
|
||||
// RequiresFields says this SearchSort does not require any fields
|
||||
func (s *SortScore) RequiresFields() []string { return nil }
|
||||
|
||||
func (s *SortScore) MarshalJSON() ([]byte, error) {
|
||||
if s.Descending {
|
||||
|
|
Loading…
Reference in New Issue