0
0
Fork 0

reduce garbage created while processing facets

previously we parsed/returned large sections of the documents
back index row in order to compute facet information.  this
would require parsing the protobuf of the entire back index row.
unfortunately this creates considerable garbage.

this new version introduces a visitor/callback approach to
working with data inside the back index row.  the benefit
of this approach is that we can let the higher-level code
see values, prior to any copies of data being made or
intermediate garbage being created.  implementations of
the callback must copy any value which they would like to
retain beyond the callback.

NOTE: this approach is duplicates code from the
automatically generated protobuf code

NOTE: this approach assumes that the "field" field be serialized
before the "terms" field.  This is guaranteed by our currently
generated protobuf encoder, and is recommended by the protobuf
spec.  But, decoders SHOULD support them occuring in any order,
which we do not.
This commit is contained in:
Marty Schoch 2017-03-02 16:19:08 -05:00
parent b04745abcc
commit 0eba2a3f0c
15 changed files with 441 additions and 142 deletions

View File

@ -48,6 +48,8 @@ type Index interface {
Advanced() (store.KVStore, error)
}
type DocumentFieldTermVisitor func(field string, term []byte)
type IndexReader interface {
TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error)
@ -64,7 +66,7 @@ type IndexReader interface {
FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)
Document(id string) (*document.Document, error)
DocumentFieldTerms(id IndexInternalID, fields []string) (FieldTerms, error)
DocumentVisitFieldTerms(id IndexInternalID, fields []string, visitor DocumentFieldTermVisitor) error
Fields() ([]string, error)

View File

@ -101,15 +101,7 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) {
return
}
func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
back, err := backIndexRowForDoc(i.kvreader, id)
if err != nil {
return nil, err
}
if back == nil {
return nil, nil
}
rv := make(index.FieldTerms, len(fields))
func (i *IndexReader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocumentFieldTermVisitor) error {
fieldsMap := make(map[uint16]string, len(fields))
for _, f := range fields {
id, ok := i.index.fieldCache.FieldNamed(f, false)
@ -117,12 +109,34 @@ func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []stri
fieldsMap[id] = f
}
}
for _, entry := range back.termsEntries {
if field, ok := fieldsMap[uint16(*entry.Field)]; ok {
rv[field] = entry.Terms
}
tempRow := BackIndexRow{
doc: id,
}
return rv, nil
keyBuf := GetRowBuffer()
if tempRow.KeySize() > len(keyBuf) {
keyBuf = make([]byte, 2*tempRow.KeySize())
}
defer PutRowBuffer(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf)
if err != nil {
return err
}
value, err := i.kvreader.Get(keyBuf[:keySize])
if err != nil {
return err
}
if value == nil {
return nil
}
return visitBackIndexRow(value, func(field uint32, term []byte) {
if field, ok := fieldsMap[uint16(field)]; ok {
visitor(field, term)
}
})
}
func (i *IndexReader) Fields() (fields []string, err error) {

View File

@ -881,3 +881,232 @@ func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
rv.value = value[1:]
return rv, nil
}
type backIndexFieldTermVisitor func(field uint32, term []byte)
// visitBackIndexRow is designed to process a protobuf encoded
// value, without creating unnecessary garbage. Instead values are passed
// to a callback, inspected first, and only copied if necessary.
// Due to the fact that this borrows from generated code, it must be marnually
// updated if the protobuf definition changes.
//
// This code originates from:
// func (m *BackIndexRowValue) Unmarshal(data []byte) error
// the sections which create garbage or parse unintersting sections
// have been commented out. This was done by design to allow for easier
// merging in the future if that original function is regenerated
func visitBackIndexRow(data []byte, callback backIndexFieldTermVisitor) error {
l := len(data)
iNdEx := 0
for iNdEx < l {
var wire uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
switch fieldNum {
case 1:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
msglen |= (int(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + msglen
if msglen < 0 {
return ErrInvalidLengthUpsidedown
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
// dont parse term entries
// m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
// if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
// return err
// }
// instead, inspect them
if err := visitBackIndexRowFieldTerms(data[iNdEx:postIndex], callback); err != nil {
return err
}
iNdEx = postIndex
case 2:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
msglen |= (int(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + msglen
if msglen < 0 {
return ErrInvalidLengthUpsidedown
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
// don't parse stored entries
// m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
// if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
// return err
// }
iNdEx = postIndex
default:
var sizeOfWire int
for {
sizeOfWire++
wire >>= 7
if wire == 0 {
break
}
}
iNdEx -= sizeOfWire
skippy, err := skipUpsidedown(data[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthUpsidedown
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
// don't track unrecognized data
//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
return nil
}
// visitBackIndexRowFieldTerms is designed to process a protobuf encoded
// sub-value within the BackIndexRowValue, without creating unnecessary garbage.
// Instead values are passed to a callback, inspected first, and only copied if
// necessary. Due to the fact that this borrows from generated code, it must
// be marnually updated if the protobuf definition changes.
//
// This code originates from:
// func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
// the sections which create garbage or parse uninteresting sections
// have been commented out. This was done by design to allow for easier
// merging in the future if that original function is regenerated
func visitBackIndexRowFieldTerms(data []byte, callback backIndexFieldTermVisitor) error {
var theField uint32
var hasFields [1]uint64
l := len(data)
iNdEx := 0
for iNdEx < l {
var wire uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
switch fieldNum {
case 1:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
}
var v uint32
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
v |= (uint32(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
// m.Field = &v
theField = v
hasFields[0] |= uint64(0x00000001)
case 2:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
}
var stringLen uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
stringLen |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + int(stringLen)
if postIndex > l {
return io.ErrUnexpectedEOF
}
//m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
callback(theField, data[iNdEx:postIndex])
iNdEx = postIndex
default:
var sizeOfWire int
for {
sizeOfWire++
wire >>= 7
if wire == 0 {
break
}
}
iNdEx -= sizeOfWire
skippy, err := skipUpsidedown(data[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthUpsidedown
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
// if hasFields[0]&uint64(0x00000001) == 0 {
// return new(github_com_golang_protobuf_proto.RequiredNotSetError)
// }
return nil
}

View File

@ -361,3 +361,22 @@ func BenchmarkStoredRowDecode(b *testing.B) {
}
}
}
func TestVisitBackIndexRow(t *testing.T) {
expected := map[uint32][]byte{
0: []byte("beer"),
1: []byte("beat"),
}
val := []byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r', 10, 8, 8, 1, 18, 4, 'b', 'e', 'a', 't', 18, 2, 8, 3, 18, 2, 8, 4, 18, 2, 8, 5}
err := visitBackIndexRow(val, func(field uint32, term []byte) {
if reflect.DeepEqual(expected[field], term) {
delete(expected, field)
}
})
if err != nil {
t.Fatal(err)
}
if len(expected) > 0 {
t.Errorf("expected visitor to see these but did not %v", expected)
}
}

View File

@ -1251,7 +1251,7 @@ func TestIndexTermReaderCompositeFields(t *testing.T) {
}
}
func TestIndexDocumentFieldTerms(t *testing.T) {
func TestIndexDocumentVisitFieldTerms(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
@ -1294,7 +1294,11 @@ func TestIndexDocumentFieldTerms(t *testing.T) {
}
}()
fieldTerms, err := indexReader.DocumentFieldTerms(index.IndexInternalID("1"), []string{"name", "title"})
fieldTerms := make(index.FieldTerms)
err = indexReader.DocumentVisitFieldTerms(index.IndexInternalID("1"), []string{"name", "title"}, func(field string, term []byte) {
fieldTerms[field] = append(fieldTerms[field], string(term))
})
if err != nil {
t.Error(err)
}

View File

@ -104,8 +104,8 @@ func (sr *stubReader) Document(id string) (*document.Document, error) {
return nil, nil
}
func (sr *stubReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
return nil, nil
func (sr *stubReader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocumentFieldTermVisitor) error {
return nil
}
func (sr *stubReader) Fields() ([]string, error) {

View File

@ -114,12 +114,6 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
default:
}
}
if hc.facetsBuilder != nil {
err = hc.facetsBuilder.Update(next)
if err != nil {
break
}
}
err = hc.collectSingle(searchContext, reader, next)
if err != nil {
@ -144,6 +138,13 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
var sortByScoreOpt = []string{"_score"}
func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error {
// visit field terms for features that require it (sort, facets)
err := hc.visitFieldTerms(reader, d)
if err != nil {
return err
}
// increment total hits
hc.total++
d.HitNumber = hc.total
@ -153,7 +154,6 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
hc.maxScore = d.Score
}
var err error
// see if we need to load ID (at this early stage, for example to sort on it)
if hc.needDocIds {
d.ID, err = reader.ExternalID(d.IndexInternalID)
@ -162,22 +162,6 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
}
}
// see if we need to load the stored fields
if len(hc.neededFields) > 0 {
// find out which fields haven't been loaded yet
fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.neededFields)
// look them up
fieldTerms, err := reader.DocumentFieldTerms(d.IndexInternalID, fieldsToLoad)
if err != nil {
return err
}
// cache these as well
if d.CachedFieldTerms == nil {
d.CachedFieldTerms = make(map[string][]string)
}
d.CachedFieldTerms.Merge(fieldTerms)
}
// compute this hits sort value
if len(hc.sort) == 1 && hc.cachedScoring[0] {
d.Sort = sortByScoreOpt
@ -215,9 +199,31 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
return nil
}
// visitFieldTerms is responsible for visiting the field terms of the
// search hit, and passing visited terms to the sort and facet builder
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch) error {
if hc.facetsBuilder != nil {
hc.facetsBuilder.StartDoc()
}
err := reader.DocumentVisitFieldTerms(d.IndexInternalID, hc.neededFields, func(field string, term []byte) {
if hc.facetsBuilder != nil {
hc.facetsBuilder.UpdateVisitor(field, term)
}
hc.sort.UpdateVisitor(field, term)
})
if hc.facetsBuilder != nil {
hc.facetsBuilder.EndDoc()
}
return err
}
// SetFacetsBuilder registers a facet builder for this collector
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
hc.facetsBuilder = facetsBuilder
hc.neededFields = append(hc.neededFields, hc.facetsBuilder.RequiredFields()...)
}
// finalizeResults starts with the heap containing the final top size+skip

View File

@ -18,7 +18,6 @@ import (
"sort"
"time"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
@ -35,6 +34,7 @@ type DateTimeFacetBuilder struct {
total int
missing int
ranges map[string]*dateTimeRange
sawValue bool
}
func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
@ -58,36 +58,35 @@ func (fb *DateTimeFacetBuilder) Field() string {
return fb.field
}
func (fb *DateTimeFacetBuilder) Update(ft index.FieldTerms) {
terms, ok := ft[fb.field]
if ok {
for _, term := range terms {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
t := time.Unix(0, i64)
func (fb *DateTimeFacetBuilder) UpdateVisitor(field string, term []byte) {
if field == fb.field {
fb.sawValue = true
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
t := time.Unix(0, i64)
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
existingCount, existed := fb.termsCount[rangeName]
if existed {
fb.termsCount[rangeName] = existingCount + 1
} else {
fb.termsCount[rangeName] = 1
}
fb.total++
}
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
fb.total++
}
}
}
}
} else {
}
}
func (fb *DateTimeFacetBuilder) StartDoc() {
fb.sawValue = false
}
func (fb *DateTimeFacetBuilder) EndDoc() {
if !fb.sawValue {
fb.missing++
}
}

View File

@ -17,7 +17,6 @@ package facet
import (
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
@ -34,6 +33,7 @@ type NumericFacetBuilder struct {
total int
missing int
ranges map[string]*numericRange
sawValue bool
}
func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
@ -57,36 +57,35 @@ func (fb *NumericFacetBuilder) Field() string {
return fb.field
}
func (fb *NumericFacetBuilder) Update(ft index.FieldTerms) {
terms, ok := ft[fb.field]
if ok {
for _, term := range terms {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
f64 := numeric.Int64ToFloat64(i64)
func (fb *NumericFacetBuilder) UpdateVisitor(field string, term []byte) {
if field == fb.field {
fb.sawValue = true
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
f64 := numeric.Int64ToFloat64(i64)
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
existingCount, existed := fb.termsCount[rangeName]
if existed {
fb.termsCount[rangeName] = existingCount + 1
} else {
fb.termsCount[rangeName] = 1
}
fb.total++
}
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
fb.total++
}
}
}
}
} else {
}
}
func (fb *NumericFacetBuilder) StartDoc() {
fb.sawValue = false
}
func (fb *NumericFacetBuilder) EndDoc() {
if !fb.sawValue {
fb.missing++
}
}

View File

@ -18,7 +18,6 @@ import (
"strconv"
"testing"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
)
@ -52,7 +51,9 @@ func numericFacetN(b *testing.B, numTerms int) {
nfb.AddRange("rangename"+strconv.Itoa(i), &min, &max)
for _, pv := range pcodedvalues {
nfb.Update(index.FieldTerms{field: []string{string(pv)}})
nfb.StartDoc()
nfb.UpdateVisitor(field, pv)
nfb.EndDoc()
}
}

View File

@ -17,7 +17,6 @@ package facet
import (
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
@ -27,6 +26,7 @@ type TermsFacetBuilder struct {
termsCount map[string]int
total int
missing int
sawValue bool
}
func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
@ -41,19 +41,20 @@ func (fb *TermsFacetBuilder) Field() string {
return fb.field
}
func (fb *TermsFacetBuilder) Update(ft index.FieldTerms) {
terms, ok := ft[fb.field]
if ok {
for _, term := range terms {
existingCount, existed := fb.termsCount[term]
if existed {
fb.termsCount[term] = existingCount + 1
} else {
fb.termsCount[term] = 1
}
fb.total++
}
} else {
func (fb *TermsFacetBuilder) UpdateVisitor(field string, term []byte) {
if field == fb.field {
fb.sawValue = true
fb.termsCount[string(term)] = fb.termsCount[string(term)] + 1
fb.total++
}
}
func (fb *TermsFacetBuilder) StartDoc() {
fb.sawValue = false
}
func (fb *TermsFacetBuilder) EndDoc() {
if !fb.sawValue {
fb.missing++
}
}

View File

@ -18,8 +18,6 @@ import (
"io/ioutil"
"regexp"
"testing"
"github.com/blevesearch/bleve/index"
)
var terms []string
@ -61,7 +59,9 @@ func termsFacetN(b *testing.B, numTerms int) {
for len(tfb.termsCount) < numTerms && i <= termsLen {
j := i % termsLen
term := terms[j]
tfb.Update(index.FieldTerms{field: []string{term}})
tfb.StartDoc()
tfb.UpdateVisitor(field, []byte(term))
tfb.EndDoc()
i++
}

View File

@ -21,7 +21,10 @@ import (
)
type FacetBuilder interface {
Update(index.FieldTerms)
StartDoc()
UpdateVisitor(field string, term []byte)
EndDoc()
Result() *FacetResult
Field() string
}
@ -41,33 +44,29 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
fb.facets[name] = facetBuilder
fb.fields = append(fb.fields, facetBuilder.Field())
}
func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error {
if fb.fields == nil {
for _, facetBuilder := range fb.facets {
fb.fields = append(fb.fields, facetBuilder.Field())
}
}
func (fb *FacetsBuilder) RequiredFields() []string {
return fb.fields
}
if len(fb.fields) > 0 {
// find out which fields haven't been loaded yet
fieldsToLoad := docMatch.CachedFieldTerms.FieldsNotYetCached(fb.fields)
// look them up
fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.IndexInternalID, fieldsToLoad)
if err != nil {
return err
}
// cache these as well
if docMatch.CachedFieldTerms == nil {
docMatch.CachedFieldTerms = make(map[string][]string)
}
docMatch.CachedFieldTerms.Merge(fieldTerms)
}
func (fb *FacetsBuilder) StartDoc() {
for _, facetBuilder := range fb.facets {
facetBuilder.Update(docMatch.CachedFieldTerms)
facetBuilder.StartDoc()
}
}
func (fb *FacetsBuilder) EndDoc() {
for _, facetBuilder := range fb.facets {
facetBuilder.EndDoc()
}
}
func (fb *FacetsBuilder) UpdateVisitor(field string, term []byte) {
for _, facetBuilder := range fb.facets {
facetBuilder.UpdateVisitor(field, term)
}
return nil
}
type TermFacet struct {

View File

@ -69,10 +69,6 @@ type DocumentMatch struct {
// fields as float64s and date fields as time.RFC3339 formatted strings.
Fields map[string]interface{} `json:"fields,omitempty"`
// as we learn field terms, we can cache important ones for later use
// for example, sorting and building facets need these values
CachedFieldTerms index.FieldTerms `json:"-"`
// if we load the document for this hit, remember it so we dont load again
Document *document.Document `json:"-"`

View File

@ -27,6 +27,7 @@ var HighTerm = strings.Repeat(string([]byte{0xff}), 10)
var LowTerm = string([]byte{0x00})
type SearchSort interface {
UpdateVisitor(field string, term []byte)
Value(a *DocumentMatch) string
Descending() bool
@ -171,6 +172,12 @@ func (so SortOrder) Value(doc *DocumentMatch) {
}
}
func (so SortOrder) UpdateVisitor(field string, term []byte) {
for _, soi := range so {
soi.UpdateVisitor(field, term)
}
}
// Compare will compare two document matches using the specified sort order
// if both are numbers, we avoid converting back to term
func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatch) int {
@ -300,13 +307,24 @@ type SortField struct {
Type SortFieldType
Mode SortFieldMode
Missing SortFieldMissing
values []string
}
// UpdateVisitor notifies this sort field that in this document
// this field has the specified term
func (s *SortField) UpdateVisitor(field string, term []byte) {
if field == s.Field {
s.values = append(s.values, string(term))
}
}
// Value returns the sort value of the DocumentMatch
// it also resets the state of this SortField for
// processing the next document
func (s *SortField) Value(i *DocumentMatch) string {
iTerms := i.CachedFieldTerms[s.Field]
iTerms = s.filterTermsByType(iTerms)
iTerms := s.filterTermsByType(s.values)
iTerm := s.filterTermsByMode(iTerms)
s.values = nil
return iTerm
}
@ -435,6 +453,12 @@ type SortDocID struct {
Desc bool
}
// UpdateVisitor is a no-op for SortDocID as it's value
// is not dependent on any field terms
func (s *SortDocID) UpdateVisitor(field string, term []byte) {
}
// Value returns the sort value of the DocumentMatch
func (s *SortDocID) Value(i *DocumentMatch) string {
return i.ID
@ -466,6 +490,12 @@ type SortScore struct {
Desc bool
}
// UpdateVisitor is a no-op for SortScore as it's value
// is not dependent on any field terms
func (s *SortScore) UpdateVisitor(field string, term []byte) {
}
// Value returns the sort value of the DocumentMatch
func (s *SortScore) Value(i *DocumentMatch) string {
return "_score"