reduce garbage created while processing facets
previously we parsed/returned large sections of the documents back index row in order to compute facet information. this would require parsing the protobuf of the entire back index row. unfortunately this creates considerable garbage. this new version introduces a visitor/callback approach to working with data inside the back index row. the benefit of this approach is that we can let the higher-level code see values, prior to any copies of data being made or intermediate garbage being created. implementations of the callback must copy any value which they would like to retain beyond the callback. NOTE: this approach is duplicates code from the automatically generated protobuf code NOTE: this approach assumes that the "field" field be serialized before the "terms" field. This is guaranteed by our currently generated protobuf encoder, and is recommended by the protobuf spec. But, decoders SHOULD support them occuring in any order, which we do not.
This commit is contained in:
parent
b04745abcc
commit
0eba2a3f0c
|
@ -48,6 +48,8 @@ type Index interface {
|
|||
Advanced() (store.KVStore, error)
|
||||
}
|
||||
|
||||
type DocumentFieldTermVisitor func(field string, term []byte)
|
||||
|
||||
type IndexReader interface {
|
||||
TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error)
|
||||
|
||||
|
@ -64,7 +66,7 @@ type IndexReader interface {
|
|||
FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)
|
||||
|
||||
Document(id string) (*document.Document, error)
|
||||
DocumentFieldTerms(id IndexInternalID, fields []string) (FieldTerms, error)
|
||||
DocumentVisitFieldTerms(id IndexInternalID, fields []string, visitor DocumentFieldTermVisitor) error
|
||||
|
||||
Fields() ([]string, error)
|
||||
|
||||
|
|
|
@ -101,15 +101,7 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
|
||||
back, err := backIndexRowForDoc(i.kvreader, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if back == nil {
|
||||
return nil, nil
|
||||
}
|
||||
rv := make(index.FieldTerms, len(fields))
|
||||
func (i *IndexReader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocumentFieldTermVisitor) error {
|
||||
fieldsMap := make(map[uint16]string, len(fields))
|
||||
for _, f := range fields {
|
||||
id, ok := i.index.fieldCache.FieldNamed(f, false)
|
||||
|
@ -117,12 +109,34 @@ func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []stri
|
|||
fieldsMap[id] = f
|
||||
}
|
||||
}
|
||||
for _, entry := range back.termsEntries {
|
||||
if field, ok := fieldsMap[uint16(*entry.Field)]; ok {
|
||||
rv[field] = entry.Terms
|
||||
}
|
||||
|
||||
tempRow := BackIndexRow{
|
||||
doc: id,
|
||||
}
|
||||
return rv, nil
|
||||
|
||||
keyBuf := GetRowBuffer()
|
||||
if tempRow.KeySize() > len(keyBuf) {
|
||||
keyBuf = make([]byte, 2*tempRow.KeySize())
|
||||
}
|
||||
defer PutRowBuffer(keyBuf)
|
||||
keySize, err := tempRow.KeyTo(keyBuf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
value, err := i.kvreader.Get(keyBuf[:keySize])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if value == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return visitBackIndexRow(value, func(field uint32, term []byte) {
|
||||
if field, ok := fieldsMap[uint16(field)]; ok {
|
||||
visitor(field, term)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func (i *IndexReader) Fields() (fields []string, err error) {
|
||||
|
|
|
@ -881,3 +881,232 @@ func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
|
|||
rv.value = value[1:]
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
type backIndexFieldTermVisitor func(field uint32, term []byte)
|
||||
|
||||
// visitBackIndexRow is designed to process a protobuf encoded
|
||||
// value, without creating unnecessary garbage. Instead values are passed
|
||||
// to a callback, inspected first, and only copied if necessary.
|
||||
// Due to the fact that this borrows from generated code, it must be marnually
|
||||
// updated if the protobuf definition changes.
|
||||
//
|
||||
// This code originates from:
|
||||
// func (m *BackIndexRowValue) Unmarshal(data []byte) error
|
||||
// the sections which create garbage or parse unintersting sections
|
||||
// have been commented out. This was done by design to allow for easier
|
||||
// merging in the future if that original function is regenerated
|
||||
func visitBackIndexRow(data []byte, callback backIndexFieldTermVisitor) error {
|
||||
l := len(data)
|
||||
iNdEx := 0
|
||||
for iNdEx < l {
|
||||
var wire uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
wire |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
fieldNum := int32(wire >> 3)
|
||||
wireType := int(wire & 0x7)
|
||||
switch fieldNum {
|
||||
case 1:
|
||||
if wireType != 2 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
|
||||
}
|
||||
var msglen int
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
msglen |= (int(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
postIndex := iNdEx + msglen
|
||||
if msglen < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if postIndex > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
// dont parse term entries
|
||||
// m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
|
||||
// if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
|
||||
// return err
|
||||
// }
|
||||
// instead, inspect them
|
||||
if err := visitBackIndexRowFieldTerms(data[iNdEx:postIndex], callback); err != nil {
|
||||
return err
|
||||
}
|
||||
iNdEx = postIndex
|
||||
case 2:
|
||||
if wireType != 2 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
|
||||
}
|
||||
var msglen int
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
msglen |= (int(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
postIndex := iNdEx + msglen
|
||||
if msglen < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if postIndex > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
// don't parse stored entries
|
||||
// m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
|
||||
// if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
|
||||
// return err
|
||||
// }
|
||||
iNdEx = postIndex
|
||||
default:
|
||||
var sizeOfWire int
|
||||
for {
|
||||
sizeOfWire++
|
||||
wire >>= 7
|
||||
if wire == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
iNdEx -= sizeOfWire
|
||||
skippy, err := skipUpsidedown(data[iNdEx:])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skippy < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if (iNdEx + skippy) > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
// don't track unrecognized data
|
||||
//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
|
||||
iNdEx += skippy
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// visitBackIndexRowFieldTerms is designed to process a protobuf encoded
|
||||
// sub-value within the BackIndexRowValue, without creating unnecessary garbage.
|
||||
// Instead values are passed to a callback, inspected first, and only copied if
|
||||
// necessary. Due to the fact that this borrows from generated code, it must
|
||||
// be marnually updated if the protobuf definition changes.
|
||||
//
|
||||
// This code originates from:
|
||||
// func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
|
||||
// the sections which create garbage or parse uninteresting sections
|
||||
// have been commented out. This was done by design to allow for easier
|
||||
// merging in the future if that original function is regenerated
|
||||
func visitBackIndexRowFieldTerms(data []byte, callback backIndexFieldTermVisitor) error {
|
||||
var theField uint32
|
||||
|
||||
var hasFields [1]uint64
|
||||
l := len(data)
|
||||
iNdEx := 0
|
||||
for iNdEx < l {
|
||||
var wire uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
wire |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
fieldNum := int32(wire >> 3)
|
||||
wireType := int(wire & 0x7)
|
||||
switch fieldNum {
|
||||
case 1:
|
||||
if wireType != 0 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
|
||||
}
|
||||
var v uint32
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
v |= (uint32(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
// m.Field = &v
|
||||
theField = v
|
||||
hasFields[0] |= uint64(0x00000001)
|
||||
case 2:
|
||||
if wireType != 2 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
|
||||
}
|
||||
var stringLen uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
stringLen |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
postIndex := iNdEx + int(stringLen)
|
||||
if postIndex > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
//m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
|
||||
callback(theField, data[iNdEx:postIndex])
|
||||
iNdEx = postIndex
|
||||
default:
|
||||
var sizeOfWire int
|
||||
for {
|
||||
sizeOfWire++
|
||||
wire >>= 7
|
||||
if wire == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
iNdEx -= sizeOfWire
|
||||
skippy, err := skipUpsidedown(data[iNdEx:])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skippy < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if (iNdEx + skippy) > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
|
||||
iNdEx += skippy
|
||||
}
|
||||
}
|
||||
// if hasFields[0]&uint64(0x00000001) == 0 {
|
||||
// return new(github_com_golang_protobuf_proto.RequiredNotSetError)
|
||||
// }
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -361,3 +361,22 @@ func BenchmarkStoredRowDecode(b *testing.B) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestVisitBackIndexRow(t *testing.T) {
|
||||
expected := map[uint32][]byte{
|
||||
0: []byte("beer"),
|
||||
1: []byte("beat"),
|
||||
}
|
||||
val := []byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r', 10, 8, 8, 1, 18, 4, 'b', 'e', 'a', 't', 18, 2, 8, 3, 18, 2, 8, 4, 18, 2, 8, 5}
|
||||
err := visitBackIndexRow(val, func(field uint32, term []byte) {
|
||||
if reflect.DeepEqual(expected[field], term) {
|
||||
delete(expected, field)
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(expected) > 0 {
|
||||
t.Errorf("expected visitor to see these but did not %v", expected)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1251,7 +1251,7 @@ func TestIndexTermReaderCompositeFields(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestIndexDocumentFieldTerms(t *testing.T) {
|
||||
func TestIndexDocumentVisitFieldTerms(t *testing.T) {
|
||||
defer func() {
|
||||
err := DestroyTest()
|
||||
if err != nil {
|
||||
|
@ -1294,7 +1294,11 @@ func TestIndexDocumentFieldTerms(t *testing.T) {
|
|||
}
|
||||
}()
|
||||
|
||||
fieldTerms, err := indexReader.DocumentFieldTerms(index.IndexInternalID("1"), []string{"name", "title"})
|
||||
fieldTerms := make(index.FieldTerms)
|
||||
|
||||
err = indexReader.DocumentVisitFieldTerms(index.IndexInternalID("1"), []string{"name", "title"}, func(field string, term []byte) {
|
||||
fieldTerms[field] = append(fieldTerms[field], string(term))
|
||||
})
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
|
|
@ -104,8 +104,8 @@ func (sr *stubReader) Document(id string) (*document.Document, error) {
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
|
||||
return nil, nil
|
||||
func (sr *stubReader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocumentFieldTermVisitor) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) Fields() ([]string, error) {
|
||||
|
|
|
@ -114,12 +114,6 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
|
|||
default:
|
||||
}
|
||||
}
|
||||
if hc.facetsBuilder != nil {
|
||||
err = hc.facetsBuilder.Update(next)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
err = hc.collectSingle(searchContext, reader, next)
|
||||
if err != nil {
|
||||
|
@ -144,6 +138,13 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
|
|||
var sortByScoreOpt = []string{"_score"}
|
||||
|
||||
func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error {
|
||||
|
||||
// visit field terms for features that require it (sort, facets)
|
||||
err := hc.visitFieldTerms(reader, d)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// increment total hits
|
||||
hc.total++
|
||||
d.HitNumber = hc.total
|
||||
|
@ -153,7 +154,6 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
|
|||
hc.maxScore = d.Score
|
||||
}
|
||||
|
||||
var err error
|
||||
// see if we need to load ID (at this early stage, for example to sort on it)
|
||||
if hc.needDocIds {
|
||||
d.ID, err = reader.ExternalID(d.IndexInternalID)
|
||||
|
@ -162,22 +162,6 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
|
|||
}
|
||||
}
|
||||
|
||||
// see if we need to load the stored fields
|
||||
if len(hc.neededFields) > 0 {
|
||||
// find out which fields haven't been loaded yet
|
||||
fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.neededFields)
|
||||
// look them up
|
||||
fieldTerms, err := reader.DocumentFieldTerms(d.IndexInternalID, fieldsToLoad)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// cache these as well
|
||||
if d.CachedFieldTerms == nil {
|
||||
d.CachedFieldTerms = make(map[string][]string)
|
||||
}
|
||||
d.CachedFieldTerms.Merge(fieldTerms)
|
||||
}
|
||||
|
||||
// compute this hits sort value
|
||||
if len(hc.sort) == 1 && hc.cachedScoring[0] {
|
||||
d.Sort = sortByScoreOpt
|
||||
|
@ -215,9 +199,31 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
|
|||
return nil
|
||||
}
|
||||
|
||||
// visitFieldTerms is responsible for visiting the field terms of the
|
||||
// search hit, and passing visited terms to the sort and facet builder
|
||||
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch) error {
|
||||
if hc.facetsBuilder != nil {
|
||||
hc.facetsBuilder.StartDoc()
|
||||
}
|
||||
|
||||
err := reader.DocumentVisitFieldTerms(d.IndexInternalID, hc.neededFields, func(field string, term []byte) {
|
||||
if hc.facetsBuilder != nil {
|
||||
hc.facetsBuilder.UpdateVisitor(field, term)
|
||||
}
|
||||
hc.sort.UpdateVisitor(field, term)
|
||||
})
|
||||
|
||||
if hc.facetsBuilder != nil {
|
||||
hc.facetsBuilder.EndDoc()
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// SetFacetsBuilder registers a facet builder for this collector
|
||||
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
|
||||
hc.facetsBuilder = facetsBuilder
|
||||
hc.neededFields = append(hc.neededFields, hc.facetsBuilder.RequiredFields()...)
|
||||
}
|
||||
|
||||
// finalizeResults starts with the heap containing the final top size+skip
|
||||
|
|
|
@ -18,7 +18,6 @@ import (
|
|||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
)
|
||||
|
@ -35,6 +34,7 @@ type DateTimeFacetBuilder struct {
|
|||
total int
|
||||
missing int
|
||||
ranges map[string]*dateTimeRange
|
||||
sawValue bool
|
||||
}
|
||||
|
||||
func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
|
||||
|
@ -58,36 +58,35 @@ func (fb *DateTimeFacetBuilder) Field() string {
|
|||
return fb.field
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) Update(ft index.FieldTerms) {
|
||||
terms, ok := ft[fb.field]
|
||||
if ok {
|
||||
for _, term := range terms {
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
t := time.Unix(0, i64)
|
||||
func (fb *DateTimeFacetBuilder) UpdateVisitor(field string, term []byte) {
|
||||
if field == fb.field {
|
||||
fb.sawValue = true
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
t := time.Unix(0, i64)
|
||||
|
||||
// look at each of the ranges for a match
|
||||
for rangeName, r := range fb.ranges {
|
||||
|
||||
if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
|
||||
|
||||
existingCount, existed := fb.termsCount[rangeName]
|
||||
if existed {
|
||||
fb.termsCount[rangeName] = existingCount + 1
|
||||
} else {
|
||||
fb.termsCount[rangeName] = 1
|
||||
}
|
||||
fb.total++
|
||||
}
|
||||
// look at each of the ranges for a match
|
||||
for rangeName, r := range fb.ranges {
|
||||
if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
|
||||
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
|
||||
fb.total++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
}
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) StartDoc() {
|
||||
fb.sawValue = false
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) EndDoc() {
|
||||
if !fb.sawValue {
|
||||
fb.missing++
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,7 +17,6 @@ package facet
|
|||
import (
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
)
|
||||
|
@ -34,6 +33,7 @@ type NumericFacetBuilder struct {
|
|||
total int
|
||||
missing int
|
||||
ranges map[string]*numericRange
|
||||
sawValue bool
|
||||
}
|
||||
|
||||
func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
|
||||
|
@ -57,36 +57,35 @@ func (fb *NumericFacetBuilder) Field() string {
|
|||
return fb.field
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) Update(ft index.FieldTerms) {
|
||||
terms, ok := ft[fb.field]
|
||||
if ok {
|
||||
for _, term := range terms {
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
f64 := numeric.Int64ToFloat64(i64)
|
||||
func (fb *NumericFacetBuilder) UpdateVisitor(field string, term []byte) {
|
||||
if field == fb.field {
|
||||
fb.sawValue = true
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
f64 := numeric.Int64ToFloat64(i64)
|
||||
|
||||
// look at each of the ranges for a match
|
||||
for rangeName, r := range fb.ranges {
|
||||
|
||||
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
|
||||
|
||||
existingCount, existed := fb.termsCount[rangeName]
|
||||
if existed {
|
||||
fb.termsCount[rangeName] = existingCount + 1
|
||||
} else {
|
||||
fb.termsCount[rangeName] = 1
|
||||
}
|
||||
fb.total++
|
||||
}
|
||||
// look at each of the ranges for a match
|
||||
for rangeName, r := range fb.ranges {
|
||||
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
|
||||
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
|
||||
fb.total++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
}
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) StartDoc() {
|
||||
fb.sawValue = false
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) EndDoc() {
|
||||
if !fb.sawValue {
|
||||
fb.missing++
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,7 +18,6 @@ import (
|
|||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
)
|
||||
|
||||
|
@ -52,7 +51,9 @@ func numericFacetN(b *testing.B, numTerms int) {
|
|||
nfb.AddRange("rangename"+strconv.Itoa(i), &min, &max)
|
||||
|
||||
for _, pv := range pcodedvalues {
|
||||
nfb.Update(index.FieldTerms{field: []string{string(pv)}})
|
||||
nfb.StartDoc()
|
||||
nfb.UpdateVisitor(field, pv)
|
||||
nfb.EndDoc()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,7 +17,6 @@ package facet
|
|||
import (
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
)
|
||||
|
||||
|
@ -27,6 +26,7 @@ type TermsFacetBuilder struct {
|
|||
termsCount map[string]int
|
||||
total int
|
||||
missing int
|
||||
sawValue bool
|
||||
}
|
||||
|
||||
func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
|
||||
|
@ -41,19 +41,20 @@ func (fb *TermsFacetBuilder) Field() string {
|
|||
return fb.field
|
||||
}
|
||||
|
||||
func (fb *TermsFacetBuilder) Update(ft index.FieldTerms) {
|
||||
terms, ok := ft[fb.field]
|
||||
if ok {
|
||||
for _, term := range terms {
|
||||
existingCount, existed := fb.termsCount[term]
|
||||
if existed {
|
||||
fb.termsCount[term] = existingCount + 1
|
||||
} else {
|
||||
fb.termsCount[term] = 1
|
||||
}
|
||||
fb.total++
|
||||
}
|
||||
} else {
|
||||
func (fb *TermsFacetBuilder) UpdateVisitor(field string, term []byte) {
|
||||
if field == fb.field {
|
||||
fb.sawValue = true
|
||||
fb.termsCount[string(term)] = fb.termsCount[string(term)] + 1
|
||||
fb.total++
|
||||
}
|
||||
}
|
||||
|
||||
func (fb *TermsFacetBuilder) StartDoc() {
|
||||
fb.sawValue = false
|
||||
}
|
||||
|
||||
func (fb *TermsFacetBuilder) EndDoc() {
|
||||
if !fb.sawValue {
|
||||
fb.missing++
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,8 +18,6 @@ import (
|
|||
"io/ioutil"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
)
|
||||
|
||||
var terms []string
|
||||
|
@ -61,7 +59,9 @@ func termsFacetN(b *testing.B, numTerms int) {
|
|||
for len(tfb.termsCount) < numTerms && i <= termsLen {
|
||||
j := i % termsLen
|
||||
term := terms[j]
|
||||
tfb.Update(index.FieldTerms{field: []string{term}})
|
||||
tfb.StartDoc()
|
||||
tfb.UpdateVisitor(field, []byte(term))
|
||||
tfb.EndDoc()
|
||||
i++
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,10 @@ import (
|
|||
)
|
||||
|
||||
type FacetBuilder interface {
|
||||
Update(index.FieldTerms)
|
||||
StartDoc()
|
||||
UpdateVisitor(field string, term []byte)
|
||||
EndDoc()
|
||||
|
||||
Result() *FacetResult
|
||||
Field() string
|
||||
}
|
||||
|
@ -41,33 +44,29 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
|
|||
|
||||
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
|
||||
fb.facets[name] = facetBuilder
|
||||
fb.fields = append(fb.fields, facetBuilder.Field())
|
||||
}
|
||||
|
||||
func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error {
|
||||
if fb.fields == nil {
|
||||
for _, facetBuilder := range fb.facets {
|
||||
fb.fields = append(fb.fields, facetBuilder.Field())
|
||||
}
|
||||
}
|
||||
func (fb *FacetsBuilder) RequiredFields() []string {
|
||||
return fb.fields
|
||||
}
|
||||
|
||||
if len(fb.fields) > 0 {
|
||||
// find out which fields haven't been loaded yet
|
||||
fieldsToLoad := docMatch.CachedFieldTerms.FieldsNotYetCached(fb.fields)
|
||||
// look them up
|
||||
fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.IndexInternalID, fieldsToLoad)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// cache these as well
|
||||
if docMatch.CachedFieldTerms == nil {
|
||||
docMatch.CachedFieldTerms = make(map[string][]string)
|
||||
}
|
||||
docMatch.CachedFieldTerms.Merge(fieldTerms)
|
||||
}
|
||||
func (fb *FacetsBuilder) StartDoc() {
|
||||
for _, facetBuilder := range fb.facets {
|
||||
facetBuilder.Update(docMatch.CachedFieldTerms)
|
||||
facetBuilder.StartDoc()
|
||||
}
|
||||
}
|
||||
|
||||
func (fb *FacetsBuilder) EndDoc() {
|
||||
for _, facetBuilder := range fb.facets {
|
||||
facetBuilder.EndDoc()
|
||||
}
|
||||
}
|
||||
|
||||
func (fb *FacetsBuilder) UpdateVisitor(field string, term []byte) {
|
||||
for _, facetBuilder := range fb.facets {
|
||||
facetBuilder.UpdateVisitor(field, term)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type TermFacet struct {
|
||||
|
|
|
@ -69,10 +69,6 @@ type DocumentMatch struct {
|
|||
// fields as float64s and date fields as time.RFC3339 formatted strings.
|
||||
Fields map[string]interface{} `json:"fields,omitempty"`
|
||||
|
||||
// as we learn field terms, we can cache important ones for later use
|
||||
// for example, sorting and building facets need these values
|
||||
CachedFieldTerms index.FieldTerms `json:"-"`
|
||||
|
||||
// if we load the document for this hit, remember it so we dont load again
|
||||
Document *document.Document `json:"-"`
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@ var HighTerm = strings.Repeat(string([]byte{0xff}), 10)
|
|||
var LowTerm = string([]byte{0x00})
|
||||
|
||||
type SearchSort interface {
|
||||
UpdateVisitor(field string, term []byte)
|
||||
Value(a *DocumentMatch) string
|
||||
Descending() bool
|
||||
|
||||
|
@ -171,6 +172,12 @@ func (so SortOrder) Value(doc *DocumentMatch) {
|
|||
}
|
||||
}
|
||||
|
||||
func (so SortOrder) UpdateVisitor(field string, term []byte) {
|
||||
for _, soi := range so {
|
||||
soi.UpdateVisitor(field, term)
|
||||
}
|
||||
}
|
||||
|
||||
// Compare will compare two document matches using the specified sort order
|
||||
// if both are numbers, we avoid converting back to term
|
||||
func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatch) int {
|
||||
|
@ -300,13 +307,24 @@ type SortField struct {
|
|||
Type SortFieldType
|
||||
Mode SortFieldMode
|
||||
Missing SortFieldMissing
|
||||
values []string
|
||||
}
|
||||
|
||||
// UpdateVisitor notifies this sort field that in this document
|
||||
// this field has the specified term
|
||||
func (s *SortField) UpdateVisitor(field string, term []byte) {
|
||||
if field == s.Field {
|
||||
s.values = append(s.values, string(term))
|
||||
}
|
||||
}
|
||||
|
||||
// Value returns the sort value of the DocumentMatch
|
||||
// it also resets the state of this SortField for
|
||||
// processing the next document
|
||||
func (s *SortField) Value(i *DocumentMatch) string {
|
||||
iTerms := i.CachedFieldTerms[s.Field]
|
||||
iTerms = s.filterTermsByType(iTerms)
|
||||
iTerms := s.filterTermsByType(s.values)
|
||||
iTerm := s.filterTermsByMode(iTerms)
|
||||
s.values = nil
|
||||
return iTerm
|
||||
}
|
||||
|
||||
|
@ -435,6 +453,12 @@ type SortDocID struct {
|
|||
Desc bool
|
||||
}
|
||||
|
||||
// UpdateVisitor is a no-op for SortDocID as it's value
|
||||
// is not dependent on any field terms
|
||||
func (s *SortDocID) UpdateVisitor(field string, term []byte) {
|
||||
|
||||
}
|
||||
|
||||
// Value returns the sort value of the DocumentMatch
|
||||
func (s *SortDocID) Value(i *DocumentMatch) string {
|
||||
return i.ID
|
||||
|
@ -466,6 +490,12 @@ type SortScore struct {
|
|||
Desc bool
|
||||
}
|
||||
|
||||
// UpdateVisitor is a no-op for SortScore as it's value
|
||||
// is not dependent on any field terms
|
||||
func (s *SortScore) UpdateVisitor(field string, term []byte) {
|
||||
|
||||
}
|
||||
|
||||
// Value returns the sort value of the DocumentMatch
|
||||
func (s *SortScore) Value(i *DocumentMatch) string {
|
||||
return "_score"
|
||||
|
|
Loading…
Reference in New Issue