reduce garbage created while processing facets

previously we parsed/returned large sections of the document's back index row in order to compute facet information. this would require parsing the protobuf of the entire back index row. unfortunately this creates considerable garbage. this new version introduces a visitor/callback approach to working with data inside the back index row. the benefit of this approach is that we can let the higher-level code see values, prior to any copies of data being made or intermediate garbage being created. implementations of the callback must copy any value which they would like to retain beyond the callback. NOTE: this approach duplicates code from the automatically generated protobuf code. NOTE: this approach assumes that the "field" field is serialized before the "terms" field. This is guaranteed by our currently generated protobuf encoder, and is recommended by the protobuf spec. But, decoders SHOULD support them occurring in any order, which we do not.
2017-03-02 16:19:08 -05:00 · 2017-03-02 16:19:08 -05:00 · 0eba2a3f0c
parent b04745abcc
commit 0eba2a3f0c
15 changed files with 441 additions and 142 deletions
--- a/index/index.go
+++ b/index/index.go
@ -48,6 +48,8 @@ type Index interface {
 	Advanced() (store.KVStore, error)
 }

+type DocumentFieldTermVisitor func(field string, term []byte)
+
 type IndexReader interface {
 	TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error)

@ -64,7 +66,7 @@ type IndexReader interface {
 	FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)

 	Document(id string) (*document.Document, error)
-	DocumentFieldTerms(id IndexInternalID, fields []string) (FieldTerms, error)
+	DocumentVisitFieldTerms(id IndexInternalID, fields []string, visitor DocumentFieldTermVisitor) error

 	Fields() ([]string, error)

--- a/index/upsidedown/index_reader.go
+++ b/index/upsidedown/index_reader.go
@ -101,15 +101,7 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) {
 	return
 }

-func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
-	back, err := backIndexRowForDoc(i.kvreader, id)
-	if err != nil {
-		return nil, err
-	}
-	if back == nil {
-		return nil, nil
-	}
-	rv := make(index.FieldTerms, len(fields))
+func (i *IndexReader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocumentFieldTermVisitor) error {
 	fieldsMap := make(map[uint16]string, len(fields))
 	for _, f := range fields {
 		id, ok := i.index.fieldCache.FieldNamed(f, false)
@ -117,12 +109,34 @@ func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []stri
 			fieldsMap[id] = f
 		}
 	}
-	for _, entry := range back.termsEntries {
-		if field, ok := fieldsMap[uint16(*entry.Field)]; ok {
-			rv[field] = entry.Terms
-		}
+
+	tempRow := BackIndexRow{
+		doc: id,
 	}
-	return rv, nil
+
+	keyBuf := GetRowBuffer()
+	if tempRow.KeySize() > len(keyBuf) {
+		keyBuf = make([]byte, 2*tempRow.KeySize())
+	}
+	defer PutRowBuffer(keyBuf)
+	keySize, err := tempRow.KeyTo(keyBuf)
+	if err != nil {
+		return err
+	}
+
+	value, err := i.kvreader.Get(keyBuf[:keySize])
+	if err != nil {
+		return err
+	}
+	if value == nil {
+		return nil
+	}
+
+	return visitBackIndexRow(value, func(field uint32, term []byte) {
+		if field, ok := fieldsMap[uint16(field)]; ok {
+			visitor(field, term)
+		}
+	})
 }

 func (i *IndexReader) Fields() (fields []string, err error) {
--- a/index/upsidedown/row.go
+++ b/index/upsidedown/row.go
@ -881,3 +881,232 @@ func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
 	rv.value = value[1:]
 	return rv, nil
 }
+
+type backIndexFieldTermVisitor func(field uint32, term []byte)
+
+// visitBackIndexRow is designed to process a protobuf encoded
+// value, without creating unnecessary garbage.  Instead values are passed
+// to a callback, inspected first, and only copied if necessary.
+// Due to the fact that this borrows from generated code, it must be manually
+// updated if the protobuf definition changes.
+//
+// This code originates from:
+// func (m *BackIndexRowValue) Unmarshal(data []byte) error
+// the sections which create garbage or parse uninteresting sections
+// have been commented out.  This was done by design to allow for easier
+// merging in the future if that original function is regenerated
+func visitBackIndexRow(data []byte, callback backIndexFieldTermVisitor) error {
+	l := len(data)
+	iNdEx := 0
+	for iNdEx < l {
+		var wire uint64
+		for shift := uint(0); ; shift += 7 {
+			if iNdEx >= l {
+				return io.ErrUnexpectedEOF
+			}
+			b := data[iNdEx]
+			iNdEx++
+			wire |= (uint64(b) & 0x7F) << shift
+			if b < 0x80 {
+				break
+			}
+		}
+		fieldNum := int32(wire >> 3)
+		wireType := int(wire & 0x7)
+		switch fieldNum {
+		case 1:
+			if wireType != 2 {
+				return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
+			}
+			var msglen int
+			for shift := uint(0); ; shift += 7 {
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := data[iNdEx]
+				iNdEx++
+				msglen |= (int(b) & 0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+			postIndex := iNdEx + msglen
+			if msglen < 0 {
+				return ErrInvalidLengthUpsidedown
+			}
+			if postIndex > l {
+				return io.ErrUnexpectedEOF
+			}
+			// don't parse term entries
+			// m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
+			// if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
+			// 	return err
+			// }
+			// instead, inspect them
+			if err := visitBackIndexRowFieldTerms(data[iNdEx:postIndex], callback); err != nil {
+				return err
+			}
+			iNdEx = postIndex
+		case 2:
+			if wireType != 2 {
+				return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
+			}
+			var msglen int
+			for shift := uint(0); ; shift += 7 {
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := data[iNdEx]
+				iNdEx++
+				msglen |= (int(b) & 0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+			postIndex := iNdEx + msglen
+			if msglen < 0 {
+				return ErrInvalidLengthUpsidedown
+			}
+			if postIndex > l {
+				return io.ErrUnexpectedEOF
+			}
+			// don't parse stored entries
+			// m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
+			// if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
+			// 	return err
+			// }
+			iNdEx = postIndex
+		default:
+			var sizeOfWire int
+			for {
+				sizeOfWire++
+				wire >>= 7
+				if wire == 0 {
+					break
+				}
+			}
+			iNdEx -= sizeOfWire
+			skippy, err := skipUpsidedown(data[iNdEx:])
+			if err != nil {
+				return err
+			}
+			if skippy < 0 {
+				return ErrInvalidLengthUpsidedown
+			}
+			if (iNdEx + skippy) > l {
+				return io.ErrUnexpectedEOF
+			}
+			// don't track unrecognized data
+			//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
+			iNdEx += skippy
+		}
+	}
+
+	return nil
+}
+
+// visitBackIndexRowFieldTerms is designed to process a protobuf encoded
+// sub-value within the BackIndexRowValue, without creating unnecessary garbage.
+// Instead values are passed to a callback, inspected first, and only copied if
+// necessary.  Due to the fact that this borrows from generated code, it must
+// be manually updated if the protobuf definition changes.
+//
+// This code originates from:
+// func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
+// the sections which create garbage or parse uninteresting sections
+// have been commented out.  This was done by design to allow for easier
+// merging in the future if that original function is regenerated
+func visitBackIndexRowFieldTerms(data []byte, callback backIndexFieldTermVisitor) error {
+	var theField uint32
+
+	var hasFields [1]uint64
+	l := len(data)
+	iNdEx := 0
+	for iNdEx < l {
+		var wire uint64
+		for shift := uint(0); ; shift += 7 {
+			if iNdEx >= l {
+				return io.ErrUnexpectedEOF
+			}
+			b := data[iNdEx]
+			iNdEx++
+			wire |= (uint64(b) & 0x7F) << shift
+			if b < 0x80 {
+				break
+			}
+		}
+		fieldNum := int32(wire >> 3)
+		wireType := int(wire & 0x7)
+		switch fieldNum {
+		case 1:
+			if wireType != 0 {
+				return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
+			}
+			var v uint32
+			for shift := uint(0); ; shift += 7 {
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := data[iNdEx]
+				iNdEx++
+				v |= (uint32(b) & 0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+			// m.Field = &v
+			theField = v
+			hasFields[0] |= uint64(0x00000001)
+		case 2:
+			if wireType != 2 {
+				return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
+			}
+			var stringLen uint64
+			for shift := uint(0); ; shift += 7 {
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := data[iNdEx]
+				iNdEx++
+				stringLen |= (uint64(b) & 0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+			postIndex := iNdEx + int(stringLen)
+			if postIndex > l {
+				return io.ErrUnexpectedEOF
+			}
+			//m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
+			callback(theField, data[iNdEx:postIndex])
+			iNdEx = postIndex
+		default:
+			var sizeOfWire int
+			for {
+				sizeOfWire++
+				wire >>= 7
+				if wire == 0 {
+					break
+				}
+			}
+			iNdEx -= sizeOfWire
+			skippy, err := skipUpsidedown(data[iNdEx:])
+			if err != nil {
+				return err
+			}
+			if skippy < 0 {
+				return ErrInvalidLengthUpsidedown
+			}
+			if (iNdEx + skippy) > l {
+				return io.ErrUnexpectedEOF
+			}
+			//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
+			iNdEx += skippy
+		}
+	}
+	// if hasFields[0]&uint64(0x00000001) == 0 {
+	// 	return new(github_com_golang_protobuf_proto.RequiredNotSetError)
+	// }
+
+	return nil
+}
--- a/index/upsidedown/row_test.go
+++ b/index/upsidedown/row_test.go
@ -361,3 +361,22 @@ func BenchmarkStoredRowDecode(b *testing.B) {
 		}
 	}
 }
+
+func TestVisitBackIndexRow(t *testing.T) {
+	expected := map[uint32][]byte{
+		0: []byte("beer"),
+		1: []byte("beat"),
+	}
+	val := []byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r', 10, 8, 8, 1, 18, 4, 'b', 'e', 'a', 't', 18, 2, 8, 3, 18, 2, 8, 4, 18, 2, 8, 5}
+	err := visitBackIndexRow(val, func(field uint32, term []byte) {
+		if reflect.DeepEqual(expected[field], term) {
+			delete(expected, field)
+		}
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(expected) > 0 {
+		t.Errorf("expected visitor to see these but did not %v", expected)
+	}
+}
--- a/index/upsidedown/upsidedown_test.go
+++ b/index/upsidedown/upsidedown_test.go
@ -1251,7 +1251,7 @@ func TestIndexTermReaderCompositeFields(t *testing.T) {
 	}
 }

-func TestIndexDocumentFieldTerms(t *testing.T) {
+func TestIndexDocumentVisitFieldTerms(t *testing.T) {
 	defer func() {
 		err := DestroyTest()
 		if err != nil {
@ -1294,7 +1294,11 @@ func TestIndexDocumentFieldTerms(t *testing.T) {
 		}
 	}()

-	fieldTerms, err := indexReader.DocumentFieldTerms(index.IndexInternalID("1"), []string{"name", "title"})
+	fieldTerms := make(index.FieldTerms)
+
+	err = indexReader.DocumentVisitFieldTerms(index.IndexInternalID("1"), []string{"name", "title"}, func(field string, term []byte) {
+		fieldTerms[field] = append(fieldTerms[field], string(term))
+	})
 	if err != nil {
 		t.Error(err)
 	}
--- a/search/collector/search_test.go
+++ b/search/collector/search_test.go
@ -104,8 +104,8 @@ func (sr *stubReader) Document(id string) (*document.Document, error) {
 	return nil, nil
 }

-func (sr *stubReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
-	return nil, nil
+func (sr *stubReader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocumentFieldTermVisitor) error {
+	return nil
 }

 func (sr *stubReader) Fields() ([]string, error) {
--- a/search/collector/topn.go
+++ b/search/collector/topn.go
@ -114,12 +114,6 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
 			default:
 			}
 		}
-		if hc.facetsBuilder != nil {
-			err = hc.facetsBuilder.Update(next)
-			if err != nil {
-				break
-			}
-		}

 		err = hc.collectSingle(searchContext, reader, next)
 		if err != nil {
@ -144,6 +138,13 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
 var sortByScoreOpt = []string{"_score"}

 func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error {
+
+	// visit field terms for features that require it (sort, facets)
+	err := hc.visitFieldTerms(reader, d)
+	if err != nil {
+		return err
+	}
+
 	// increment total hits
 	hc.total++
 	d.HitNumber = hc.total
@ -153,7 +154,6 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
 		hc.maxScore = d.Score
 	}

-	var err error
 	// see if we need to load ID (at this early stage, for example to sort on it)
 	if hc.needDocIds {
 		d.ID, err = reader.ExternalID(d.IndexInternalID)
@ -162,22 +162,6 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
 		}
 	}

-	// see if we need to load the stored fields
-	if len(hc.neededFields) > 0 {
-		// find out which fields haven't been loaded yet
-		fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.neededFields)
-		// look them up
-		fieldTerms, err := reader.DocumentFieldTerms(d.IndexInternalID, fieldsToLoad)
-		if err != nil {
-			return err
-		}
-		// cache these as well
-		if d.CachedFieldTerms == nil {
-			d.CachedFieldTerms = make(map[string][]string)
-		}
-		d.CachedFieldTerms.Merge(fieldTerms)
-	}
-
 	// compute this hits sort value
 	if len(hc.sort) == 1 && hc.cachedScoring[0] {
 		d.Sort = sortByScoreOpt
@ -215,9 +199,31 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
 	return nil
 }

+// visitFieldTerms is responsible for visiting the field terms of the
+// search hit, and passing visited terms to the sort and facet builder
+func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch) error {
+	if hc.facetsBuilder != nil {
+		hc.facetsBuilder.StartDoc()
+	}
+
+	err := reader.DocumentVisitFieldTerms(d.IndexInternalID, hc.neededFields, func(field string, term []byte) {
+		if hc.facetsBuilder != nil {
+			hc.facetsBuilder.UpdateVisitor(field, term)
+		}
+		hc.sort.UpdateVisitor(field, term)
+	})
+
+	if hc.facetsBuilder != nil {
+		hc.facetsBuilder.EndDoc()
+	}
+
+	return err
+}
+
 // SetFacetsBuilder registers a facet builder for this collector
 func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
 	hc.facetsBuilder = facetsBuilder
+	hc.neededFields = append(hc.neededFields, hc.facetsBuilder.RequiredFields()...)
 }

 // finalizeResults starts with the heap containing the final top size+skip
--- a/search/facet/facet_builder_datetime.go
+++ b/search/facet/facet_builder_datetime.go
@ -18,7 +18,6 @@ import (
 	"sort"
 	"time"

-	"github.com/blevesearch/bleve/index"
 	"github.com/blevesearch/bleve/numeric"
 	"github.com/blevesearch/bleve/search"
 )
@ -35,6 +34,7 @@ type DateTimeFacetBuilder struct {
 	total      int
 	missing    int
 	ranges     map[string]*dateTimeRange
+	sawValue   bool
 }

 func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
@ -58,36 +58,35 @@ func (fb *DateTimeFacetBuilder) Field() string {
 	return fb.field
 }

-func (fb *DateTimeFacetBuilder) Update(ft index.FieldTerms) {
-	terms, ok := ft[fb.field]
-	if ok {
-		for _, term := range terms {
-			// only consider the values which are shifted 0
-			prefixCoded := numeric.PrefixCoded(term)
-			shift, err := prefixCoded.Shift()
-			if err == nil && shift == 0 {
-				i64, err := prefixCoded.Int64()
-				if err == nil {
-					t := time.Unix(0, i64)
+func (fb *DateTimeFacetBuilder) UpdateVisitor(field string, term []byte) {
+	if field == fb.field {
+		fb.sawValue = true
+		// only consider the values which are shifted 0
+		prefixCoded := numeric.PrefixCoded(term)
+		shift, err := prefixCoded.Shift()
+		if err == nil && shift == 0 {
+			i64, err := prefixCoded.Int64()
+			if err == nil {
+				t := time.Unix(0, i64)

-					// look at each of the ranges for a match
-					for rangeName, r := range fb.ranges {
-
-						if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
-
-							existingCount, existed := fb.termsCount[rangeName]
-							if existed {
-								fb.termsCount[rangeName] = existingCount + 1
-							} else {
-								fb.termsCount[rangeName] = 1
-							}
-							fb.total++
-						}
+				// look at each of the ranges for a match
+				for rangeName, r := range fb.ranges {
+					if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
+						fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
+						fb.total++
 					}
 				}
 			}
 		}
-	} else {
+	}
+}
+
+func (fb *DateTimeFacetBuilder) StartDoc() {
+	fb.sawValue = false
+}
+
+func (fb *DateTimeFacetBuilder) EndDoc() {
+	if !fb.sawValue {
 		fb.missing++
 	}
 }
--- a/search/facet/facet_builder_numeric.go
+++ b/search/facet/facet_builder_numeric.go
@ -17,7 +17,6 @@ package facet
 import (
 	"sort"

-	"github.com/blevesearch/bleve/index"
 	"github.com/blevesearch/bleve/numeric"
 	"github.com/blevesearch/bleve/search"
 )
@ -34,6 +33,7 @@ type NumericFacetBuilder struct {
 	total      int
 	missing    int
 	ranges     map[string]*numericRange
+	sawValue   bool
 }

 func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
@ -57,36 +57,35 @@ func (fb *NumericFacetBuilder) Field() string {
 	return fb.field
 }

-func (fb *NumericFacetBuilder) Update(ft index.FieldTerms) {
-	terms, ok := ft[fb.field]
-	if ok {
-		for _, term := range terms {
-			// only consider the values which are shifted 0
-			prefixCoded := numeric.PrefixCoded(term)
-			shift, err := prefixCoded.Shift()
-			if err == nil && shift == 0 {
-				i64, err := prefixCoded.Int64()
-				if err == nil {
-					f64 := numeric.Int64ToFloat64(i64)
+func (fb *NumericFacetBuilder) UpdateVisitor(field string, term []byte) {
+	if field == fb.field {
+		fb.sawValue = true
+		// only consider the values which are shifted 0
+		prefixCoded := numeric.PrefixCoded(term)
+		shift, err := prefixCoded.Shift()
+		if err == nil && shift == 0 {
+			i64, err := prefixCoded.Int64()
+			if err == nil {
+				f64 := numeric.Int64ToFloat64(i64)

-					// look at each of the ranges for a match
-					for rangeName, r := range fb.ranges {
-
-						if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
-
-							existingCount, existed := fb.termsCount[rangeName]
-							if existed {
-								fb.termsCount[rangeName] = existingCount + 1
-							} else {
-								fb.termsCount[rangeName] = 1
-							}
-							fb.total++
-						}
+				// look at each of the ranges for a match
+				for rangeName, r := range fb.ranges {
+					if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
+						fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
+						fb.total++
 					}
 				}
 			}
 		}
-	} else {
+	}
+}
+
+func (fb *NumericFacetBuilder) StartDoc() {
+	fb.sawValue = false
+}
+
+func (fb *NumericFacetBuilder) EndDoc() {
+	if !fb.sawValue {
 		fb.missing++
 	}
 }
--- a/search/facet/facet_builder_numeric_test.go
+++ b/search/facet/facet_builder_numeric_test.go
@ -18,7 +18,6 @@ import (
 	"strconv"
 	"testing"

-	"github.com/blevesearch/bleve/index"
 	"github.com/blevesearch/bleve/numeric"
 )

@ -52,7 +51,9 @@ func numericFacetN(b *testing.B, numTerms int) {
 		nfb.AddRange("rangename"+strconv.Itoa(i), &min, &max)

 		for _, pv := range pcodedvalues {
-			nfb.Update(index.FieldTerms{field: []string{string(pv)}})
+			nfb.StartDoc()
+			nfb.UpdateVisitor(field, pv)
+			nfb.EndDoc()
 		}
 	}

--- a/search/facet/facet_builder_terms.go
+++ b/search/facet/facet_builder_terms.go
@ -17,7 +17,6 @@ package facet
 import (
 	"sort"

-	"github.com/blevesearch/bleve/index"
 	"github.com/blevesearch/bleve/search"
 )

@ -27,6 +26,7 @@ type TermsFacetBuilder struct {
 	termsCount map[string]int
 	total      int
 	missing    int
+	sawValue   bool
 }

 func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
@ -41,19 +41,20 @@ func (fb *TermsFacetBuilder) Field() string {
 	return fb.field
 }

-func (fb *TermsFacetBuilder) Update(ft index.FieldTerms) {
-	terms, ok := ft[fb.field]
-	if ok {
-		for _, term := range terms {
-			existingCount, existed := fb.termsCount[term]
-			if existed {
-				fb.termsCount[term] = existingCount + 1
-			} else {
-				fb.termsCount[term] = 1
-			}
-			fb.total++
-		}
-	} else {
+func (fb *TermsFacetBuilder) UpdateVisitor(field string, term []byte) {
+	if field == fb.field {
+		fb.sawValue = true
+		fb.termsCount[string(term)] = fb.termsCount[string(term)] + 1
+		fb.total++
+	}
+}
+
+func (fb *TermsFacetBuilder) StartDoc() {
+	fb.sawValue = false
+}
+
+func (fb *TermsFacetBuilder) EndDoc() {
+	if !fb.sawValue {
 		fb.missing++
 	}
 }
--- a/search/facet/facet_builder_terms_test.go
+++ b/search/facet/facet_builder_terms_test.go
@ -18,8 +18,6 @@ import (
 	"io/ioutil"
 	"regexp"
 	"testing"
-
-	"github.com/blevesearch/bleve/index"
 )

 var terms []string
@ -61,7 +59,9 @@ func termsFacetN(b *testing.B, numTerms int) {
 	for len(tfb.termsCount) < numTerms && i <= termsLen {
 		j := i % termsLen
 		term := terms[j]
-		tfb.Update(index.FieldTerms{field: []string{term}})
+		tfb.StartDoc()
+		tfb.UpdateVisitor(field, []byte(term))
+		tfb.EndDoc()
 		i++
 	}

--- a/search/facets_builder.go
+++ b/search/facets_builder.go
@ -21,7 +21,10 @@ import (
 )

 type FacetBuilder interface {
-	Update(index.FieldTerms)
+	StartDoc()
+	UpdateVisitor(field string, term []byte)
+	EndDoc()
+
 	Result() *FacetResult
 	Field() string
 }
@ -41,33 +44,29 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {

 func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
 	fb.facets[name] = facetBuilder
+	fb.fields = append(fb.fields, facetBuilder.Field())
 }

-func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error {
-	if fb.fields == nil {
-		for _, facetBuilder := range fb.facets {
-			fb.fields = append(fb.fields, facetBuilder.Field())
-		}
-	}
+func (fb *FacetsBuilder) RequiredFields() []string {
+	return fb.fields
+}

-	if len(fb.fields) > 0 {
-		// find out which fields haven't been loaded yet
-		fieldsToLoad := docMatch.CachedFieldTerms.FieldsNotYetCached(fb.fields)
-		// look them up
-		fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.IndexInternalID, fieldsToLoad)
-		if err != nil {
-			return err
-		}
-		// cache these as well
-		if docMatch.CachedFieldTerms == nil {
-			docMatch.CachedFieldTerms = make(map[string][]string)
-		}
-		docMatch.CachedFieldTerms.Merge(fieldTerms)
-	}
+func (fb *FacetsBuilder) StartDoc() {
 	for _, facetBuilder := range fb.facets {
-		facetBuilder.Update(docMatch.CachedFieldTerms)
+		facetBuilder.StartDoc()
+	}
+}
+
+func (fb *FacetsBuilder) EndDoc() {
+	for _, facetBuilder := range fb.facets {
+		facetBuilder.EndDoc()
+	}
+}
+
+func (fb *FacetsBuilder) UpdateVisitor(field string, term []byte) {
+	for _, facetBuilder := range fb.facets {
+		facetBuilder.UpdateVisitor(field, term)
 	}
-	return nil
 }

 type TermFacet struct {
--- a/search/search.go
+++ b/search/search.go
@ -69,10 +69,6 @@ type DocumentMatch struct {
 	// fields as float64s and date fields as time.RFC3339 formatted strings.
 	Fields map[string]interface{} `json:"fields,omitempty"`

-	// as we learn field terms, we can cache important ones for later use
-	// for example, sorting and building facets need these values
-	CachedFieldTerms index.FieldTerms `json:"-"`
-
 	// if we load the document for this hit, remember it so we dont load again
 	Document *document.Document `json:"-"`

--- a/search/sort.go
+++ b/search/sort.go
@ -27,6 +27,7 @@ var HighTerm = strings.Repeat(string([]byte{0xff}), 10)
 var LowTerm = string([]byte{0x00})

 type SearchSort interface {
+	UpdateVisitor(field string, term []byte)
 	Value(a *DocumentMatch) string
 	Descending() bool

@ -171,6 +172,12 @@ func (so SortOrder) Value(doc *DocumentMatch) {
 	}
 }

+func (so SortOrder) UpdateVisitor(field string, term []byte) {
+	for _, soi := range so {
+		soi.UpdateVisitor(field, term)
+	}
+}
+
 // Compare will compare two document matches using the specified sort order
 // if both are numbers, we avoid converting back to term
 func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatch) int {
@ -300,13 +307,24 @@ type SortField struct {
 	Type    SortFieldType
 	Mode    SortFieldMode
 	Missing SortFieldMissing
+	values  []string
+}
+
+// UpdateVisitor notifies this sort field that in this document
+// this field has the specified term
+func (s *SortField) UpdateVisitor(field string, term []byte) {
+	if field == s.Field {
+		s.values = append(s.values, string(term))
+	}
 }

 // Value returns the sort value of the DocumentMatch
+// it also resets the state of this SortField for
+// processing the next document
 func (s *SortField) Value(i *DocumentMatch) string {
-	iTerms := i.CachedFieldTerms[s.Field]
-	iTerms = s.filterTermsByType(iTerms)
+	iTerms := s.filterTermsByType(s.values)
 	iTerm := s.filterTermsByMode(iTerms)
+	s.values = nil
 	return iTerm
 }

@ -435,6 +453,12 @@ type SortDocID struct {
 	Desc bool
 }

+// UpdateVisitor is a no-op for SortDocID as its value
+// is not dependent on any field terms
+func (s *SortDocID) UpdateVisitor(field string, term []byte) {
+
+}
+
 // Value returns the sort value of the DocumentMatch
 func (s *SortDocID) Value(i *DocumentMatch) string {
 	return i.ID
@ -466,6 +490,12 @@ type SortScore struct {
 	Desc bool
 }

+// UpdateVisitor is a no-op for SortScore as its value
+// is not dependent on any field terms
+func (s *SortScore) UpdateVisitor(field string, term []byte) {
+
+}
+
 // Value returns the sort value of the DocumentMatch
 func (s *SortScore) Value(i *DocumentMatch) string {
 	return "_score"