From 8389027ae8dcc681f804efe8b2a3290adb352442 Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Wed, 6 Jan 2016 15:32:09 -0800
Subject: [PATCH 01/11] firestorm TermFreqRow.ParseKey() func

---
 index/firestorm/termfreq.go | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/index/firestorm/termfreq.go b/index/firestorm/termfreq.go
index 1d36ec26..6ba6078d 100644
--- a/index/firestorm/termfreq.go
+++ b/index/firestorm/termfreq.go
@@ -62,32 +62,39 @@ func InitTermFreqRow(tfr *TermFreqRow, field uint16, term []byte, docID []byte,
 
 func NewTermFreqRowKV(key, value []byte) (*TermFreqRow, error) {
 	rv := TermFreqRow{}
+	err := rv.ParseKey(key)
+	if err != nil {
+		return nil, err
+	}
+	err = rv.value.Unmarshal(value)
+	if err != nil {
+		return nil, err
+	}
+	return &rv, nil
+}
+
+func (tfr *TermFreqRow) ParseKey(key []byte) error {
 	keyLen := len(key)
 	if keyLen < 3 {
-		return nil, fmt.Errorf("invalid term frequency key, no valid field")
+		return fmt.Errorf("invalid term frequency key, no valid field")
 	}
-	rv.field = binary.LittleEndian.Uint16(key[1:3])
+	tfr.field = binary.LittleEndian.Uint16(key[1:3])
 
 	termStartPos := 3
 	termEndPos := bytes.IndexByte(key[termStartPos:], ByteSeparator)
 	if termEndPos < 0 {
-		return nil, fmt.Errorf("invalid term frequency key, no byte separator terminating term")
+		return fmt.Errorf("invalid term frequency key, no byte separator terminating term")
 	}
-	rv.term = key[termStartPos : termStartPos+termEndPos]
+	tfr.term = key[termStartPos : termStartPos+termEndPos]
 
 	docStartPos := termStartPos + termEndPos + 1
 	docEndPos := bytes.IndexByte(key[docStartPos:], ByteSeparator)
-	rv.docID = key[docStartPos : docStartPos+docEndPos]
+	tfr.docID = key[docStartPos : docStartPos+docEndPos]
 
 	docNumPos := docStartPos + docEndPos + 1
-	rv.docNum, _ = binary.Uvarint(key[docNumPos:])
+	tfr.docNum, _ = binary.Uvarint(key[docNumPos:])
 
-	err := rv.value.Unmarshal(value)
-	if err != nil {
-		return nil, err
-	}
-
-	return &rv, nil
+	return nil
 }
 
 func (tfr *TermFreqRow) KeySize() int {
From 009d59222a6446a8e79aca6d5f326896371198d1 Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Wed, 6 Jan 2016 15:46:26 -0800
Subject: [PATCH 02/11] firestorm StoredRow.ParseKey() func

---
 index/firestorm/stored.go | 66 +++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 30 deletions(-)

diff --git a/index/firestorm/stored.go b/index/firestorm/stored.go
index ad7c8519..a8a5917e 100644
--- a/index/firestorm/stored.go
+++ b/index/firestorm/stored.go
@@ -41,46 +41,52 @@ func NewStoredRow(docID []byte, docNum uint64, field uint16, arrayPositions []ui
 
 func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
 	rv := StoredRow{}
-
-	buf := bytes.NewBuffer(key)
-	_, err := buf.ReadByte() // type
+	err := rv.ParseKey(key)
 	if err != nil {
 		return nil, err
 	}
-
-	rv.docID, err = buf.ReadBytes(ByteSeparator)
-	if len(rv.docID) < 2 { // 1 for min doc id length, 1 for separator
-		err = fmt.Errorf("invalid doc length 0")
-		return nil, err
-	}
-
-	rv.docID = rv.docID[:len(rv.docID)-1] // trim off separator byte
-
-	rv.docNum, err = binary.ReadUvarint(buf)
-	if err != nil {
-		return nil, err
-	}
-
-	err = binary.Read(buf, binary.LittleEndian, &rv.field)
-	if err != nil {
-		return nil, err
-	}
-
-	rv.arrayPositions = make([]uint64, 0)
-	nextArrayPos, err := binary.ReadUvarint(buf)
-	for err == nil {
-		rv.arrayPositions = append(rv.arrayPositions, nextArrayPos)
-		nextArrayPos, err = binary.ReadUvarint(buf)
-	}
-
 	err = rv.value.Unmarshal(value)
 	if err != nil {
 		return nil, err
 	}
-
 	return &rv, nil
 }
 
+func (sr *StoredRow) ParseKey(key []byte) error {
+	buf := bytes.NewBuffer(key)
+	_, err := buf.ReadByte() // type
+	if err != nil {
+		return err
+	}
+
+	sr.docID, err = buf.ReadBytes(ByteSeparator)
+	if len(sr.docID) < 2 { // 1 for min doc id length, 1 for separator
+		err = fmt.Errorf("invalid doc length 0")
+		return err
+	}
+
+	sr.docID = sr.docID[:len(sr.docID)-1] // trim off separator byte
+
+	sr.docNum, err = binary.ReadUvarint(buf)
+	if err != nil {
+		return err
+	}
+
+	err = binary.Read(buf, binary.LittleEndian, &sr.field)
+	if err != nil {
+		return err
+	}
+
+	sr.arrayPositions = make([]uint64, 0)
+	nextArrayPos, err := binary.ReadUvarint(buf)
+	for err == nil {
+		sr.arrayPositions = append(sr.arrayPositions, nextArrayPos)
+		nextArrayPos, err = binary.ReadUvarint(buf)
+	}
+
+	return nil
+}
+
 func (sr *StoredRow) KeySize() int {
 	return 1 + len(sr.docID) + 1 + binary.MaxVarintLen64 + 2 + (binary.MaxVarintLen64 * len(sr.arrayPositions))
 }

From 7df07f94fa102969252f6ca085cae52468f85315 Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Wed, 6 Jan 2016 15:53:12 -0800
Subject: [PATCH 03/11] firestorm use the ParseKey() funcs to avoid unneeded
 value parsing

With this change, the row allocation also happens only once per loop,
instead of once per item.
---
 index/firestorm/garbage.go | 6 ++++--
 index/firestorm/lookup.go  | 3 ++-
 index/firestorm/warmup.go  | 3 ++-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/index/firestorm/garbage.go b/index/firestorm/garbage.go
index d70d5036..7a2abe40 100644
--- a/index/firestorm/garbage.go
+++ b/index/firestorm/garbage.go
@@ -135,9 +135,10 @@ func (gc *GarbageCollector) cleanup() {
 	termFreqStart := TermFreqIteratorStart(0, []byte{ByteSeparator})
 	termFreqEnd := TermFreqIteratorStart(math.MaxUint16, []byte{ByteSeparator})
 
+	var tfr TermFreqRow
 	dictionaryDeltas := make(map[string]int64)
 	err = visitRange(reader, termFreqStart, termFreqEnd, func(key, val []byte) (bool, error) {
-		tfr, err := NewTermFreqRowKV(key, val)
+		err := tfr.ParseKey(key)
 		if err != nil {
 			return false, err
 		}
@@ -158,8 +159,9 @@
 	}
 
 	// walk all the stored rows
+	var sr StoredRow
 	err = visitPrefix(reader, StoredKeyPrefix, func(key, val []byte) (bool, error) {
-		sr, err := NewStoredRowKV(key, val)
+		err := sr.ParseKey(key)
 		if err != nil {
 			return false, err
 		}
diff --git a/index/firestorm/lookup.go b/index/firestorm/lookup.go
index 0964f29d..d58640e6 100644
--- a/index/firestorm/lookup.go
+++ b/index/firestorm/lookup.go
@@ -91,10 +91,11 @@ func (l *Lookuper) lookup(item *InFlightItem) {
 	prefix := TermFreqPrefixFieldTermDocId(0, nil, item.docID)
 	logger.Printf("lookuper prefix - % x", prefix)
 
+	var tfk TermFreqRow
 	docNums := make(DocNumberList, 0)
 	err = visitPrefix(reader, prefix, func(key, val []byte) (bool, error) {
 		logger.Printf("lookuper sees key % x", key)
-		tfk, err := NewTermFreqRowKV(key, val)
+		err := tfk.ParseKey(key)
 		if err != nil {
 			return false, err
 		}
diff --git a/index/firestorm/warmup.go b/index/firestorm/warmup.go
index 299f6fb4..02e3b21a 100644
--- a/index/firestorm/warmup.go
+++ b/index/firestorm/warmup.go
@@ -67,10 +67,11 @@ func (f *Firestorm) warmup(reader store.KVReader) error {
 
 	tfkPrefix := TermFreqIteratorStart(idField, nil)
 
+	var tfk TermFreqRow
 	var lastDocId []byte
 	lastDocNumbers := make(DocNumberList, 1)
 	err = visitPrefix(reader, tfkPrefix, func(key, val []byte) (bool, error) {
-		tfk, err := NewTermFreqRowKV(key, val)
+		err := tfk.ParseKey(key)
 		if err != nil {
 			return false, err
 		}
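The refactor in PATCH 01-03 has a general shape worth noting: a key-only ParseKey() method on a reusable row struct lets iteration loops skip both the per-item allocation and the value unmarshal. A minimal, self-contained Go sketch of that pattern follows (Row, ParseKey, and the key layout here are illustrative stand-ins, not the actual firestorm types):

package main

import "fmt"

// Row stands in for firestorm's TermFreqRow: ParseKey fills the
// receiver in place, so one Row can be reused across a whole scan.
type Row struct {
	field uint16
	term  []byte
}

func (r *Row) ParseKey(key []byte) error {
	if len(key) < 3 {
		return fmt.Errorf("invalid key, no valid field")
	}
	r.field = uint16(key[1]) | uint16(key[2])<<8 // little-endian field
	r.term = key[3:]
	return nil
}

func main() {
	keys := [][]byte{{'t', 1, 0, 'a'}, {'t', 2, 0, 'b'}}
	var row Row // allocated once, outside the loop
	for _, k := range keys {
		// only the key is parsed; the value bytes are never touched
		if err := row.ParseKey(k); err != nil {
			continue
		}
		fmt.Println(row.field, string(row.term))
	}
}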
From 024848ac913cf1f6a7d879b1168cbff71ccbb81e Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Wed, 6 Jan 2016 16:04:56 -0800
Subject: [PATCH 04/11] firestorm valid docNum finding, fixes #310

---
 index/firestorm/comp.go | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/index/firestorm/comp.go b/index/firestorm/comp.go
index 5fad468d..13edc70c 100644
--- a/index/firestorm/comp.go
+++ b/index/firestorm/comp.go
@@ -45,9 +45,10 @@ type Snapshot struct {
 // returns which doc number is valid
 // if none, then 0
 func (s *Snapshot) Which(docID []byte, docNumList DocNumberList) uint64 {
 	sort.Sort(docNumList)
-	highestValidDocNum := docNumList.HighestValid(s.maxRead)
-	if highestValidDocNum > 0 && s.Valid(docID, highestValidDocNum) {
-		return highestValidDocNum
+	for _, docNum := range docNumList { // docNumList is sorted descending.
+		if docNum > 0 && s.Valid(docID, docNum) {
+			return docNum
+		}
 	}
 	return 0
 }

From d6a997d8c1f437d6c8e87334c1aa593a14f564e4 Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Wed, 6 Jan 2016 16:46:15 -0800
Subject: [PATCH 05/11] firestorm gtreap lookup once per snapshot docID

Previously, firestorm would look up docIDs in the inFlight gtreap for
every candidate docNum, and this change moves the lookup to outside of
the loop.
---
 index/firestorm/comp.go | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/index/firestorm/comp.go b/index/firestorm/comp.go
index 13edc70c..b4115630 100644
--- a/index/firestorm/comp.go
+++ b/index/firestorm/comp.go
@@ -44,9 +44,14 @@ type Snapshot struct {
 // returns which doc number is valid
 // if none, then 0
 func (s *Snapshot) Which(docID []byte, docNumList DocNumberList) uint64 {
-	sort.Sort(docNumList)
-	for _, docNum := range docNumList { // docNumList is sorted descending.
-		if docNum > 0 && s.Valid(docID, docNum) {
+	inFlightVal := s.inFlight.Get(&InFlightItem{docID: docID})
+
+	sort.Sort(docNumList) // Descending ordering.
+
+	for _, docNum := range docNumList {
+		if docNum > 0 && docNum <= s.maxRead &&
+			(inFlightVal == nil || inFlightVal.(*InFlightItem).docNum == docNum) &&
+			!s.deletedDocNumbers.Test(uint(docNum)) {
 			return docNum
 		}
 	}
 	return 0
 }
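PATCH 05 is a loop-invariant hoist: the docID never changes across the candidate docNums, so its single in-flight entry can be fetched once and compared against each candidate. A simplified sketch of the resulting control flow, under the assumption that one in-flight docNum exists per docID (the types here are reduced stand-ins for firestorm's Snapshot, and the deleted-set check is omitted):

package main

import (
	"fmt"
	"sort"
)

type inFlightItem struct{ docNum uint64 }

// which fetches nothing per iteration: the caller's in-flight entry is
// passed in once, then every candidate docNum is checked against it.
func which(inFlight *inFlightItem, candidates []uint64, maxRead uint64) uint64 {
	// sort descending so the newest acceptable docNum wins
	sort.Slice(candidates, func(i, j int) bool { return candidates[i] > candidates[j] })
	for _, docNum := range candidates {
		if docNum > 0 && docNum <= maxRead &&
			(inFlight == nil || inFlight.docNum == docNum) {
			return docNum
		}
	}
	return 0
}

func main() {
	fmt.Println(which(&inFlightItem{docNum: 7}, []uint64{3, 9, 7}, 8)) // prints 7
}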
From 82b8b3468ecd78b9e878707281e3d8285c74ff90 Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Wed, 6 Jan 2016 23:38:02 -0800
Subject: [PATCH 06/11] upside_down analysis converts to docIDBytes once

---
 index/upside_down/analysis.go     | 10 ++++++----
 index/upside_down/dump.go         |  6 ++++--
 index/upside_down/index_reader.go |  2 +-
 index/upside_down/reader.go       | 16 +++++++++-------
 index/upside_down/row.go          | 20 ++++++++++----------
 index/upside_down/row_test.go     | 26 +++++++++++++-------------
 index/upside_down/upside_down.go  |  9 +++++----
 7 files changed, 48 insertions(+), 41 deletions(-)

diff --git a/index/upside_down/analysis.go b/index/upside_down/analysis.go
index b02ce9cd..2c32649b 100644
--- a/index/upside_down/analysis.go
+++ b/index/upside_down/analysis.go
@@ -21,6 +21,8 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
 		Rows: make([]index.IndexRow, 0, 100),
 	}
 
+	docIDBytes := []byte(d.ID)
+
 	// track our back index entries
 	backIndexTermEntries := make([]*BackIndexTermEntry, 0)
 	backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
@@ -56,7 +58,7 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
 		}
 
 		if field.Options().IsStored() {
-			storeRows, indexBackIndexStoreEntries := udc.storeField(d.ID, field, fieldIndex)
+			storeRows, indexBackIndexStoreEntries := udc.storeField(docIDBytes, field, fieldIndex)
 			rv.Rows = append(rv.Rows, storeRows...)
 			backIndexStoredEntries = append(backIndexStoredEntries, indexBackIndexStoreEntries...)
 		}
@@ -75,7 +77,7 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
 		}
 
 		// encode this field
-		indexRows, indexBackIndexTermEntries := udc.indexField(d.ID, includeTermVectors, fieldIndex, fieldLength, tokenFreqs)
+		indexRows, indexBackIndexTermEntries := udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs)
 		rv.Rows = append(rv.Rows, indexRows...)
 		backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
 	}
@@ -89,14 +91,14 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
 		if compositeField.Options().IsIndexed() {
 			fieldLength, tokenFreqs := compositeField.Analyze()
 			// encode this field
-			indexRows, indexBackIndexTermEntries := udc.indexField(d.ID, compositeField.Options().IncludeTermVectors(), fieldIndex, fieldLength, tokenFreqs)
+			indexRows, indexBackIndexTermEntries := udc.indexField(docIDBytes, compositeField.Options().IncludeTermVectors(), fieldIndex, fieldLength, tokenFreqs)
 			rv.Rows = append(rv.Rows, indexRows...)
 			backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
 		}
 	}
 
 	// build the back index row
-	backIndexRow := NewBackIndexRow(d.ID, backIndexTermEntries, backIndexStoredEntries)
+	backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermEntries, backIndexStoredEntries)
 	rv.Rows = append(rv.Rows, backIndexRow)
 
 	return rv
diff --git a/index/upside_down/dump.go b/index/upside_down/dump.go
index 69b7012b..023ae458 100644
--- a/index/upside_down/dump.go
+++ b/index/upside_down/dump.go
@@ -131,6 +131,8 @@ func (k keyset) Less(i, j int) bool { return bytes.Compare(k[i], k[j]) < 0 }
 
 // DumpDoc returns all rows in the index related to this doc id
 func (udc *UpsideDownCouch) DumpDoc(id string) chan interface{} {
+	idBytes := []byte(id)
+
 	rv := make(chan interface{})
 
 	go func() {
@@ -162,14 +164,14 @@ func (udc *UpsideDownCouch) DumpDoc(id string) chan interface{} {
 		// build sorted list of term keys
 		keys := make(keyset, 0)
 		for _, entry := range back.termEntries {
-			tfr := NewTermFrequencyRow([]byte(*entry.Term), uint16(*entry.Field), id, 0, 0)
+			tfr := NewTermFrequencyRow([]byte(*entry.Term), uint16(*entry.Field), idBytes, 0, 0)
 			key := tfr.Key()
 			keys = append(keys, key)
 		}
 		sort.Sort(keys)
 
 		// first add all the stored rows
-		storedRowPrefix := NewStoredRow(id, 0, []uint64{}, 'x', []byte{}).ScanPrefixForDoc()
+		storedRowPrefix := NewStoredRow(idBytes, 0, []uint64{}, 'x', []byte{}).ScanPrefixForDoc()
 		udc.dumpPrefix(kvreader, rv, storedRowPrefix)
 
 		// now walk term keys in order and add them as well
diff --git a/index/upside_down/index_reader.go b/index/upside_down/index_reader.go
index 1f905adb..fb43a86e 100644
--- a/index/upside_down/index_reader.go
+++ b/index/upside_down/index_reader.go
@@ -60,7 +60,7 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) {
 		return
 	}
 	doc = document.NewDocument(id)
-	storedRow := NewStoredRow(id, 0, []uint64{}, 'x', nil)
+	storedRow := NewStoredRow([]byte(id), 0, []uint64{}, 'x', nil)
 	storedRowScanPrefix := storedRow.ScanPrefixForDoc()
 	it := i.kvreader.PrefixIterator(storedRowScanPrefix)
 	defer func() {
diff --git a/index/upside_down/reader.go b/index/upside_down/reader.go
index 773d4ed4..2fea9114 100644
--- a/index/upside_down/reader.go
+++ b/index/upside_down/reader.go
@@ -41,7 +41,7 @@ func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, fi
 		return nil, err
 	}
 
-	tfr := NewTermFrequencyRow(term, field, "", 0, 0)
+	tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0)
 	it := indexReader.kvreader.PrefixIterator(tfr.Key())
 
 	return &UpsideDownCouchTermFieldReader{
@@ -80,7 +80,7 @@ func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) {
 
 func (r *UpsideDownCouchTermFieldReader) Advance(docID string) (*index.TermFieldDoc, error) {
 	if r.iterator != nil {
-		tfr := NewTermFrequencyRow(r.term, r.field, docID, 0, 0)
+		tfr := NewTermFrequencyRow(r.term, r.field, []byte(docID), 0, 0)
 		r.iterator.Seek(tfr.Key())
 		key, val, valid := r.iterator.Current()
 		if valid {
@@ -114,14 +114,16 @@ type UpsideDownCouchDocIDReader struct {
 }
 
 func newUpsideDownCouchDocIDReader(indexReader *IndexReader, start, end string) (*UpsideDownCouchDocIDReader, error) {
+	startBytes := []byte(start)
 	if start == "" {
-		start = string([]byte{0x0})
+		startBytes = []byte{0x0}
 	}
+	endBytes := []byte(end)
 	if end == "" {
-		end = string([]byte{0xff})
+		endBytes = []byte{0xff}
 	}
-	bisr := NewBackIndexRow(start, nil, nil)
-	bier := NewBackIndexRow(end, nil, nil)
+	bisr := NewBackIndexRow(startBytes, nil, nil)
+	bier := NewBackIndexRow(endBytes, nil, nil)
 	it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
 
 	return &UpsideDownCouchDocIDReader{
@@ -145,7 +147,7 @@ func (r *UpsideDownCouchDocIDReader) Next() (string, error) {
 }
 
 func (r *UpsideDownCouchDocIDReader) Advance(docID string) (string, error) {
-	bir := NewBackIndexRow(docID, nil, nil)
+	bir := NewBackIndexRow([]byte(docID), nil, nil)
 	r.iterator.Seek(bir.Key())
 	key, val, valid := r.iterator.Current()
 	if valid {
diff --git a/index/upside_down/row.go b/index/upside_down/row.go
index 84bcd35f..e6663bf4 100644
--- a/index/upside_down/row.go
+++ b/index/upside_down/row.go
@@ -459,21 +459,21 @@ func (tfr *TermFrequencyRow) String() string {
 	return fmt.Sprintf("Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v", string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors)
 }
 
-func NewTermFrequencyRow(term []byte, field uint16, doc string, freq uint64, norm float32) *TermFrequencyRow {
+func NewTermFrequencyRow(term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow {
 	return &TermFrequencyRow{
 		term:  term,
 		field: field,
-		doc:   []byte(doc),
+		doc:   docID,
 		freq:  freq,
 		norm:  norm,
 	}
 }
 
-func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, doc string, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow {
+func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, docID []byte, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow {
 	return &TermFrequencyRow{
 		term:    term,
 		field:   field,
-		doc:     []byte(doc),
+		doc:     docID,
 		freq:    freq,
 		norm:    norm,
 		vectors: vectors,
@@ -605,7 +605,7 @@ func (br *BackIndexRow) AllTermKeys() [][]byte {
 	}
 	rv := make([][]byte, len(br.termEntries))
 	for i, termEntry := range br.termEntries {
-		termRow := NewTermFrequencyRow([]byte(termEntry.GetTerm()), uint16(termEntry.GetField()), string(br.doc), 0, 0)
+		termRow := NewTermFrequencyRow([]byte(termEntry.GetTerm()), uint16(termEntry.GetField()), br.doc, 0, 0)
 		rv[i] = termRow.Key()
 	}
 	return rv
@@ -617,7 +617,7 @@ func (br *BackIndexRow) AllStoredKeys() [][]byte {
 	}
 	rv := make([][]byte, len(br.storedEntries))
 	for i, storedEntry := range br.storedEntries {
-		storedRow := NewStoredRow(string(br.doc), uint16(storedEntry.GetField()), storedEntry.GetArrayPositions(), 'x', []byte{})
+		storedRow := NewStoredRow(br.doc, uint16(storedEntry.GetField()), storedEntry.GetArrayPositions(), 'x', []byte{})
 		rv[i] = storedRow.Key()
 	}
 	return rv
@@ -665,9 +665,9 @@ func (br *BackIndexRow) String() string {
 	return fmt.Sprintf("Backindex DocId: `%s` Term Entries: %v, Stored Entries: %v", string(br.doc), br.termEntries, br.storedEntries)
 }
 
-func NewBackIndexRow(doc string, entries []*BackIndexTermEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow {
+func NewBackIndexRow(docID []byte, entries []*BackIndexTermEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow {
 	return &BackIndexRow{
-		doc:           []byte(doc),
+		doc:           docID,
 		termEntries:   entries,
 		storedEntries: storedFields,
 	}
@@ -766,9 +766,9 @@ func (s *StoredRow) ScanPrefixForDoc() []byte {
 	return buf
 }
 
-func NewStoredRow(doc string, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow {
+func NewStoredRow(docID []byte, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow {
 	return &StoredRow{
-		doc:            []byte(doc),
+		doc:            docID,
 		field:          field,
 		arrayPositions: arrayPositions,
 		typ:            typ,
diff --git a/index/upside_down/row_test.go b/index/upside_down/row_test.go
index b99207ae..690a91dc 100644
--- a/index/upside_down/row_test.go
+++ b/index/upside_down/row_test.go
@@ -49,54 +49,54 @@ func TestRows(t *testing.T) {
 			[]byte{27},
 		},
 		{
-			NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "catz", 3, 3.14),
+			NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, []byte("catz"), 3, 3.14),
 			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'c', 'a', 't', 'z'},
 			[]byte{3, 195, 235, 163, 130, 4},
 		},
 		{
-			NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 3, 3.14),
+			NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 3, 3.14),
 			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
 			[]byte{3, 195, 235, 163, 130, 4},
 		},
 		{
-			NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 3, 3.14, []*TermVector{&TermVector{field: 0, pos: 1, start: 3, end: 11}, &TermVector{field: 0, pos: 2, start: 23, end: 31}, &TermVector{field: 0, pos: 3, start: 43, end: 51}}),
+			NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 3, 3.14, []*TermVector{&TermVector{field: 0, pos: 1, start: 3, end: 11}, &TermVector{field: 0, pos: 2, start: 23, end: 31}, &TermVector{field: 0, pos: 3, start: 43, end: 51}}),
 			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
 			[]byte{3, 195, 235, 163, 130, 4, 0, 1, 3, 11, 0, 0, 2, 23, 31, 0, 0, 3, 43, 51, 0},
 		},
 		// test larger varints
 		{
-			NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 25896, 3.14, []*TermVector{&TermVector{field: 255, pos: 1, start: 3, end: 11}, &TermVector{field: 0, pos: 2198, start: 23, end: 31}, &TermVector{field: 0, pos: 3, start: 43, end: 51}}),
+			NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 25896, 3.14, []*TermVector{&TermVector{field: 255, pos: 1, start: 3, end: 11}, &TermVector{field: 0, pos: 2198, start: 23, end: 31}, &TermVector{field: 0, pos: 3, start: 43, end: 51}}),
 			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
 			[]byte{168, 202, 1, 195, 235, 163, 130, 4, 255, 1, 1, 3, 11, 0, 0, 150, 17, 23, 31, 0, 0, 3, 43, 51, 0},
 		},
 		// test vectors with arrayPositions
 		{
-			NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 25896, 3.14, []*TermVector{&TermVector{field: 255, pos: 1, start: 3, end: 11, arrayPositions: []uint64{0}}, &TermVector{field: 0, pos: 2198, start: 23, end: 31, arrayPositions: []uint64{1, 2}}, &TermVector{field: 0, pos: 3, start: 43, end: 51, arrayPositions: []uint64{3, 4, 5}}}),
+			NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 25896, 3.14, []*TermVector{&TermVector{field: 255, pos: 1, start: 3, end: 11, arrayPositions: []uint64{0}}, &TermVector{field: 0, pos: 2198, start: 23, end: 31, arrayPositions: []uint64{1, 2}}, &TermVector{field: 0, pos: 3, start: 43, end: 51, arrayPositions: []uint64{3, 4, 5}}}),
 			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
 			[]byte{168, 202, 1, 195, 235, 163, 130, 4, 255, 1, 1, 3, 11, 1, 0, 0, 150, 17, 23, 31, 2, 1, 2, 0, 3, 43, 51, 3, 3, 4, 5},
 		},
 		{
-			NewBackIndexRow("budweiser", []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}}, nil),
+			NewBackIndexRow([]byte("budweiser"), []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}}, nil),
 			[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
 			[]byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0},
 		},
 		{
-			NewBackIndexRow("budweiser", []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}, &BackIndexTermEntry{Term: proto.String("beat"), Field: proto.Uint32(1)}}, nil),
+			NewBackIndexRow([]byte("budweiser"), []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}, &BackIndexTermEntry{Term: proto.String("beat"), Field: proto.Uint32(1)}}, nil),
 			[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
 			[]byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0, 10, 8, 10, 4, 'b', 'e', 'a', 't', 16, 1},
 		},
 		{
-			NewBackIndexRow("budweiser", []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}, &BackIndexTermEntry{Term: proto.String("beat"), Field: proto.Uint32(1)}}, []*BackIndexStoreEntry{&BackIndexStoreEntry{Field: proto.Uint32(3)}, &BackIndexStoreEntry{Field: proto.Uint32(4)}, &BackIndexStoreEntry{Field: proto.Uint32(5)}}),
+			NewBackIndexRow([]byte("budweiser"), []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}, &BackIndexTermEntry{Term: proto.String("beat"), Field: proto.Uint32(1)}}, []*BackIndexStoreEntry{&BackIndexStoreEntry{Field: proto.Uint32(3)}, &BackIndexStoreEntry{Field: proto.Uint32(4)}, &BackIndexStoreEntry{Field: proto.Uint32(5)}}),
 			[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
 			[]byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0, 10, 8, 10, 4, 'b', 'e', 'a', 't', 16, 1, 18, 2, 8, 3, 18, 2, 8, 4, 18, 2, 8, 5},
 		},
 		{
-			NewStoredRow("budweiser", 0, []uint64{}, byte('t'), []byte("an american beer")),
+			NewStoredRow([]byte("budweiser"), 0, []uint64{}, byte('t'), []byte("an american beer")),
 			[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0},
 			[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
 		},
 		{
-			NewStoredRow("budweiser", 0, []uint64{2, 294, 3078}, byte('t'), []byte("an american beer")),
+			NewStoredRow([]byte("budweiser"), 0, []uint64{2, 294, 3078}, byte('t'), []byte("an american beer")),
 			[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0, 2, 166, 2, 134, 24},
 			[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
 		},
@@ -259,7 +259,7 @@ func BenchmarkTermFrequencyRowEncode(b *testing.B) {
 	row := NewTermFrequencyRowWithTermVectors(
 		[]byte{'b', 'e', 'e', 'r'},
 		0,
-		"budweiser",
+		[]byte("budweiser"),
 		3,
 		3.14,
 		[]*TermVector{
@@ -304,7 +304,7 @@ func BenchmarkTermFrequencyRowDecode(b *testing.B) {
 func BenchmarkBackIndexRowEncode(b *testing.B) {
 	field := uint32(1)
 	t1 := "term1"
-	row := NewBackIndexRow("beername",
+	row := NewBackIndexRow([]byte("beername"),
 		[]*BackIndexTermEntry{
 			&BackIndexTermEntry{
 				Term:  &t1,
@@ -336,7 +336,7 @@ func BenchmarkBackIndexRowDecode(b *testing.B) {
 }
 
 func BenchmarkStoredRowEncode(b *testing.B) {
-	row := NewStoredRow("budweiser", 0, []uint64{}, byte('t'), []byte("an american beer"))
+	row := NewStoredRow([]byte("budweiser"), 0, []uint64{}, byte('t'), []byte("an american beer"))
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		row.Key()
diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go
index 139bcdc1..96295214 100644
--- a/index/upside_down/upside_down.go
+++ b/index/upside_down/upside_down.go
@@ -473,7 +473,7 @@ func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []in
 	return addRows, updateRows, deleteRows
 }
 
-func (udc *UpsideDownCouch) storeField(docID string, field document.Field, fieldIndex uint16) ([]index.IndexRow, []*BackIndexStoreEntry) {
+func (udc *UpsideDownCouch) storeField(docID []byte, field document.Field, fieldIndex uint16) ([]index.IndexRow, []*BackIndexStoreEntry) {
 	rows := make([]index.IndexRow, 0, 100)
 	backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
 	fieldType := encodeFieldType(field)
@@ -502,7 +502,7 @@ func encodeFieldType(f document.Field) byte {
 	return fieldType
 }
 
-func (udc *UpsideDownCouch) indexField(docID string, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) ([]index.IndexRow, []*BackIndexTermEntry) {
+func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) ([]index.IndexRow, []*BackIndexTermEntry) {
 
 	rows := make([]index.IndexRow, 0, 100)
 	backIndexTermEntries := make([]*BackIndexTermEntry, 0, len(tokenFreqs))
@@ -592,13 +592,14 @@ func (udc *UpsideDownCouch) Delete(id string) (err error) {
 }
 
 func (udc *UpsideDownCouch) deleteSingle(id string, backIndexRow *BackIndexRow, deleteRows []UpsideDownCouchRow) []UpsideDownCouchRow {
+	idBytes := []byte(id)
 	for _, backIndexEntry := range backIndexRow.termEntries {
-		tfr := NewTermFrequencyRow([]byte(*backIndexEntry.Term), uint16(*backIndexEntry.Field), id, 0, 0)
+		tfr := NewTermFrequencyRow([]byte(*backIndexEntry.Term), uint16(*backIndexEntry.Field), idBytes, 0, 0)
 		deleteRows = append(deleteRows, tfr)
 	}
 
 	for _, se := range backIndexRow.storedEntries {
-		sf := NewStoredRow(id, uint16(*se.Field), se.ArrayPositions, 'x', nil)
+		sf := NewStoredRow(idBytes, uint16(*se.Field), se.ArrayPositions, 'x', nil)
 		deleteRows = append(deleteRows, sf)
 	}
 
From 1af2927967d3ace4deb8de206dc67fd429eff65a Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Wed, 6 Jan 2016 23:53:13 -0800
Subject: [PATCH 07/11] upside_down gets analysis perf rows optimizations from
 firestorm

---
 index/upside_down/analysis.go | 56 ++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 27 deletions(-)

diff --git a/index/upside_down/analysis.go b/index/upside_down/analysis.go
index 2c32649b..c6d94df3 100644
--- a/index/upside_down/analysis.go
+++ b/index/upside_down/analysis.go
@@ -33,11 +33,7 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
 	fieldIncludeTermVectors := make(map[uint16]bool)
 	fieldNames := make(map[uint16]string)
 
-	// walk all the fields, record stored fields now
-	// place information about indexed fields into map
-	// this collates information across fields with
-	// same names (arrays)
-	for _, field := range d.Fields {
+	analyzeField := func(field document.Field, storable bool) {
 		fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name())
 		if newFieldRow != nil {
 			rv.Rows = append(rv.Rows, newFieldRow)
@@ -57,46 +53,52 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
 			fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
 		}
 
-		if field.Options().IsStored() {
+		if storable && field.Options().IsStored() {
 			storeRows, indexBackIndexStoreEntries := udc.storeField(docIDBytes, field, fieldIndex)
 			rv.Rows = append(rv.Rows, storeRows...)
 			backIndexStoredEntries = append(backIndexStoredEntries, indexBackIndexStoreEntries...)
 		}
 	}
 
+	// walk all the fields, record stored fields now
+	// place information about indexed fields into map
+	// this collates information across fields with
+	// same names (arrays)
+	for _, field := range d.Fields {
+		analyzeField(field, true)
+	}
+
+	for fieldIndex, tokenFreqs := range fieldTermFreqs {
+		// see if any of the composite fields need this
+		for _, compositeField := range d.CompositeFields {
+			compositeField.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs)
+		}
+	}
+
+	for _, compositeField := range d.CompositeFields {
+		analyzeField(compositeField, false)
+	}
+
+	rowsCapNeeded := len(rv.Rows) + 1
+	for _, tokenFreqs := range fieldTermFreqs {
+		rowsCapNeeded += len(tokenFreqs)
+	}
+
+	rows := make([]index.IndexRow, 0, rowsCapNeeded)
+	rv.Rows = append(rows, rv.Rows...)
+
 	// walk through the collated information and proccess
 	// once for each indexed field (unique name)
 	for fieldIndex, tokenFreqs := range fieldTermFreqs {
 		fieldLength := fieldLengths[fieldIndex]
 		includeTermVectors := fieldIncludeTermVectors[fieldIndex]
 
-		// see if any of the composite fields need this
-		for _, compositeField := range d.CompositeFields {
-			compositeField.Compose(fieldNames[fieldIndex], fieldLength, tokenFreqs)
-		}
-
 		// encode this field
 		indexRows, indexBackIndexTermEntries := udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs)
 		rv.Rows = append(rv.Rows, indexRows...)
 		backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
 	}
 
-	// now index the composite fields
-	for _, compositeField := range d.CompositeFields {
-		fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(compositeField.Name())
-		if newFieldRow != nil {
-			rv.Rows = append(rv.Rows, newFieldRow)
-		}
-		if compositeField.Options().IsIndexed() {
-			fieldLength, tokenFreqs := compositeField.Analyze()
-			// encode this field
-			indexRows, indexBackIndexTermEntries := udc.indexField(docIDBytes, compositeField.Options().IncludeTermVectors(), fieldIndex, fieldLength, tokenFreqs)
-			rv.Rows = append(rv.Rows, indexRows...)
-			backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
-		}
-	}
-
 	// build the back index row
 	backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermEntries, backIndexStoredEntries)
 	rv.Rows = append(rv.Rows, backIndexRow)

From 4eee8821f94542bf1e410c30c6cd301e7fdcc721 Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Thu, 7 Jan 2016 00:13:38 -0800
Subject: [PATCH 08/11] upside_down storeField/indexField append to provided
 arrays

Taking another optimization from firestorm, upside_down's
storeField()/indexField() funcs now also append() to passed-in arrays
rather than always allocating their own arrays.
---
 index/upside_down/analysis.go    |  8 ++------
 index/upside_down/upside_down.go | 13 +++----------
 2 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/index/upside_down/analysis.go b/index/upside_down/analysis.go
index c6d94df3..43bf99f1 100644
--- a/index/upside_down/analysis.go
+++ b/index/upside_down/analysis.go
@@ -54,9 +54,7 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
 		}
 
 		if storable && field.Options().IsStored() {
-			storeRows, indexBackIndexStoreEntries := udc.storeField(docIDBytes, field, fieldIndex)
-			rv.Rows = append(rv.Rows, storeRows...)
-			backIndexStoredEntries = append(backIndexStoredEntries, indexBackIndexStoreEntries...)
+			rv.Rows, backIndexStoredEntries = udc.storeField(docIDBytes, field, fieldIndex, rv.Rows, backIndexStoredEntries)
 		}
 	}
 
@@ -94,9 +92,7 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
 		includeTermVectors := fieldIncludeTermVectors[fieldIndex]
 
 		// encode this field
-		indexRows, indexBackIndexTermEntries := udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs)
-		rv.Rows = append(rv.Rows, indexRows...)
-		backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
+		rv.Rows, backIndexTermEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermEntries)
 	}
 
 	// build the back index row
diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go
index 96295214..03b660e1 100644
--- a/index/upside_down/upside_down.go
+++ b/index/upside_down/upside_down.go
@@ -473,18 +473,14 @@ func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []in
 	return addRows, updateRows, deleteRows
 }
 
-func (udc *UpsideDownCouch) storeField(docID []byte, field document.Field, fieldIndex uint16) ([]index.IndexRow, []*BackIndexStoreEntry) {
-	rows := make([]index.IndexRow, 0, 100)
-	backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
+func (udc *UpsideDownCouch) storeField(docID []byte, field document.Field, fieldIndex uint16, rows []index.IndexRow, backIndexStoredEntries []*BackIndexStoreEntry) ([]index.IndexRow, []*BackIndexStoreEntry) {
 	fieldType := encodeFieldType(field)
 	storedRow := NewStoredRow(docID, fieldIndex, field.ArrayPositions(), fieldType, field.Value())
 
 	// record the back index entry
 	backIndexStoredEntry := BackIndexStoreEntry{Field: proto.Uint32(uint32(fieldIndex)), ArrayPositions: field.ArrayPositions()}
-	backIndexStoredEntries = append(backIndexStoredEntries, &backIndexStoredEntry)
 
-	rows = append(rows, storedRow)
-	return rows, backIndexStoredEntries
+	return append(rows, storedRow), append(backIndexStoredEntries, &backIndexStoredEntry)
 }
 
 func encodeFieldType(f document.Field) byte {
@@ -502,10 +498,7 @@ func encodeFieldType(f document.Field) byte {
 	return fieldType
 }
 
-func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) ([]index.IndexRow, []*BackIndexTermEntry) {
-
-	rows := make([]index.IndexRow, 0, 100)
-	backIndexTermEntries := make([]*BackIndexTermEntry, 0, len(tokenFreqs))
+func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermEntries []*BackIndexTermEntry) ([]index.IndexRow, []*BackIndexTermEntry) {
 	fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
 
 	for k, tf := range tokenFreqs {
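The signature change in PATCH 08 replaces "allocate a fresh slice, fill it, copy it into the caller's slice" with a single append chain into storage the caller already owns; combined with the capacity precalculation from PATCH 07, the hot path can avoid allocating entirely. A minimal before/after sketch of the idea (generic names, not the upside_down signatures):

package main

import "fmt"

// before: every call allocates its own result slice, which the caller
// then copies into its accumulator with append(dst, src...)
func rowsBefore(n int) []int {
	out := make([]int, 0, 100)
	for i := 0; i < n; i++ {
		out = append(out, i)
	}
	return out
}

// after: appends directly into the caller-provided slice and returns it
func rowsAfter(n int, out []int) []int {
	for i := 0; i < n; i++ {
		out = append(out, i)
	}
	return out
}

func main() {
	rows := make([]int, 0, 8) // capacity reserved once, up front
	rows = rowsAfter(3, rows)
	rows = rowsAfter(3, rows) // grows the same backing array
	fmt.Println(rows, rowsBefore(2))
}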
From fbd0e7bfe987fec604a1735fdf7012e6d033d9f3 Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Thu, 7 Jan 2016 00:23:25 -0800
Subject: [PATCH 09/11] upside_down backIndexTermEntries precalloc'ed capacity

---
 index/upside_down/analysis.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/index/upside_down/analysis.go b/index/upside_down/analysis.go
index 43bf99f1..dbb732b7 100644
--- a/index/upside_down/analysis.go
+++ b/index/upside_down/analysis.go
@@ -24,7 +24,6 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
 	docIDBytes := []byte(d.ID)
 
 	// track our back index entries
-	backIndexTermEntries := make([]*BackIndexTermEntry, 0)
 	backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
 
 	// information we collate as we merge fields with same name
@@ -82,8 +81,9 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
 		rowsCapNeeded += len(tokenFreqs)
 	}
 
-	rows := make([]index.IndexRow, 0, rowsCapNeeded)
-	rv.Rows = append(rows, rv.Rows...)
+	rv.Rows = append(make([]index.IndexRow, 0, rowsCapNeeded), rv.Rows...)
+
+	backIndexTermEntries := make([]*BackIndexTermEntry, 0, rowsCapNeeded)
 
 	// walk through the collated information and proccess
 	// once for each indexed field (unique name)

From 8b980bd2efa5f8239c66b89e4e7c5a97c5cd8ad3 Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Thu, 7 Jan 2016 00:43:27 -0800
Subject: [PATCH 10/11] firestorm avoid extra goroutine, similar to upside_down

---
 index/firestorm/firestorm.go     | 2 +-
 index/upside_down/upside_down.go | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/index/firestorm/firestorm.go b/index/firestorm/firestorm.go
index c76b445f..ea721334 100644
--- a/index/firestorm/firestorm.go
+++ b/index/firestorm/firestorm.go
@@ -146,7 +146,7 @@ func (f *Firestorm) Update(doc *document.Document) (err error) {
 	aw := index.NewAnalysisWork(f, doc, resultChan)
 
 	// put the work on the queue
-	go f.analysisQueue.Queue(aw)
+	f.analysisQueue.Queue(aw)
 
 	// wait for the result
 	result := <-resultChan
diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go
index 03b660e1..069871c0 100644
--- a/index/upside_down/upside_down.go
+++ b/index/upside_down/upside_down.go
@@ -29,7 +29,7 @@ import (
 const Name = "upside_down"
 
 // RowBufferSize should ideally this is sized to be the smallest
-// size that can cotain an index row key and its corresponding
+// size that can contain an index row key and its corresponding
 // value. It is not a limit, if need be a larger buffer is
 // allocated, but performance will be more optimal if *most*
 // rows fit this size.
@@ -344,6 +344,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
 	analysisStart := time.Now()
 	resultChan := make(chan *index.AnalysisResult)
 	aw := index.NewAnalysisWork(udc, doc, resultChan)
+
 	// put the work on the queue
 	udc.analysisQueue.Queue(aw)
 
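PATCH 10 works because the caller blocks on resultChan immediately after queueing, and the queue is drained by long-running analysis workers; wrapping the enqueue in its own goroutine therefore buys no extra concurrency, only scheduler overhead. A reduced model of that producer/worker handoff (a plain channel standing in for bleve's index.AnalysisQueue):

package main

import "fmt"

type work struct {
	input  int
	result chan int
}

func main() {
	queue := make(chan work)
	go func() { // a long-running worker, like bleve's analysis workers
		for w := range queue {
			w.result <- w.input * 2
		}
	}()

	w := work{input: 21, result: make(chan int)}
	// Enqueue directly: since we block on w.result right away, spawning
	// a goroutine just to perform this send would add nothing.
	queue <- w
	fmt.Println(<-w.result) // prints 42
}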
From 846912d08367a68fa4d8c8794908a89fdb133e3c Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Thu, 7 Jan 2016 00:48:34 -0800
Subject: [PATCH 11/11] upside_down udc.termVectorsFromTokenFreq rows append
 optimization

---
 index/upside_down/upside_down.go | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go
index 069871c0..1b57f990 100644
--- a/index/upside_down/upside_down.go
+++ b/index/upside_down/upside_down.go
@@ -505,8 +505,8 @@ func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fi
 	for k, tf := range tokenFreqs {
 		var termFreqRow *TermFrequencyRow
 		if includeTermVectors {
-			tv, newFieldRows := udc.termVectorsFromTokenFreq(fieldIndex, tf)
-			rows = append(rows, newFieldRows...)
+			var tv []*TermVector
+			tv, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows)
 			termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv)
 		} else {
 			termFreqRow = NewTermFrequencyRow(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm)
@@ -662,9 +662,8 @@ func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
 	return tf.Frequency()
 }
 
-func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq) ([]*TermVector, []index.IndexRow) {
+func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq, rows []index.IndexRow) ([]*TermVector, []index.IndexRow) {
 	rv := make([]*TermVector, len(tf.Locations))
-	newFieldRows := make([]index.IndexRow, 0)
 
 	for i, l := range tf.Locations {
 		var newFieldRow *FieldRow
@@ -673,7 +672,7 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.
 			// lookup correct field
 			fieldIndex, newFieldRow = udc.fieldIndexOrNewRow(l.Field)
 			if newFieldRow != nil {
-				newFieldRows = append(newFieldRows, newFieldRow)
+				rows = append(rows, newFieldRow)
 			}
 		}
 		tv := TermVector{
@@ -686,7 +685,7 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.
 		rv[i] = &tv
 	}
 
-	return rv, newFieldRows
+	return rv, rows
 }
 
 func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector {
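PATCH 11 closes the series with the same slice-threading idea one level deeper: rather than collecting new field rows in a temporary slice that the caller then append()s into rows, the helper takes rows as a parameter and returns the grown slice alongside its primary result. A compact sketch of that transformation (illustrative types only, not the upside_down ones):

package main

import "fmt"

type vector struct{ pos int }

// vectorsAndRows returns its primary result (the vectors) and threads
// the caller's rows slice through, appending side-effect rows in place.
func vectorsAndRows(positions []int, rows []string) ([]*vector, []string) {
	rv := make([]*vector, len(positions))
	for i, p := range positions {
		if p%2 == 0 { // pretend even positions require a new field row
			rows = append(rows, fmt.Sprintf("fieldrow-%d", p))
		}
		rv[i] = &vector{pos: p}
	}
	return rv, rows
}

func main() {
	rows := make([]string, 0, 4)
	var vecs []*vector
	vecs, rows = vectorsAndRows([]int{1, 2, 3, 4}, rows)
	fmt.Println(len(vecs), rows) // 4 [fieldrow-2 fieldrow-4]
}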