From 790f2e3e320b225d4e39389dbca777f492f6a585 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 9 Jan 2017 22:42:00 -0800 Subject: [PATCH 1/2] optimize by alloc'ing arrays of TermFrequencyRow/TermVector --- index/upsidedown/upsidedown.go | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/index/upsidedown/upsidedown.go b/index/upsidedown/upsidedown.go index 60579366..80bbfeaf 100644 --- a/index/upsidedown/upsidedown.go +++ b/index/upsidedown/upsidedown.go @@ -604,14 +604,18 @@ func encodeFieldType(f document.Field) byte { func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermEntries []*BackIndexTermEntry) ([]index.IndexRow, []*BackIndexTermEntry) { fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength))) + termFreqRows := make([]TermFrequencyRow, len(tokenFreqs)) + termFreqRowsUsed := 0 + for k, tf := range tokenFreqs { - var termFreqRow *TermFrequencyRow + termFreqRow := &termFreqRows[termFreqRowsUsed] + termFreqRowsUsed++ + + InitTermFrequencyRow(termFreqRow, tf.Term, fieldIndex, docID, + uint64(frequencyFromTokenFreq(tf)), fieldNorm) + if includeTermVectors { - var tv []*TermVector - tv, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows) - termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv) - } else { - termFreqRow = NewTermFrequencyRow(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm) + termFreqRow.vectors, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows) } // record the back index entry @@ -727,6 +731,7 @@ func frequencyFromTokenFreq(tf *analysis.TokenFreq) int { } func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq, rows []index.IndexRow) ([]*TermVector, []index.IndexRow) { + a := make([]TermVector, len(tf.Locations)) rv := make([]*TermVector, len(tf.Locations)) for i, l := range tf.Locations { @@ -739,14 +744,14 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis. rows = append(rows, newFieldRow) } } - tv := TermVector{ + a[i] = TermVector{ field: fieldIndex, arrayPositions: l.ArrayPositions, pos: uint64(l.Position), start: uint64(l.Start), end: uint64(l.End), } - rv[i] = &tv + rv[i] = &a[i] } return rv, rows From 5927224e15dcecf1b8f48490568492ff4e5c3b56 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 9 Jan 2017 22:48:55 -0800 Subject: [PATCH 2/2] optimize mergeOldAndNew for case of first time a doc is seen --- index/upsidedown/upsidedown.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/index/upsidedown/upsidedown.go b/index/upsidedown/upsidedown.go index 80bbfeaf..1bf9c733 100644 --- a/index/upsidedown/upsidedown.go +++ b/index/upsidedown/upsidedown.go @@ -499,6 +499,15 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []index.IndexRow) (addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) { addRows = make([]UpsideDownCouchRow, 0, len(rows)) + + if backIndexRow == nil { + addRows = addRows[0:len(rows)] + for i, row := range rows { + addRows[i] = row + } + return addRows, nil, nil + } + updateRows = make([]UpsideDownCouchRow, 0, len(rows)) deleteRows = make([]UpsideDownCouchRow, 0, len(rows))