0
0

optimize by allocating TermFrequencyRow/TermVector values in bulk arrays instead of one at a time

This commit is contained in:
Steve Yen 2017-01-09 22:42:00 -08:00
parent 8cd6040b63
commit 790f2e3e32

View File

@ -604,14 +604,18 @@ func encodeFieldType(f document.Field) byte {
func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermEntries []*BackIndexTermEntry) ([]index.IndexRow, []*BackIndexTermEntry) { func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermEntries []*BackIndexTermEntry) ([]index.IndexRow, []*BackIndexTermEntry) {
fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength))) fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
termFreqRows := make([]TermFrequencyRow, len(tokenFreqs))
termFreqRowsUsed := 0
for k, tf := range tokenFreqs { for k, tf := range tokenFreqs {
var termFreqRow *TermFrequencyRow termFreqRow := &termFreqRows[termFreqRowsUsed]
termFreqRowsUsed++
InitTermFrequencyRow(termFreqRow, tf.Term, fieldIndex, docID,
uint64(frequencyFromTokenFreq(tf)), fieldNorm)
if includeTermVectors { if includeTermVectors {
var tv []*TermVector termFreqRow.vectors, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows)
tv, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows)
termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv)
} else {
termFreqRow = NewTermFrequencyRow(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm)
} }
// record the back index entry // record the back index entry
@ -727,6 +731,7 @@ func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
} }
func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq, rows []index.IndexRow) ([]*TermVector, []index.IndexRow) { func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq, rows []index.IndexRow) ([]*TermVector, []index.IndexRow) {
a := make([]TermVector, len(tf.Locations))
rv := make([]*TermVector, len(tf.Locations)) rv := make([]*TermVector, len(tf.Locations))
for i, l := range tf.Locations { for i, l := range tf.Locations {
@ -739,14 +744,14 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.
rows = append(rows, newFieldRow) rows = append(rows, newFieldRow)
} }
} }
tv := TermVector{ a[i] = TermVector{
field: fieldIndex, field: fieldIndex,
arrayPositions: l.ArrayPositions, arrayPositions: l.ArrayPositions,
pos: uint64(l.Position), pos: uint64(l.Position),
start: uint64(l.Start), start: uint64(l.Start),
end: uint64(l.End), end: uint64(l.End),
} }
rv[i] = &tv rv[i] = &a[i]
} }
return rv, rows return rv, rows