firestorm.indexField() allocates an up-front array of TermFreqRows

This uses the "backing array" technique to allocate many TermFreqRows at the start of firestorm.indexField(), replacing the previous approach of allocating each TermFreqRow one by one, as needed. Results from the micro-benchmark (null-firestorm, bleve-blast) show this change producing an improvement of roughly half a MB/sec.
2015-12-30 21:15:24 -08:00 · 2015-12-30 21:15:24 -08:00 · 45e9eaaacb
commit 45e9eaaacb
parent 7ae696d661
2 changed files with 17 additions and 13 deletions
--- a/index/firestorm/analysis.go
+++ b/index/firestorm/analysis.go
@ -97,19 +97,23 @@ func (f *Firestorm) Analyze(d *document.Document) *index.AnalysisResult {
 func (f *Firestorm) indexField(docID []byte, docNum uint64, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) []index.IndexRow {
 	rows := make([]index.IndexRow, 0, len(tokenFreqs))
 	tfrs := make([]TermFreqRow, len(tokenFreqs))
 	fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
 	i := 0
 	for _, tf := range tokenFreqs {
 		var termFreqRow *TermFreqRow
 		if includeTermVectors {
 			tv, newFieldRows := f.termVectorsFromTokenFreq(fieldIndex, tf)
 			rows = append(rows, newFieldRows...)
-			termFreqRow = NewTermFreqRow(fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, tv)
+			termFreqRow = InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, tv)
 		} else {
-			termFreqRow = NewTermFreqRow(fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, nil)
+			termFreqRow = InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, nil)
 		}
 		rows = append(rows, termFreqRow)
 		i++
 	}
 	return rows
--- a/index/firestorm/termfreq.go
+++ b/index/firestorm/termfreq.go
@ -46,18 +46,18 @@ func NewTermVector(field uint16, pos uint64, start uint64, end uint64, arrayPos
 }
 func NewTermFreqRow(field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow {
-	rv := TermFreqRow{
+	return InitTermFreqRow(&TermFreqRow{}, field, term, docID, docNum, freq, norm, termVectors)
-		field:  field,
+}
 		term:   term,
 		docID:  docID,
 		docNum: docNum,
 	}
-	rv.value.Freq = proto.Uint64(freq)
+func InitTermFreqRow(tfr *TermFreqRow, field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow {
-	rv.value.Norm = proto.Float32(norm)
+	tfr.field = field
-	rv.value.Vectors = termVectors
+	tfr.term = term
-
+	tfr.docID = docID
-	return &rv
+	tfr.docNum = docNum
 	tfr.value.Freq = proto.Uint64(freq)
 	tfr.value.Norm = proto.Float32(norm)
 	tfr.value.Vectors = termVectors
 	return tfr
 }
 func NewTermFreqRowKV(key, value []byte) (*TermFreqRow, error) {