From 45e9eaaacbdc885efee9c9efefc41ebc92099a92 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 30 Dec 2015 21:15:24 -0800 Subject: [PATCH] firestorm.indexField() allocs up-front array of TermFreqRow's This uses the "backing array" technique to allocate many TermFreqRows at the start of firestorm.indexField(), instead of the previous one-by-one, as-needed TermFreqRow allocation approach. Results from micro-benchmark, null-firestorm, bleve-blast show this change producing a ~half MB/sec improvement. --- index/firestorm/analysis.go | 8 ++++++-- index/firestorm/termfreq.go | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/index/firestorm/analysis.go b/index/firestorm/analysis.go index 4e5b71dc..32c6cf66 100644 --- a/index/firestorm/analysis.go +++ b/index/firestorm/analysis.go @@ -97,19 +97,23 @@ func (f *Firestorm) Analyze(d *document.Document) *index.AnalysisResult { func (f *Firestorm) indexField(docID []byte, docNum uint64, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) []index.IndexRow { rows := make([]index.IndexRow, 0, len(tokenFreqs)) + tfrs := make([]TermFreqRow, len(tokenFreqs)) + fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength))) + i := 0 for _, tf := range tokenFreqs { var termFreqRow *TermFreqRow if includeTermVectors { tv, newFieldRows := f.termVectorsFromTokenFreq(fieldIndex, tf) rows = append(rows, newFieldRows...) 
- termFreqRow = NewTermFreqRow(fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, tv) + termFreqRow = InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, tv) } else { - termFreqRow = NewTermFreqRow(fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, nil) + termFreqRow = InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, nil) } rows = append(rows, termFreqRow) + i++ } return rows diff --git a/index/firestorm/termfreq.go b/index/firestorm/termfreq.go index 0d5cc9e8..1d36ec26 100644 --- a/index/firestorm/termfreq.go +++ b/index/firestorm/termfreq.go @@ -46,18 +46,18 @@ func NewTermVector(field uint16, pos uint64, start uint64, end uint64, arrayPos } func NewTermFreqRow(field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow { - rv := TermFreqRow{ - field: field, - term: term, - docID: docID, - docNum: docNum, - } + return InitTermFreqRow(&TermFreqRow{}, field, term, docID, docNum, freq, norm, termVectors) +} - rv.value.Freq = proto.Uint64(freq) - rv.value.Norm = proto.Float32(norm) - rv.value.Vectors = termVectors - - return &rv +func InitTermFreqRow(tfr *TermFreqRow, field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow { + tfr.field = field + tfr.term = term + tfr.docID = docID + tfr.docNum = docNum + tfr.value.Freq = proto.Uint64(freq) + tfr.value.Norm = proto.Float32(norm) + tfr.value.Vectors = termVectors + return tfr } func NewTermFreqRowKV(key, value []byte) (*TermFreqRow, error) {