0
0

firestorm.indexField() allocs up-front array of TermFreqRow's

This uses the "backing array" technique to allocate all of the TermFreqRows
in a single slice at the start of firestorm.indexField(), instead of the
previous approach of allocating each TermFreqRow one by one, as needed.

Results from the micro-benchmark (null-firestorm, bleve-blast) show this
change producing an improvement of roughly half a MB/sec.
This commit is contained in:
Steve Yen 2015-12-30 21:15:24 -08:00
parent 7ae696d661
commit 45e9eaaacb
2 changed files with 17 additions and 13 deletions

View File

@ -97,19 +97,23 @@ func (f *Firestorm) Analyze(d *document.Document) *index.AnalysisResult {
func (f *Firestorm) indexField(docID []byte, docNum uint64, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) []index.IndexRow {
rows := make([]index.IndexRow, 0, len(tokenFreqs))
tfrs := make([]TermFreqRow, len(tokenFreqs))
fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
i := 0
for _, tf := range tokenFreqs {
var termFreqRow *TermFreqRow
if includeTermVectors {
tv, newFieldRows := f.termVectorsFromTokenFreq(fieldIndex, tf)
rows = append(rows, newFieldRows...)
termFreqRow = NewTermFreqRow(fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, tv)
termFreqRow = InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, tv)
} else {
termFreqRow = NewTermFreqRow(fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, nil)
termFreqRow = InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, nil)
}
rows = append(rows, termFreqRow)
i++
}
return rows

View File

@ -46,18 +46,18 @@ func NewTermVector(field uint16, pos uint64, start uint64, end uint64, arrayPos
}
func NewTermFreqRow(field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow {
rv := TermFreqRow{
field: field,
term: term,
docID: docID,
docNum: docNum,
}
return InitTermFreqRow(&TermFreqRow{}, field, term, docID, docNum, freq, norm, termVectors)
}
rv.value.Freq = proto.Uint64(freq)
rv.value.Norm = proto.Float32(norm)
rv.value.Vectors = termVectors
return &rv
// InitTermFreqRow fills in the caller-supplied tfr in place and returns that
// same pointer, so a caller can initialize rows that live in storage it has
// already allocated instead of obtaining a fresh TermFreqRow per call.
//
// The term, docID and termVectors slices are stored as given; they are not
// copied. Only the fields assigned below are written; any other state already
// present in tfr is left as it was.
func InitTermFreqRow(tfr *TermFreqRow, field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow {
	// Value portion: frequency and norm are stored through the proto helper
	// constructors, the term vectors slice is stored directly.
	freqVal, normVal := proto.Uint64(freq), proto.Float32(norm)
	tfr.value.Vectors = termVectors
	tfr.value.Norm = normVal
	tfr.value.Freq = freqVal

	// Identifying portion of the row.
	tfr.field, tfr.term = field, term
	tfr.docID, tfr.docNum = docID, docNum

	return tfr
}
func NewTermFreqRowKV(key, value []byte) (*TermFreqRow, error) {