0
0

firestorm.indexField() allocs up-front array of TermFreqRow's

This uses the "backing array" technique to allocate many TermFreqRow's
at the front of firestorm.indexField(), instead of the previous
one-by-one, as-needed TermFreqRow allocation approach.

Results from micro-benchmark, null-firestorm, bleve-blast has this
change producing a ~half MB/sec improvement.
This commit is contained in:
Steve Yen 2015-12-30 21:15:24 -08:00
parent 7ae696d661
commit 45e9eaaacb
2 changed files with 17 additions and 13 deletions

View File

@ -97,19 +97,23 @@ func (f *Firestorm) Analyze(d *document.Document) *index.AnalysisResult {
func (f *Firestorm) indexField(docID []byte, docNum uint64, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) []index.IndexRow { func (f *Firestorm) indexField(docID []byte, docNum uint64, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) []index.IndexRow {
rows := make([]index.IndexRow, 0, len(tokenFreqs)) rows := make([]index.IndexRow, 0, len(tokenFreqs))
tfrs := make([]TermFreqRow, len(tokenFreqs))
fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength))) fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
i := 0
for _, tf := range tokenFreqs { for _, tf := range tokenFreqs {
var termFreqRow *TermFreqRow var termFreqRow *TermFreqRow
if includeTermVectors { if includeTermVectors {
tv, newFieldRows := f.termVectorsFromTokenFreq(fieldIndex, tf) tv, newFieldRows := f.termVectorsFromTokenFreq(fieldIndex, tf)
rows = append(rows, newFieldRows...) rows = append(rows, newFieldRows...)
termFreqRow = NewTermFreqRow(fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, tv) termFreqRow = InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, tv)
} else { } else {
termFreqRow = NewTermFreqRow(fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, nil) termFreqRow = InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, nil)
} }
rows = append(rows, termFreqRow) rows = append(rows, termFreqRow)
i++
} }
return rows return rows

View File

@ -46,18 +46,18 @@ func NewTermVector(field uint16, pos uint64, start uint64, end uint64, arrayPos
} }
func NewTermFreqRow(field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow { func NewTermFreqRow(field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow {
rv := TermFreqRow{ return InitTermFreqRow(&TermFreqRow{}, field, term, docID, docNum, freq, norm, termVectors)
field: field, }
term: term,
docID: docID,
docNum: docNum,
}
rv.value.Freq = proto.Uint64(freq) func InitTermFreqRow(tfr *TermFreqRow, field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow {
rv.value.Norm = proto.Float32(norm) tfr.field = field
rv.value.Vectors = termVectors tfr.term = term
tfr.docID = docID
return &rv tfr.docNum = docNum
tfr.value.Freq = proto.Uint64(freq)
tfr.value.Norm = proto.Float32(norm)
tfr.value.Vectors = termVectors
return tfr
} }
func NewTermFreqRowKV(key, value []byte) (*TermFreqRow, error) { func NewTermFreqRowKV(key, value []byte) (*TermFreqRow, error) {