firestorm.indexField() allocates an up-front array of TermFreqRows
This uses the "backing array" technique to allocate many TermFreqRows at the start of firestorm.indexField(), instead of the previous approach of allocating each TermFreqRow one by one, as needed. Results from micro-benchmark, null-firestorm, bleve-blast show this change producing an improvement of roughly half a MB/sec.
This commit is contained in:
parent
7ae696d661
commit
45e9eaaacb
|
@ -97,19 +97,23 @@ func (f *Firestorm) Analyze(d *document.Document) *index.AnalysisResult {
|
|||
func (f *Firestorm) indexField(docID []byte, docNum uint64, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) []index.IndexRow {
|
||||
|
||||
rows := make([]index.IndexRow, 0, len(tokenFreqs))
|
||||
tfrs := make([]TermFreqRow, len(tokenFreqs))
|
||||
|
||||
fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
|
||||
|
||||
i := 0
|
||||
for _, tf := range tokenFreqs {
|
||||
var termFreqRow *TermFreqRow
|
||||
if includeTermVectors {
|
||||
tv, newFieldRows := f.termVectorsFromTokenFreq(fieldIndex, tf)
|
||||
rows = append(rows, newFieldRows...)
|
||||
termFreqRow = NewTermFreqRow(fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, tv)
|
||||
termFreqRow = InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, tv)
|
||||
} else {
|
||||
termFreqRow = NewTermFreqRow(fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, nil)
|
||||
termFreqRow = InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, nil)
|
||||
}
|
||||
|
||||
rows = append(rows, termFreqRow)
|
||||
i++
|
||||
}
|
||||
|
||||
return rows
|
||||
|
|
|
@ -46,18 +46,18 @@ func NewTermVector(field uint16, pos uint64, start uint64, end uint64, arrayPos
|
|||
}
|
||||
|
||||
func NewTermFreqRow(field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow {
|
||||
rv := TermFreqRow{
|
||||
field: field,
|
||||
term: term,
|
||||
docID: docID,
|
||||
docNum: docNum,
|
||||
return InitTermFreqRow(&TermFreqRow{}, field, term, docID, docNum, freq, norm, termVectors)
|
||||
}
|
||||
|
||||
rv.value.Freq = proto.Uint64(freq)
|
||||
rv.value.Norm = proto.Float32(norm)
|
||||
rv.value.Vectors = termVectors
|
||||
|
||||
return &rv
|
||||
// InitTermFreqRow fills in the caller-supplied tfr in place and returns that
// same pointer. It exists so that callers can initialize rows that live in
// storage they already own (for example an element of a pre-allocated
// []TermFreqRow) instead of allocating a fresh TermFreqRow per call;
// NewTermFreqRow uses it with a newly created &TermFreqRow{}.
//
// field, term, docID and docNum are assigned directly to the matching fields
// of tfr. term and docID are stored as given (the slices are assigned, not
// copied). freq and norm are wrapped with proto.Uint64 / proto.Float32 and
// stored in tfr.value.Freq and tfr.value.Norm; termVectors is stored in
// tfr.value.Vectors and may be nil.
//
// tfr is dereferenced unconditionally, so it must not be nil.
func InitTermFreqRow(tfr *TermFreqRow, field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow {
|
||||
tfr.field = field
|
||||
tfr.term = term
|
||||
tfr.docID = docID
|
||||
tfr.docNum = docNum
|
||||
tfr.value.Freq = proto.Uint64(freq)
|
||||
tfr.value.Norm = proto.Float32(norm)
|
||||
tfr.value.Vectors = termVectors
|
||||
return tfr
|
||||
}
|
||||
|
||||
func NewTermFreqRowKV(key, value []byte) (*TermFreqRow, error) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user