firestorm.indexField() allocs up-front array of TermFreqRows
This uses the "backing array" technique to allocate many TermFreqRows at the start of firestorm.indexField(), instead of the previous one-by-one, as-needed TermFreqRow allocation approach. Results from micro-benchmark, null-firestorm, bleve-blast show this change producing an improvement of roughly half a MB/sec.
This commit is contained in:
parent
7ae696d661
commit
45e9eaaacb
@ -97,19 +97,23 @@ func (f *Firestorm) Analyze(d *document.Document) *index.AnalysisResult {
|
|||||||
func (f *Firestorm) indexField(docID []byte, docNum uint64, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) []index.IndexRow {
|
func (f *Firestorm) indexField(docID []byte, docNum uint64, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) []index.IndexRow {
|
||||||
|
|
||||||
rows := make([]index.IndexRow, 0, len(tokenFreqs))
|
rows := make([]index.IndexRow, 0, len(tokenFreqs))
|
||||||
|
tfrs := make([]TermFreqRow, len(tokenFreqs))
|
||||||
|
|
||||||
fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
|
fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
|
||||||
|
|
||||||
|
i := 0
|
||||||
for _, tf := range tokenFreqs {
|
for _, tf := range tokenFreqs {
|
||||||
var termFreqRow *TermFreqRow
|
var termFreqRow *TermFreqRow
|
||||||
if includeTermVectors {
|
if includeTermVectors {
|
||||||
tv, newFieldRows := f.termVectorsFromTokenFreq(fieldIndex, tf)
|
tv, newFieldRows := f.termVectorsFromTokenFreq(fieldIndex, tf)
|
||||||
rows = append(rows, newFieldRows...)
|
rows = append(rows, newFieldRows...)
|
||||||
termFreqRow = NewTermFreqRow(fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, tv)
|
termFreqRow = InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, tv)
|
||||||
} else {
|
} else {
|
||||||
termFreqRow = NewTermFreqRow(fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, nil)
|
termFreqRow = InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
rows = append(rows, termFreqRow)
|
rows = append(rows, termFreqRow)
|
||||||
|
i++
|
||||||
}
|
}
|
||||||
|
|
||||||
return rows
|
return rows
|
||||||
|
@ -46,18 +46,18 @@ func NewTermVector(field uint16, pos uint64, start uint64, end uint64, arrayPos
|
|||||||
}
|
}
|
||||||
|
|
||||||
func NewTermFreqRow(field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow {
|
func NewTermFreqRow(field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow {
|
||||||
rv := TermFreqRow{
|
return InitTermFreqRow(&TermFreqRow{}, field, term, docID, docNum, freq, norm, termVectors)
|
||||||
field: field,
|
}
|
||||||
term: term,
|
|
||||||
docID: docID,
|
|
||||||
docNum: docNum,
|
|
||||||
}
|
|
||||||
|
|
||||||
rv.value.Freq = proto.Uint64(freq)
|
func InitTermFreqRow(tfr *TermFreqRow, field uint16, term []byte, docID []byte, docNum uint64, freq uint64, norm float32, termVectors []*TermVector) *TermFreqRow {
|
||||||
rv.value.Norm = proto.Float32(norm)
|
tfr.field = field
|
||||||
rv.value.Vectors = termVectors
|
tfr.term = term
|
||||||
|
tfr.docID = docID
|
||||||
return &rv
|
tfr.docNum = docNum
|
||||||
|
tfr.value.Freq = proto.Uint64(freq)
|
||||||
|
tfr.value.Norm = proto.Float32(norm)
|
||||||
|
tfr.value.Vectors = termVectors
|
||||||
|
return tfr
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewTermFreqRowKV(key, value []byte) (*TermFreqRow, error) {
|
func NewTermFreqRowKV(key, value []byte) (*TermFreqRow, error) {
|
||||||
|
Loading…
Reference in New Issue
Block a user