0
0

upside_down gets the analysis rows perf optimizations from firestorm

This commit is contained in:
Steve Yen 2016-01-06 23:53:13 -08:00
parent 82b8b3468e
commit 1af2927967

View File

@ -33,11 +33,7 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
fieldIncludeTermVectors := make(map[uint16]bool)
fieldNames := make(map[uint16]string)
// walk all the fields, record stored fields now
// place information about indexed fields into map
// this collates information across fields with
// same names (arrays)
for _, field := range d.Fields {
analyzeField := func(field document.Field, storable bool) {
fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name())
if newFieldRow != nil {
rv.Rows = append(rv.Rows, newFieldRow)
@ -57,46 +53,52 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
}
if field.Options().IsStored() {
if storable && field.Options().IsStored() {
storeRows, indexBackIndexStoreEntries := udc.storeField(docIDBytes, field, fieldIndex)
rv.Rows = append(rv.Rows, storeRows...)
backIndexStoredEntries = append(backIndexStoredEntries, indexBackIndexStoreEntries...)
}
}
// walk all the fields, record stored fields now
// place information about indexed fields into map
// this collates information across fields with
// same names (arrays)
for _, field := range d.Fields {
analyzeField(field, true)
}
for fieldIndex, tokenFreqs := range fieldTermFreqs {
// see if any of the composite fields need this
for _, compositeField := range d.CompositeFields {
compositeField.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs)
}
}
for _, compositeField := range d.CompositeFields {
analyzeField(compositeField, false)
}
rowsCapNeeded := len(rv.Rows) + 1
for _, tokenFreqs := range fieldTermFreqs {
rowsCapNeeded += len(tokenFreqs)
}
rows := make([]index.IndexRow, 0, rowsCapNeeded)
rv.Rows = append(rows, rv.Rows...)
// walk through the collated information and process
// once for each indexed field (unique name)
for fieldIndex, tokenFreqs := range fieldTermFreqs {
fieldLength := fieldLengths[fieldIndex]
includeTermVectors := fieldIncludeTermVectors[fieldIndex]
// see if any of the composite fields need this
for _, compositeField := range d.CompositeFields {
compositeField.Compose(fieldNames[fieldIndex], fieldLength, tokenFreqs)
}
// encode this field
indexRows, indexBackIndexTermEntries := udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs)
rv.Rows = append(rv.Rows, indexRows...)
backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
}
// now index the composite fields
for _, compositeField := range d.CompositeFields {
fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(compositeField.Name())
if newFieldRow != nil {
rv.Rows = append(rv.Rows, newFieldRow)
}
if compositeField.Options().IsIndexed() {
fieldLength, tokenFreqs := compositeField.Analyze()
// encode this field
indexRows, indexBackIndexTermEntries := udc.indexField(docIDBytes, compositeField.Options().IncludeTermVectors(), fieldIndex, fieldLength, tokenFreqs)
rv.Rows = append(rv.Rows, indexRows...)
backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
}
}
// build the back index row
backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermEntries, backIndexStoredEntries)
rv.Rows = append(rv.Rows, backIndexRow)