scorch sorts fields by name to assign fieldID's
This is a stepping stone to allow easier future comparisons of field maps and potential merge optimizations. In bleve-blast tests on a 2015 macbook (50K wikipedia docs, 8 indexers, batch size 100, ssd), this does not seem to have a distinct effect on indexing throughput.
This commit is contained in:
parent
1af90936c4
commit
ffdeb8055e
|
@ -95,6 +95,21 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) {
|
|||
var numTokenFrequencies int
|
||||
var totLocs int
|
||||
|
||||
// initial scan for all fieldID's to sort them
|
||||
for _, result := range results {
|
||||
for _, field := range result.Document.CompositeFields {
|
||||
s.getOrDefineField(field.Name())
|
||||
}
|
||||
for _, field := range result.Document.Fields {
|
||||
s.getOrDefineField(field.Name())
|
||||
}
|
||||
}
|
||||
sort.Strings(s.FieldsInv[1:]) // keep _id as first field
|
||||
s.FieldsMap = make(map[string]uint16, len(s.FieldsInv))
|
||||
for fieldID, fieldName := range s.FieldsInv {
|
||||
s.FieldsMap[fieldName] = uint16(fieldID + 1)
|
||||
}
|
||||
|
||||
processField := func(fieldID uint16, tfs analysis.TokenFrequencies) {
|
||||
for term, tf := range tfs {
|
||||
pidPlus1, exists := s.Dicts[fieldID][term]
|
||||
|
|
|
@ -21,6 +21,7 @@ import (
|
|||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"sort"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/Smerity/govarint"
|
||||
|
@ -545,5 +546,8 @@ func mergeFields(segments []*SegmentBase) []string {
|
|||
rv = append(rv, k)
|
||||
}
|
||||
}
|
||||
|
||||
sort.Strings(rv[1:]) // leave _id as first
|
||||
|
||||
return rv
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ import (
|
|||
"math"
|
||||
"os"
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
|
@ -574,6 +575,7 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) {
|
|||
t.Fatalf("segment VisitableDocValueFields err: %v", err)
|
||||
}
|
||||
|
||||
sort.Strings(expectedFields[1:]) // keep _id as first field
|
||||
if !reflect.DeepEqual(fields, expectedFields) {
|
||||
t.Errorf("expected field terms: %#v, got: %#v", expectedFields, fields)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue