0
0
Fork 0

scorch zap VERSION bump: check whether fields are the same at merge

COMPATIBILITY NOTE: scorch zap version bumped in this commit.

The version is bumped because mergeFields() now computes whether fields
are the same across segments, which relies on the previous commit,
where fieldIDs are assigned in field-name sorted order (albeit with the
_id field always having a fieldID of 0).

Future commits might rely on this "fields are the same across
segments" information for further optimizations, etc.
This commit is contained in:
Steve Yen 2018-02-05 16:03:17 -08:00
parent ffdeb8055e
commit 822457542e
2 changed files with 21 additions and 8 deletions

View File

@ -28,7 +28,7 @@ import (
"github.com/golang/snappy"
)
const version uint32 = 2
const version uint32 = 3
const fieldNotUninverted = math.MaxUint64

View File

@ -102,13 +102,13 @@ func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap,
var dictLocs []uint64
fieldsInv := mergeFields(segments)
fieldsSame, fieldsInv := mergeFields(segments)
fieldsMap := mapFields(fieldsInv)
numDocs = computeNewDocCount(segments, drops)
if numDocs > 0 {
storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops,
fieldsMap, fieldsInv, numDocs, cr)
fieldsMap, fieldsInv, fieldsSame, numDocs, cr)
if err != nil {
return nil, 0, 0, 0, 0, err
}
@ -415,7 +415,7 @@ func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap,
const docDropped = math.MaxUint64
func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
fieldsMap map[string]uint16, fieldsInv []string, newSegDocCount uint64,
fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64,
w *CountHashWriter) (uint64, [][]uint64, error) {
var rv [][]uint64 // The remapped or newDocNums for each segment.
@ -528,13 +528,26 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
return storedIndexOffset, rv, nil
}
// mergeFields builds a unified list of fields used across all the input segments
func mergeFields(segments []*SegmentBase) []string {
// mergeFields builds a unified list of fields used across all the
// input segments, and computes whether the fields are the same across
// segments (which depends on fields to be sorted in the same way
// across segments)
func mergeFields(segments []*SegmentBase) (bool, []string) {
fieldsSame := true
var segment0Fields []string
if len(segments) > 0 {
segment0Fields = segments[0].Fields()
}
fieldsMap := map[string]struct{}{}
for _, segment := range segments {
fields := segment.Fields()
for _, field := range fields {
for fieldi, field := range fields {
fieldsMap[field] = struct{}{}
if len(segment0Fields) != len(fields) || segment0Fields[fieldi] != field {
fieldsSame = false
}
}
}
@ -549,5 +562,5 @@ func mergeFields(segments []*SegmentBase) []string {
sort.Strings(rv[1:]) // leave _id as first
return rv
return fieldsSame, rv
}