From 822457542efe74b6b6b228ef52db2f56b8f3aea2 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 5 Feb 2018 16:03:17 -0800 Subject: [PATCH] scorch zap VERSION bump: check whether fields are the same at merge COMPATIBILITY NOTE: scorch zap version bumped in this commit. The version bump is because mergeFields() now computes whether fields are the same across segments and it relies on the previous commit where fieldID's are assigned in field name sorted order (albeit with _id field always having fieldID of 0). Potential future commits might rely on this info that "fields are the same across segments" for more optimizations, etc. --- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/merge.go | 27 ++++++++++++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 60d168e6..b3bbbab5 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -28,7 +28,7 @@ import ( "github.com/golang/snappy" ) -const version uint32 = 2 +const version uint32 = 3 const fieldNotUninverted = math.MaxUint64 diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 53b1ffe5..bef2a087 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -102,13 +102,13 @@ func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, var dictLocs []uint64 - fieldsInv := mergeFields(segments) + fieldsSame, fieldsInv := mergeFields(segments) fieldsMap := mapFields(fieldsInv) numDocs = computeNewDocCount(segments, drops) if numDocs > 0 { storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, - fieldsMap, fieldsInv, numDocs, cr) + fieldsMap, fieldsInv, fieldsSame, numDocs, cr) if err != nil { return nil, 0, 0, 0, 0, err } @@ -415,7 +415,7 @@ func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap, const docDropped = math.MaxUint64 func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, - fieldsMap map[string]uint16, fieldsInv []string, newSegDocCount uint64, + fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64, w *CountHashWriter) (uint64, [][]uint64, error) { var rv [][]uint64 // The remapped or newDocNums for each segment. @@ -528,13 +528,26 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, return storedIndexOffset, rv, nil } -// mergeFields builds a unified list of fields used across all the input segments -func mergeFields(segments []*SegmentBase) []string { +// mergeFields builds a unified list of fields used across all the +// input segments, and computes whether the fields are the same across +// segments (which depends on fields to be sorted in the same way +// across segments) +func mergeFields(segments []*SegmentBase) (bool, []string) { + fieldsSame := true + + var segment0Fields []string + if len(segments) > 0 { + segment0Fields = segments[0].Fields() + } + fieldsMap := map[string]struct{}{} for _, segment := range segments { fields := segment.Fields() - for _, field := range fields { + for fieldi, field := range fields { fieldsMap[field] = struct{}{} + if len(segment0Fields) != len(fields) || segment0Fields[fieldi] != field { + fieldsSame = false + } } } @@ -549,5 +562,5 @@ func mergeFields(segments []*SegmentBase) []string { sort.Strings(rv[1:]) // leave _id as first - return rv + return fieldsSame, rv }