From 67e0e5973b8cba53e7150adf29bfe54ed71db258 Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Wed, 20 Dec 2017 15:18:22 -0800
Subject: [PATCH] scorch mergeStoredAndRemap() memory reuse

In mergeStoredAndRemap(), instead of allocating new hashmaps for each
document, this commit reuses some arrays that are indexed by fieldId.
---
 index/scorch/segment/zap/merge.go | 96 ++++++++++++++++---------------
 1 file changed, 50 insertions(+), 46 deletions(-)

diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go
index 500830d0..5d845bbd 100644
--- a/index/scorch/segment/zap/merge.go
+++ b/index/scorch/segment/zap/merge.go
@@ -333,6 +333,10 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap,
 	var metaBuf bytes.Buffer
 	var data, compressed []byte
 
+	vals := make([][][]byte, len(fieldsInv))
+	typs := make([][]byte, len(fieldsInv))
+	poss := make([][][]uint64, len(fieldsInv))
+
 	docNumOffsets := make([]uint64, newSegDocCount)
 
 	// for each segment
@@ -353,11 +357,13 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap,
 		} else {
 			segNewDocNums = append(segNewDocNums, uint64(newDocNum))
 			// collect all the data
-			vals := make(map[uint16][][]byte)
-			typs := make(map[uint16][]byte)
-			poss := make(map[uint16][][]uint64)
+			for i := 0; i < len(fieldsInv); i++ {
+				vals[i] = vals[i][:0]
+				typs[i] = typs[i][:0]
+				poss[i] = poss[i][:0]
+			}
 			err := segment.VisitDocument(docNum, func(field string, typ byte, value []byte, pos []uint64) bool {
-				fieldID := fieldsMap[field]
+				fieldID := int(fieldsMap[field])
 				vals[fieldID] = append(vals[fieldID], value)
 				typs[fieldID] = append(typs[fieldID], typ)
 				poss[fieldID] = append(poss[fieldID], pos)
@@ -370,51 +376,49 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap,
 
 			// now walk the fields in order
 			for fieldID := range fieldsInv {
-				if storedFieldValues, ok := vals[uint16(fieldID)]; ok {
+				storedFieldValues := vals[int(fieldID)]
 
-					// has stored values for this field
-					num := len(storedFieldValues)
-
-					// process each value
-					for i := 0; i < num; i++ {
-						// encode field
-						_, err2 := metaEncoder.PutU64(uint64(fieldID))
-						if err2 != nil {
-							return 0, nil, err2
-						}
-						// encode type
-						_, err2 = metaEncoder.PutU64(uint64(typs[uint16(fieldID)][i]))
-						if err2 != nil {
-							return 0, nil, err2
-						}
-						// encode start offset
-						_, err2 = metaEncoder.PutU64(uint64(curr))
-						if err2 != nil {
-							return 0, nil, err2
-						}
-						// end len
-						_, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i])))
-						if err2 != nil {
-							return 0, nil, err2
-						}
-						// encode number of array pos
-						_, err2 = metaEncoder.PutU64(uint64(len(poss[uint16(fieldID)][i])))
-						if err2 != nil {
-							return 0, nil, err2
-						}
-						// encode all array positions
-						for j := 0; j < len(poss[uint16(fieldID)][i]); j++ {
-							_, err2 = metaEncoder.PutU64(poss[uint16(fieldID)][i][j])
-							if err2 != nil {
-								return 0, nil, err2
-							}
-						}
-						// append data
-						data = append(data, storedFieldValues[i]...)
-						// update curr
-						curr += len(storedFieldValues[i])
+				// has stored values for this field
+				num := len(storedFieldValues)
+				// process each value
+				for i := 0; i < num; i++ {
+					// encode field
+					_, err2 := metaEncoder.PutU64(uint64(fieldID))
+					if err2 != nil {
+						return 0, nil, err2
 					}
+					// encode type
+					_, err2 = metaEncoder.PutU64(uint64(typs[int(fieldID)][i]))
+					if err2 != nil {
+						return 0, nil, err2
+					}
+					// encode start offset
+					_, err2 = metaEncoder.PutU64(uint64(curr))
+					if err2 != nil {
+						return 0, nil, err2
+					}
+					// end len
+					_, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i])))
+					if err2 != nil {
+						return 0, nil, err2
+					}
+					// encode number of array pos
+					_, err2 = metaEncoder.PutU64(uint64(len(poss[int(fieldID)][i])))
+					if err2 != nil {
+						return 0, nil, err2
+					}
+					// encode all array positions
+					for j := 0; j < len(poss[int(fieldID)][i]); j++ {
+						_, err2 = metaEncoder.PutU64(poss[int(fieldID)][i][j])
+						if err2 != nil {
+							return 0, nil, err2
+						}
+					}
+					// append data
+					data = append(data, storedFieldValues[i]...)
+					// update curr
+					curr += len(storedFieldValues[i])
 				}
 			}
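
Note (not part of the patch itself): the reuse works because Go's s[:0]
truncation keeps a slice's backing array, so the appends inside the
VisitDocument callback for the next document usually land in existing
capacity instead of allocating, and no per-document maps are created at
all. Below is a minimal standalone sketch of that pattern; the field
names, documents, and printing are hypothetical stand-ins, not bleve's
actual types or API.

package main

import "fmt"

func main() {
	// fieldID -> field name, and the reverse lookup, mirroring the
	// fieldsInv / fieldsMap pair used by mergeStoredAndRemap.
	fieldsInv := []string{"_id", "name", "desc"}
	fieldsMap := map[string]uint16{"_id": 0, "name": 1, "desc": 2}

	// Allocated once, outside the per-document loop, indexed by fieldID.
	vals := make([][][]byte, len(fieldsInv))

	// Hypothetical stand-in for visiting each document's stored fields.
	docs := []map[string][]byte{
		{"_id": []byte("1"), "name": []byte("a")},
		{"name": []byte("b"), "desc": []byte("c")},
	}

	for _, doc := range docs {
		// Truncate to zero length but keep capacity: no new map per
		// document, and the appends below reuse each field's backing
		// array once it has grown large enough.
		for i := 0; i < len(fieldsInv); i++ {
			vals[i] = vals[i][:0]
		}
		for field, value := range doc {
			fieldID := int(fieldsMap[field])
			vals[fieldID] = append(vals[fieldID], value)
		}
		// Walk the fields in fieldID order, as the patched code does.
		for fieldID := range fieldsInv {
			for _, v := range vals[fieldID] {
				fmt.Printf("field %d: %s\n", fieldID, v)
			}
		}
	}
}

This is also why the old `if storedFieldValues, ok := vals[uint16(fieldID)];
ok` guard can be dropped in the patch: a field with no stored values for the
current document is simply a zero-length (possibly nil) slice, and iterating
over it is a no-op.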