From 1cd3fd7fbe661f0f29656155aa48fa864b0551e9 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 13 Dec 2017 14:06:54 -0500 Subject: [PATCH] extrac common functionality between build/merge --- index/scorch/segment/zap/build.go | 47 +-------------- index/scorch/segment/zap/merge.go | 76 +------------------------ index/scorch/segment/zap/write.go | 95 +++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 121 deletions(-) create mode 100644 index/scorch/segment/zap/write.go diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 2bc520f6..2cad61a5 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -77,7 +77,7 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e } var fieldIndexStart uint64 - fieldIndexStart, err = persistFields(memSegment, cr, dictLocs) + fieldIndexStart, err = persistFields(memSegment.FieldsInv, cr, dictLocs) if err != nil { return err } @@ -573,51 +573,6 @@ func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs return rv, nil } -func persistFields(memSegment *mem.Segment, w *CountHashWriter, dictLocs []uint64) (uint64, error) { - var rv uint64 - - var fieldStarts []uint64 - for fieldID, fieldName := range memSegment.FieldsInv { - - // record start of this field - fieldStarts = append(fieldStarts, uint64(w.Count())) - - buf := make([]byte, binary.MaxVarintLen64) - // write out dict location for this field - n := binary.PutUvarint(buf, dictLocs[fieldID]) - _, err := w.Write(buf[:n]) - if err != nil { - return 0, err - } - - // write out the length of the field name - n = binary.PutUvarint(buf, uint64(len(fieldName))) - _, err = w.Write(buf[:n]) - if err != nil { - return 0, err - } - - // write out the field name - _, err = w.Write([]byte(fieldName)) - if err != nil { - return 0, err - } - } - - // now write out the fields index - rv = uint64(w.Count()) - - // now write out the stored doc index - for fieldID 
:= range memSegment.FieldsInv { - err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID]) - if err != nil { - return 0, err - } - } - - return rv, nil -} - // FooterSize is the size of the footer record in bytes // crc + ver + chunk + field offset + stored offset + num docs const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 7652c221..972b1d16 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -5,7 +5,6 @@ import ( "bytes" "encoding/binary" "fmt" - "io" "math" "os" @@ -36,7 +35,6 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, fieldsInv := mergeFields(segments) fieldsMap := mapFields(fieldsInv) - newSegDocCount := computeNewDocCount(segments, drops) var newDocNums [][]uint64 @@ -57,7 +55,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, } var fieldsIndexOffset uint64 - fieldsIndexOffset, err = persistMergedFields(fieldsInv, cr, dictLocs) + fieldsIndexOffset, err = persistFields(fieldsInv, cr, dictLocs) if err != nil { return nil, err } @@ -106,33 +104,6 @@ func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 { return newSegDocCount } -func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer) (int, error) { - var buffer bytes.Buffer - // write out postings list to memory so we know the len - postingsListLen, err := r.WriteTo(&buffer) - if err != nil { - return 0, err - } - var tw int - // write out the length of this postings list - buf := make([]byte, binary.MaxVarintLen64) - n := binary.PutUvarint(buf, uint64(postingsListLen)) - nw, err := w.Write(buf[:n]) - tw += nw - if err != nil { - return tw, err - } - - // write out the postings list itself - nw, err = w.Write(buffer.Bytes()) - tw += nw - if err != nil { - return tw, err - } - - return tw, nil -} - func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, fieldsInv []string, fieldsMap map[string]uint16, 
newDocNums [][]uint64, newSegDocCount uint64, w *CountHashWriter) ([]uint64, error) { @@ -489,48 +460,3 @@ func mergeFields(segments []*Segment) []string { return rv } - -func persistMergedFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) { - var rv uint64 - - var fieldStarts []uint64 - for fieldID, fieldName := range fieldsInv { - - // record start of this field - fieldStarts = append(fieldStarts, uint64(w.Count())) - - buf := make([]byte, binary.MaxVarintLen64) - // write out dict location for this field - n := binary.PutUvarint(buf, dictLocs[fieldID]) - _, err := w.Write(buf[:n]) - if err != nil { - return 0, err - } - - // write out the length of the field name - n = binary.PutUvarint(buf, uint64(len(fieldName))) - _, err = w.Write(buf[:n]) - if err != nil { - return 0, err - } - - // write out the field name - _, err = w.Write([]byte(fieldName)) - if err != nil { - return 0, err - } - } - - // now write out the fields index - rv = uint64(w.Count()) - - // now write out the stored doc index - for fieldID := range fieldsInv { - err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID]) - if err != nil { - return 0, err - } - } - - return rv, nil -} diff --git a/index/scorch/segment/zap/write.go b/index/scorch/segment/zap/write.go new file mode 100644 index 00000000..9772b3a6 --- /dev/null +++ b/index/scorch/segment/zap/write.go @@ -0,0 +1,95 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bytes" + "encoding/binary" + "io" + + "github.com/RoaringBitmap/roaring" +) + +// writes out the length of the roaring bitmap in bytes as varint +// then writes out the roaring bitmap itself +func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer) (int, error) { + var buffer bytes.Buffer + // write out postings list to memory so we know the len + postingsListLen, err := r.WriteTo(&buffer) + if err != nil { + return 0, err + } + var tw int + // write out the length of this postings list + buf := make([]byte, binary.MaxVarintLen64) + n := binary.PutUvarint(buf, uint64(postingsListLen)) + nw, err := w.Write(buf[:n]) + tw += nw + if err != nil { + return tw, err + } + + // write out the postings list itself + nw, err = w.Write(buffer.Bytes()) + tw += nw + if err != nil { + return tw, err + } + + return tw, nil +} + +func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) { + var rv uint64 + + var fieldStarts []uint64 + for fieldID, fieldName := range fieldsInv { + + // record start of this field + fieldStarts = append(fieldStarts, uint64(w.Count())) + + buf := make([]byte, binary.MaxVarintLen64) + // write out dict location for this field + n := binary.PutUvarint(buf, dictLocs[fieldID]) + _, err := w.Write(buf[:n]) + if err != nil { + return 0, err + } + + // write out the length of the field name + n = binary.PutUvarint(buf, uint64(len(fieldName))) + _, err = w.Write(buf[:n]) + if err != nil { + return 0, err + } + + // write out the field name + _, err = w.Write([]byte(fieldName)) + if err != nil { + return 0, err + } + } + + // now write out the fields index + rv = uint64(w.Count()) + for fieldID := range fieldsInv { + err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID]) + if err != nil { + return 0, err + } + } + + return rv, nil +}