0
0

extract common functionality between build/merge

This commit is contained in:
Marty Schoch 2017-12-13 14:06:54 -05:00
parent cd45487cb3
commit 1cd3fd7fbe
3 changed files with 97 additions and 121 deletions

View File

@ -77,7 +77,7 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e
}
var fieldIndexStart uint64
fieldIndexStart, err = persistFields(memSegment, cr, dictLocs)
fieldIndexStart, err = persistFields(memSegment.FieldsInv, cr, dictLocs)
if err != nil {
return err
}
@ -573,51 +573,6 @@ func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs
return rv, nil
}
// persistFields writes one record per entry of memSegment.FieldsInv to w,
// followed by the fields index, and returns the value of w.Count() at the
// point where the fields index begins.
//
// Each field record consists of:
//   - dictLocs[fieldID], encoded as a uvarint
//   - the length of the field name, encoded as a uvarint
//   - the bytes of the field name
//
// The fields index holds the value of w.Count() taken just before each field
// record was written, in field ID order, each as a big-endian uint64.
//
// dictLocs is indexed by field ID, so it must have at least as many entries
// as memSegment.FieldsInv. If any write fails, 0 and the error are returned.
func persistFields(memSegment *mem.Segment, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
	fieldsInv := memSegment.FieldsInv
	fieldStarts := make([]uint64, 0, len(fieldsInv))

	// one scratch buffer, reused for every integer encoded below; it is large
	// enough for both a uvarint and a fixed 8-byte uint64
	buf := make([]byte, binary.MaxVarintLen64)

	for fieldID, fieldName := range fieldsInv {
		// record start of this field
		fieldStarts = append(fieldStarts, uint64(w.Count()))

		// write out dict location for this field
		n := binary.PutUvarint(buf, dictLocs[fieldID])
		if _, err := w.Write(buf[:n]); err != nil {
			return 0, err
		}

		// write out the length of the field name
		n = binary.PutUvarint(buf, uint64(len(fieldName)))
		if _, err := w.Write(buf[:n]); err != nil {
			return 0, err
		}

		// write out the field name
		if _, err := w.Write([]byte(fieldName)); err != nil {
			return 0, err
		}
	}

	// now write out the fields index
	rv := uint64(w.Count())
	for _, fieldStart := range fieldStarts {
		binary.BigEndian.PutUint64(buf, fieldStart)
		if _, err := w.Write(buf[:8]); err != nil {
			return 0, err
		}
	}

	return rv, nil
}
// FooterSize is the size of the footer record in bytes (36 in total).
// It is the sum of the widths of the following footer entries:
// crc + ver + chunk + field offset + stored offset + num docs
const FooterSize = 4 + 4 + 4 + 8 + 8 + 8

View File

@ -5,7 +5,6 @@ import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math"
"os"
@ -36,7 +35,6 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
fieldsInv := mergeFields(segments)
fieldsMap := mapFields(fieldsInv)
newSegDocCount := computeNewDocCount(segments, drops)
var newDocNums [][]uint64
@ -57,7 +55,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
}
var fieldsIndexOffset uint64
fieldsIndexOffset, err = persistMergedFields(fieldsInv, cr, dictLocs)
fieldsIndexOffset, err = persistFields(fieldsInv, cr, dictLocs)
if err != nil {
return nil, err
}
@ -106,33 +104,6 @@ func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 {
return newSegDocCount
}
// writeRoaringWithLen serializes r and writes it to w, preceded by the
// serialized length in bytes encoded as a uvarint. It returns the total
// number of bytes written to w (length prefix plus bitmap).
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer) (int, error) {
	// serialize into memory first: the length has to be known before the
	// bitmap bytes themselves can be emitted
	var serialized bytes.Buffer
	size, err := r.WriteTo(&serialized)
	if err != nil {
		return 0, err
	}

	// emit the uvarint length prefix
	prefix := make([]byte, binary.MaxVarintLen64)
	prefix = prefix[:binary.PutUvarint(prefix, uint64(size))]
	written, err := w.Write(prefix)
	if err != nil {
		return written, err
	}

	// emit the serialized bitmap
	n, err := w.Write(serialized.Bytes())
	return written + n, err
}
func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64, newSegDocCount uint64,
w *CountHashWriter) ([]uint64, error) {
@ -489,48 +460,3 @@ func mergeFields(segments []*Segment) []string {
return rv
}
// persistMergedFields writes one record per entry of fieldsInv to w,
// followed by the fields index, and returns the value of w.Count() at the
// point where the fields index begins.
//
// Each field record consists of:
//   - dictLocs[fieldID], encoded as a uvarint
//   - the length of the field name, encoded as a uvarint
//   - the bytes of the field name
//
// The fields index holds the value of w.Count() taken just before each field
// record was written, in field ID order, each as a big-endian uint64.
//
// dictLocs is indexed by field ID, so it must have at least as many entries
// as fieldsInv. If any write fails, 0 and the error are returned.
func persistMergedFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
	fieldStarts := make([]uint64, 0, len(fieldsInv))

	// one scratch buffer, reused for every integer encoded below; it is large
	// enough for both a uvarint and a fixed 8-byte uint64
	buf := make([]byte, binary.MaxVarintLen64)

	for fieldID, fieldName := range fieldsInv {
		// record start of this field
		fieldStarts = append(fieldStarts, uint64(w.Count()))

		// write out dict location for this field
		n := binary.PutUvarint(buf, dictLocs[fieldID])
		if _, err := w.Write(buf[:n]); err != nil {
			return 0, err
		}

		// write out the length of the field name
		n = binary.PutUvarint(buf, uint64(len(fieldName)))
		if _, err := w.Write(buf[:n]); err != nil {
			return 0, err
		}

		// write out the field name
		if _, err := w.Write([]byte(fieldName)); err != nil {
			return 0, err
		}
	}

	// now write out the fields index
	rv := uint64(w.Count())
	for _, fieldStart := range fieldStarts {
		binary.BigEndian.PutUint64(buf, fieldStart)
		if _, err := w.Write(buf[:8]); err != nil {
			return 0, err
		}
	}

	return rv, nil
}

View File

@ -0,0 +1,95 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bytes"
"encoding/binary"
"io"
"github.com/RoaringBitmap/roaring"
)
// writeRoaringWithLen writes the length of the serialized roaring bitmap in
// bytes as a uvarint, then writes the serialized bitmap itself. The returned
// count is the total number of bytes written to w.
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer) (int, error) {
	// the bitmap is serialized to memory up front so that its length is
	// available for the prefix
	var bitmapBytes bytes.Buffer
	bitmapLen, err := r.WriteTo(&bitmapBytes)
	if err != nil {
		return 0, err
	}

	// length prefix
	lenBuf := make([]byte, binary.MaxVarintLen64)
	lenBuf = lenBuf[:binary.PutUvarint(lenBuf, uint64(bitmapLen))]
	total, err := w.Write(lenBuf)
	if err != nil {
		return total, err
	}

	// bitmap payload
	wrote, err := w.Write(bitmapBytes.Bytes())
	total += wrote
	return total, err
}
// persistFields writes one record per entry of fieldsInv to w, followed by
// the fields index, and returns the value of w.Count() at the point where the
// fields index begins. It is shared by the segment build and merge paths.
//
// Each field record consists of:
//   - dictLocs[fieldID], encoded as a uvarint
//   - the length of the field name, encoded as a uvarint
//   - the bytes of the field name
//
// The fields index holds the value of w.Count() taken just before each field
// record was written, in field ID order, each as a big-endian uint64.
//
// dictLocs is indexed by field ID, so it must have at least as many entries
// as fieldsInv. If any write fails, 0 and the error are returned.
func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
	fieldStarts := make([]uint64, 0, len(fieldsInv))

	// one scratch buffer, reused for every integer encoded below; it is large
	// enough for both a uvarint and a fixed 8-byte uint64
	buf := make([]byte, binary.MaxVarintLen64)

	for fieldID, fieldName := range fieldsInv {
		// record start of this field
		fieldStarts = append(fieldStarts, uint64(w.Count()))

		// write out dict location for this field
		n := binary.PutUvarint(buf, dictLocs[fieldID])
		if _, err := w.Write(buf[:n]); err != nil {
			return 0, err
		}

		// write out the length of the field name
		n = binary.PutUvarint(buf, uint64(len(fieldName)))
		if _, err := w.Write(buf[:n]); err != nil {
			return 0, err
		}

		// write out the field name
		if _, err := w.Write([]byte(fieldName)); err != nil {
			return 0, err
		}
	}

	// now write out the fields index
	rv := uint64(w.Count())
	for _, fieldStart := range fieldStarts {
		binary.BigEndian.PutUint64(buf, fieldStart)
		if _, err := w.Write(buf[:8]); err != nil {
			return 0, err
		}
	}

	return rv, nil
}