extract common functionality between build/merge
This commit is contained in:
parent
cd45487cb3
commit
1cd3fd7fbe
@ -77,7 +77,7 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e
|
||||
}
|
||||
|
||||
var fieldIndexStart uint64
|
||||
fieldIndexStart, err = persistFields(memSegment, cr, dictLocs)
|
||||
fieldIndexStart, err = persistFields(memSegment.FieldsInv, cr, dictLocs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -573,51 +573,6 @@ func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func persistFields(memSegment *mem.Segment, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
|
||||
var rv uint64
|
||||
|
||||
var fieldStarts []uint64
|
||||
for fieldID, fieldName := range memSegment.FieldsInv {
|
||||
|
||||
// record start of this field
|
||||
fieldStarts = append(fieldStarts, uint64(w.Count()))
|
||||
|
||||
buf := make([]byte, binary.MaxVarintLen64)
|
||||
// write out dict location for this field
|
||||
n := binary.PutUvarint(buf, dictLocs[fieldID])
|
||||
_, err := w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// write out the length of the field name
|
||||
n = binary.PutUvarint(buf, uint64(len(fieldName)))
|
||||
_, err = w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// write out the field name
|
||||
_, err = w.Write([]byte(fieldName))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
// now write out the fields index
|
||||
rv = uint64(w.Count())
|
||||
|
||||
// now write out the stored doc index
|
||||
for fieldID := range memSegment.FieldsInv {
|
||||
err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// FooterSize is the size of the footer record in bytes
|
||||
// crc + ver + chunk + field offset + stored offset + num docs
|
||||
const FooterSize = 4 + 4 + 4 + 8 + 8 + 8
|
||||
|
@ -5,7 +5,6 @@ import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
|
||||
@ -36,7 +35,6 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
|
||||
|
||||
fieldsInv := mergeFields(segments)
|
||||
fieldsMap := mapFields(fieldsInv)
|
||||
|
||||
newSegDocCount := computeNewDocCount(segments, drops)
|
||||
|
||||
var newDocNums [][]uint64
|
||||
@ -57,7 +55,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
|
||||
}
|
||||
|
||||
var fieldsIndexOffset uint64
|
||||
fieldsIndexOffset, err = persistMergedFields(fieldsInv, cr, dictLocs)
|
||||
fieldsIndexOffset, err = persistFields(fieldsInv, cr, dictLocs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -106,33 +104,6 @@ func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 {
|
||||
return newSegDocCount
|
||||
}
|
||||
|
||||
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer) (int, error) {
|
||||
var buffer bytes.Buffer
|
||||
// write out postings list to memory so we know the len
|
||||
postingsListLen, err := r.WriteTo(&buffer)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
var tw int
|
||||
// write out the length of this postings list
|
||||
buf := make([]byte, binary.MaxVarintLen64)
|
||||
n := binary.PutUvarint(buf, uint64(postingsListLen))
|
||||
nw, err := w.Write(buf[:n])
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
|
||||
// write out the postings list itself
|
||||
nw, err = w.Write(buffer.Bytes())
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
|
||||
return tw, nil
|
||||
}
|
||||
|
||||
func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
||||
fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64, newSegDocCount uint64,
|
||||
w *CountHashWriter) ([]uint64, error) {
|
||||
@ -489,48 +460,3 @@ func mergeFields(segments []*Segment) []string {
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func persistMergedFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
|
||||
var rv uint64
|
||||
|
||||
var fieldStarts []uint64
|
||||
for fieldID, fieldName := range fieldsInv {
|
||||
|
||||
// record start of this field
|
||||
fieldStarts = append(fieldStarts, uint64(w.Count()))
|
||||
|
||||
buf := make([]byte, binary.MaxVarintLen64)
|
||||
// write out dict location for this field
|
||||
n := binary.PutUvarint(buf, dictLocs[fieldID])
|
||||
_, err := w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// write out the length of the field name
|
||||
n = binary.PutUvarint(buf, uint64(len(fieldName)))
|
||||
_, err = w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// write out the field name
|
||||
_, err = w.Write([]byte(fieldName))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
// now write out the fields index
|
||||
rv = uint64(w.Count())
|
||||
|
||||
// now write out the stored doc index
|
||||
for fieldID := range fieldsInv {
|
||||
err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
95
index/scorch/segment/zap/write.go
Normal file
95
index/scorch/segment/zap/write.go
Normal file
@ -0,0 +1,95 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
)
|
||||
|
||||
// writes out the length of the roaring bitmap in bytes as varint
|
||||
// then writs out the roaring bitmap itself
|
||||
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer) (int, error) {
|
||||
var buffer bytes.Buffer
|
||||
// write out postings list to memory so we know the len
|
||||
postingsListLen, err := r.WriteTo(&buffer)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
var tw int
|
||||
// write out the length of this postings list
|
||||
buf := make([]byte, binary.MaxVarintLen64)
|
||||
n := binary.PutUvarint(buf, uint64(postingsListLen))
|
||||
nw, err := w.Write(buf[:n])
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
|
||||
// write out the postings list itself
|
||||
nw, err = w.Write(buffer.Bytes())
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
|
||||
return tw, nil
|
||||
}
|
||||
|
||||
func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
|
||||
var rv uint64
|
||||
|
||||
var fieldStarts []uint64
|
||||
for fieldID, fieldName := range fieldsInv {
|
||||
|
||||
// record start of this field
|
||||
fieldStarts = append(fieldStarts, uint64(w.Count()))
|
||||
|
||||
buf := make([]byte, binary.MaxVarintLen64)
|
||||
// write out dict location for this field
|
||||
n := binary.PutUvarint(buf, dictLocs[fieldID])
|
||||
_, err := w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// write out the length of the field name
|
||||
n = binary.PutUvarint(buf, uint64(len(fieldName)))
|
||||
_, err = w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// write out the field name
|
||||
_, err = w.Write([]byte(fieldName))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
// now write out the fields index
|
||||
rv = uint64(w.Count())
|
||||
for fieldID := range fieldsInv {
|
||||
err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
Loading…
Reference in New Issue
Block a user