extract common functionality between build/merge
This commit is contained in:
parent
cd45487cb3
commit
1cd3fd7fbe
@ -77,7 +77,7 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e
|
|||||||
}
|
}
|
||||||
|
|
||||||
var fieldIndexStart uint64
|
var fieldIndexStart uint64
|
||||||
fieldIndexStart, err = persistFields(memSegment, cr, dictLocs)
|
fieldIndexStart, err = persistFields(memSegment.FieldsInv, cr, dictLocs)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -573,51 +573,6 @@ func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs
|
|||||||
return rv, nil
|
return rv, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func persistFields(memSegment *mem.Segment, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
|
|
||||||
var rv uint64
|
|
||||||
|
|
||||||
var fieldStarts []uint64
|
|
||||||
for fieldID, fieldName := range memSegment.FieldsInv {
|
|
||||||
|
|
||||||
// record start of this field
|
|
||||||
fieldStarts = append(fieldStarts, uint64(w.Count()))
|
|
||||||
|
|
||||||
buf := make([]byte, binary.MaxVarintLen64)
|
|
||||||
// write out dict location for this field
|
|
||||||
n := binary.PutUvarint(buf, dictLocs[fieldID])
|
|
||||||
_, err := w.Write(buf[:n])
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// write out the length of the field name
|
|
||||||
n = binary.PutUvarint(buf, uint64(len(fieldName)))
|
|
||||||
_, err = w.Write(buf[:n])
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// write out the field name
|
|
||||||
_, err = w.Write([]byte(fieldName))
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// now write out the fields index
|
|
||||||
rv = uint64(w.Count())
|
|
||||||
|
|
||||||
// now write out the stored doc index
|
|
||||||
for fieldID := range memSegment.FieldsInv {
|
|
||||||
err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID])
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return rv, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// FooterSize is the size of the footer record in bytes
|
// FooterSize is the size of the footer record in bytes
|
||||||
// crc + ver + chunk + field offset + stored offset + num docs
|
// crc + ver + chunk + field offset + stored offset + num docs
|
||||||
const FooterSize = 4 + 4 + 4 + 8 + 8 + 8
|
const FooterSize = 4 + 4 + 4 + 8 + 8 + 8
|
||||||
|
@ -5,7 +5,6 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"math"
|
"math"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
@ -36,7 +35,6 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
|
|||||||
|
|
||||||
fieldsInv := mergeFields(segments)
|
fieldsInv := mergeFields(segments)
|
||||||
fieldsMap := mapFields(fieldsInv)
|
fieldsMap := mapFields(fieldsInv)
|
||||||
|
|
||||||
newSegDocCount := computeNewDocCount(segments, drops)
|
newSegDocCount := computeNewDocCount(segments, drops)
|
||||||
|
|
||||||
var newDocNums [][]uint64
|
var newDocNums [][]uint64
|
||||||
@ -57,7 +55,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
|
|||||||
}
|
}
|
||||||
|
|
||||||
var fieldsIndexOffset uint64
|
var fieldsIndexOffset uint64
|
||||||
fieldsIndexOffset, err = persistMergedFields(fieldsInv, cr, dictLocs)
|
fieldsIndexOffset, err = persistFields(fieldsInv, cr, dictLocs)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -106,33 +104,6 @@ func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 {
|
|||||||
return newSegDocCount
|
return newSegDocCount
|
||||||
}
|
}
|
||||||
|
|
||||||
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer) (int, error) {
|
|
||||||
var buffer bytes.Buffer
|
|
||||||
// write out postings list to memory so we know the len
|
|
||||||
postingsListLen, err := r.WriteTo(&buffer)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
var tw int
|
|
||||||
// write out the length of this postings list
|
|
||||||
buf := make([]byte, binary.MaxVarintLen64)
|
|
||||||
n := binary.PutUvarint(buf, uint64(postingsListLen))
|
|
||||||
nw, err := w.Write(buf[:n])
|
|
||||||
tw += nw
|
|
||||||
if err != nil {
|
|
||||||
return tw, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// write out the postings list itself
|
|
||||||
nw, err = w.Write(buffer.Bytes())
|
|
||||||
tw += nw
|
|
||||||
if err != nil {
|
|
||||||
return tw, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return tw, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
||||||
fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64, newSegDocCount uint64,
|
fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64, newSegDocCount uint64,
|
||||||
w *CountHashWriter) ([]uint64, error) {
|
w *CountHashWriter) ([]uint64, error) {
|
||||||
@ -489,48 +460,3 @@ func mergeFields(segments []*Segment) []string {
|
|||||||
|
|
||||||
return rv
|
return rv
|
||||||
}
|
}
|
||||||
|
|
||||||
func persistMergedFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
|
|
||||||
var rv uint64
|
|
||||||
|
|
||||||
var fieldStarts []uint64
|
|
||||||
for fieldID, fieldName := range fieldsInv {
|
|
||||||
|
|
||||||
// record start of this field
|
|
||||||
fieldStarts = append(fieldStarts, uint64(w.Count()))
|
|
||||||
|
|
||||||
buf := make([]byte, binary.MaxVarintLen64)
|
|
||||||
// write out dict location for this field
|
|
||||||
n := binary.PutUvarint(buf, dictLocs[fieldID])
|
|
||||||
_, err := w.Write(buf[:n])
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// write out the length of the field name
|
|
||||||
n = binary.PutUvarint(buf, uint64(len(fieldName)))
|
|
||||||
_, err = w.Write(buf[:n])
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// write out the field name
|
|
||||||
_, err = w.Write([]byte(fieldName))
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// now write out the fields index
|
|
||||||
rv = uint64(w.Count())
|
|
||||||
|
|
||||||
// now write out the stored doc index
|
|
||||||
for fieldID := range fieldsInv {
|
|
||||||
err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID])
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return rv, nil
|
|
||||||
}
|
|
||||||
|
95
index/scorch/segment/zap/write.go
Normal file
95
index/scorch/segment/zap/write.go
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
// Copyright (c) 2017 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package zap
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
|
"io"
|
||||||
|
|
||||||
|
"github.com/RoaringBitmap/roaring"
|
||||||
|
)
|
||||||
|
|
||||||
|
// writes out the length of the roaring bitmap in bytes as varint
|
||||||
|
// then writes out the roaring bitmap itself
|
||||||
|
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer) (int, error) {
|
||||||
|
var buffer bytes.Buffer
|
||||||
|
// write out postings list to memory so we know the len
|
||||||
|
postingsListLen, err := r.WriteTo(&buffer)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
var tw int
|
||||||
|
// write out the length of this postings list
|
||||||
|
buf := make([]byte, binary.MaxVarintLen64)
|
||||||
|
n := binary.PutUvarint(buf, uint64(postingsListLen))
|
||||||
|
nw, err := w.Write(buf[:n])
|
||||||
|
tw += nw
|
||||||
|
if err != nil {
|
||||||
|
return tw, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// write out the postings list itself
|
||||||
|
nw, err = w.Write(buffer.Bytes())
|
||||||
|
tw += nw
|
||||||
|
if err != nil {
|
||||||
|
return tw, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return tw, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
|
||||||
|
var rv uint64
|
||||||
|
|
||||||
|
var fieldStarts []uint64
|
||||||
|
for fieldID, fieldName := range fieldsInv {
|
||||||
|
|
||||||
|
// record start of this field
|
||||||
|
fieldStarts = append(fieldStarts, uint64(w.Count()))
|
||||||
|
|
||||||
|
buf := make([]byte, binary.MaxVarintLen64)
|
||||||
|
// write out dict location for this field
|
||||||
|
n := binary.PutUvarint(buf, dictLocs[fieldID])
|
||||||
|
_, err := w.Write(buf[:n])
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// write out the length of the field name
|
||||||
|
n = binary.PutUvarint(buf, uint64(len(fieldName)))
|
||||||
|
_, err = w.Write(buf[:n])
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// write out the field name
|
||||||
|
_, err = w.Write([]byte(fieldName))
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// now write out the fields index
|
||||||
|
rv = uint64(w.Count())
|
||||||
|
for fieldID := range fieldsInv {
|
||||||
|
err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID])
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return rv, nil
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user