2017-12-28 07:35:33 +01:00
|
|
|
// Copyright (c) 2017 Couchbase, Inc.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package zap
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"encoding/binary"
|
|
|
|
"fmt"
|
|
|
|
"math"
|
2018-03-02 02:12:16 +01:00
|
|
|
"reflect"
|
2017-12-28 07:35:33 +01:00
|
|
|
"sort"
|
|
|
|
|
|
|
|
"github.com/blevesearch/bleve/index"
|
2018-03-02 02:12:16 +01:00
|
|
|
"github.com/blevesearch/bleve/size"
|
2017-12-28 07:35:33 +01:00
|
|
|
"github.com/golang/snappy"
|
|
|
|
)
|
|
|
|
|
2018-03-02 02:12:16 +01:00
|
|
|
var reflectStaticSizedocValueIterator int
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
var dvi docValueIterator
|
|
|
|
reflectStaticSizedocValueIterator = int(reflect.TypeOf(dvi).Size())
|
|
|
|
}
|
|
|
|
|
2017-12-28 07:35:33 +01:00
|
|
|
// docValueIterator provides access to the persisted (uninverted) doc
// values of a single field, loading and caching one compressed chunk
// of data at a time.
type docValueIterator struct {
	field       string   // name of the field this iterator serves
	curChunkNum uint64   // chunk currently cached below; math.MaxUint64 means none loaded yet
	numChunks   uint64   // number of doc value chunks for this field
	chunkLens   []uint64 // byte length of each chunk, in chunk order
	dvDataLoc   uint64   // offset within the segment where the chunk data begins
	curChunkHeader []MetaData // decoded per-doc metadata of the current chunk
	curChunkData   []byte // compressed data cache
}
|
|
|
|
|
2018-03-02 02:12:16 +01:00
|
|
|
func (di *docValueIterator) size() int {
|
|
|
|
return reflectStaticSizedocValueIterator + size.SizeOfPtr +
|
|
|
|
len(di.field) +
|
|
|
|
len(di.chunkLens)*size.SizeOfUint64 +
|
|
|
|
len(di.curChunkHeader)*reflectStaticSizeMetaData +
|
|
|
|
len(di.curChunkData)
|
2018-01-12 21:11:11 +01:00
|
|
|
}
|
|
|
|
|
2017-12-28 07:35:33 +01:00
|
|
|
// fieldName returns the name of the field whose doc values this
// iterator serves.
func (di *docValueIterator) fieldName() string {
	return di.field
}
|
|
|
|
|
|
|
|
// curChunkNumber returns the number of the chunk currently cached by
// this iterator (math.MaxUint64 when no chunk has been loaded yet).
func (di *docValueIterator) curChunkNumber() uint64 {
	return di.curChunkNum
}
|
|
|
|
|
2018-01-18 03:46:57 +01:00
|
|
|
func (s *SegmentBase) loadFieldDocValueIterator(field string,
|
2017-12-28 07:35:33 +01:00
|
|
|
fieldDvLoc uint64) (*docValueIterator, error) {
|
|
|
|
// get the docValue offset for the given fields
|
2017-12-30 12:24:06 +01:00
|
|
|
if fieldDvLoc == fieldNotUninverted {
|
2018-01-17 20:29:32 +01:00
|
|
|
return nil, fmt.Errorf("loadFieldDocValueIterator: "+
|
2017-12-28 07:35:33 +01:00
|
|
|
"no docValues found for field: %s", field)
|
|
|
|
}
|
|
|
|
|
|
|
|
// read the number of chunks, chunk lengths
|
2017-12-28 08:35:25 +01:00
|
|
|
var offset, clen uint64
|
2018-01-18 03:46:57 +01:00
|
|
|
numChunks, read := binary.Uvarint(s.mem[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64])
|
2017-12-28 07:35:33 +01:00
|
|
|
if read <= 0 {
|
|
|
|
return nil, fmt.Errorf("failed to read the field "+
|
|
|
|
"doc values for field %s", field)
|
|
|
|
}
|
|
|
|
offset += uint64(read)
|
|
|
|
|
|
|
|
fdvIter := &docValueIterator{
|
|
|
|
curChunkNum: math.MaxUint64,
|
|
|
|
field: field,
|
|
|
|
chunkLens: make([]uint64, int(numChunks)),
|
|
|
|
}
|
|
|
|
for i := 0; i < int(numChunks); i++ {
|
2018-01-18 03:46:57 +01:00
|
|
|
clen, read = binary.Uvarint(s.mem[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64])
|
2017-12-28 08:35:25 +01:00
|
|
|
if read <= 0 {
|
|
|
|
return nil, fmt.Errorf("corrupted chunk length during segment load")
|
|
|
|
}
|
|
|
|
fdvIter.chunkLens[i] = clen
|
2017-12-28 07:35:33 +01:00
|
|
|
offset += uint64(read)
|
|
|
|
}
|
|
|
|
|
|
|
|
fdvIter.dvDataLoc = fieldDvLoc + offset
|
|
|
|
return fdvIter, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (di *docValueIterator) loadDvChunk(chunkNumber,
|
2018-01-18 03:46:57 +01:00
|
|
|
localDocNum uint64, s *SegmentBase) error {
|
2017-12-28 07:35:33 +01:00
|
|
|
// advance to the chunk where the docValues
|
2018-02-03 19:51:24 +01:00
|
|
|
// reside for the given docNum
|
2017-12-28 07:35:33 +01:00
|
|
|
destChunkDataLoc := di.dvDataLoc
|
|
|
|
for i := 0; i < int(chunkNumber); i++ {
|
|
|
|
destChunkDataLoc += di.chunkLens[i]
|
|
|
|
}
|
|
|
|
|
|
|
|
curChunkSize := di.chunkLens[chunkNumber]
|
|
|
|
// read the number of docs reside in the chunk
|
2018-01-18 03:46:57 +01:00
|
|
|
numDocs, read := binary.Uvarint(s.mem[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64])
|
2017-12-28 07:35:33 +01:00
|
|
|
if read <= 0 {
|
|
|
|
return fmt.Errorf("failed to read the chunk")
|
|
|
|
}
|
|
|
|
chunkMetaLoc := destChunkDataLoc + uint64(read)
|
|
|
|
|
|
|
|
offset := uint64(0)
|
2017-12-29 17:09:29 +01:00
|
|
|
di.curChunkHeader = make([]MetaData, int(numDocs))
|
2017-12-28 07:35:33 +01:00
|
|
|
for i := 0; i < int(numDocs); i++ {
|
2018-02-03 19:51:24 +01:00
|
|
|
di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
2017-12-28 07:35:33 +01:00
|
|
|
offset += uint64(read)
|
2018-03-12 11:06:46 +01:00
|
|
|
di.curChunkHeader[i].DocDvOffset, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
2017-12-28 07:35:33 +01:00
|
|
|
offset += uint64(read)
|
|
|
|
}
|
|
|
|
|
|
|
|
compressedDataLoc := chunkMetaLoc + offset
|
|
|
|
dataLength := destChunkDataLoc + curChunkSize - compressedDataLoc
|
2018-01-18 03:46:57 +01:00
|
|
|
di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength]
|
2017-12-28 07:35:33 +01:00
|
|
|
di.curChunkNum = chunkNumber
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-02-03 19:51:24 +01:00
|
|
|
func (di *docValueIterator) visitDocValues(docNum uint64,
|
2017-12-28 07:35:33 +01:00
|
|
|
visitor index.DocumentFieldTermVisitor) error {
|
2018-02-03 19:51:24 +01:00
|
|
|
// binary search the term locations for the docNum
|
2018-03-12 11:06:46 +01:00
|
|
|
start, end := di.getDocValueLocs(docNum)
|
|
|
|
if start == math.MaxUint64 || end == math.MaxUint64 {
|
2017-12-28 07:35:33 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
// uncompress the already loaded data
|
|
|
|
uncompressed, err := snappy.Decode(nil, di.curChunkData)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2018-02-03 19:51:24 +01:00
|
|
|
// pick the terms for the given docNum
|
2018-03-12 11:06:46 +01:00
|
|
|
uncompressed = uncompressed[start:end]
|
2017-12-28 07:35:33 +01:00
|
|
|
for {
|
|
|
|
i := bytes.Index(uncompressed, termSeparatorSplitSlice)
|
|
|
|
if i < 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
visitor(di.field, uncompressed[0:i])
|
|
|
|
uncompressed = uncompressed[i+1:]
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-02-03 19:51:24 +01:00
|
|
|
func (di *docValueIterator) getDocValueLocs(docNum uint64) (uint64, uint64) {
|
2017-12-28 07:35:33 +01:00
|
|
|
i := sort.Search(len(di.curChunkHeader), func(i int) bool {
|
2018-02-03 19:51:24 +01:00
|
|
|
return di.curChunkHeader[i].DocNum >= docNum
|
2017-12-28 07:35:33 +01:00
|
|
|
})
|
2018-02-03 19:51:24 +01:00
|
|
|
if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum {
|
2018-03-13 09:36:48 +01:00
|
|
|
return ReadDocValueBoundary(i, di.curChunkHeader)
|
2017-12-28 07:35:33 +01:00
|
|
|
}
|
|
|
|
return math.MaxUint64, math.MaxUint64
|
|
|
|
}
|
|
|
|
|
2018-01-08 06:28:33 +01:00
|
|
|
// VisitDocumentFieldTerms is an implementation of the
|
|
|
|
// DocumentFieldTermVisitable interface
|
2018-01-18 03:46:57 +01:00
|
|
|
func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
2017-12-28 07:35:33 +01:00
|
|
|
visitor index.DocumentFieldTermVisitor) error {
|
2018-01-18 03:46:57 +01:00
|
|
|
fieldIDPlus1 := uint16(0)
|
2017-12-28 07:35:33 +01:00
|
|
|
ok := true
|
|
|
|
for _, field := range fields {
|
2018-01-18 03:46:57 +01:00
|
|
|
if fieldIDPlus1, ok = s.fieldsMap[field]; !ok {
|
2017-12-28 07:35:33 +01:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
// find the chunkNumber where the docValues are stored
|
|
|
|
docInChunk := localDocNum / uint64(s.chunkFactor)
|
|
|
|
|
2018-01-18 03:46:57 +01:00
|
|
|
if dvIter, exists := s.fieldDvIterMap[fieldIDPlus1-1]; exists &&
|
2017-12-28 07:35:33 +01:00
|
|
|
dvIter != nil {
|
|
|
|
// check if the chunk is already loaded
|
|
|
|
if docInChunk != dvIter.curChunkNumber() {
|
|
|
|
err := dvIter.loadDvChunk(docInChunk, localDocNum, s)
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-12-28 08:53:57 +01:00
|
|
|
_ = dvIter.visitDocValues(localDocNum, visitor)
|
2017-12-28 07:35:33 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2018-01-08 06:28:33 +01:00
|
|
|
|
|
|
|
// VisitableDocValueFields returns the list of fields with
|
|
|
|
// persisted doc value terms ready to be visitable using the
|
|
|
|
// VisitDocumentFieldTerms method.
|
|
|
|
func (s *Segment) VisitableDocValueFields() ([]string, error) {
|
|
|
|
var rv []string
|
|
|
|
for fieldID, field := range s.fieldsInv {
|
|
|
|
if dvIter, ok := s.fieldDvIterMap[uint16(fieldID)]; ok &&
|
|
|
|
dvIter != nil {
|
|
|
|
rv = append(rv, field)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return rv, nil
|
|
|
|
}
|