scorch zap optimize FST val encoding for terms with 1 hit
NOTE: this is a scorch zap file format change / bump to version 4. In this optimization, the uint64 val stored in the vellum FST (term dictionary) may now be either a uint64 postingsOffset (same as before this change) or a uint64 encoding of the docNum + norm (in the case where a term appears in just a single doc).
This commit is contained in:
parent
f04226d10b
commit
eac9808990
|
@ -18,7 +18,9 @@ import (
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
||||||
"github.com/couchbase/vellum"
|
"github.com/couchbase/vellum"
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
)
|
)
|
||||||
|
@ -57,7 +59,19 @@ var exploreCmd = &cobra.Command{
|
||||||
return fmt.Errorf("error looking for term : %v", err)
|
return fmt.Errorf("error looking for term : %v", err)
|
||||||
}
|
}
|
||||||
if exists {
|
if exists {
|
||||||
fmt.Printf("postings list begins at %d (%x)\n", postingsAddr, postingsAddr)
|
fmt.Printf("fst val is %d (%x)\n", postingsAddr, postingsAddr)
|
||||||
|
|
||||||
|
if postingsAddr&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit {
|
||||||
|
docNum, normBits := zap.FSTValDecode1Hit(postingsAddr)
|
||||||
|
norm := math.Float32frombits(uint32(normBits))
|
||||||
|
fmt.Printf("Posting List is 1-hit encoded, docNum: %d, norm: %f\n",
|
||||||
|
docNum, norm)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if postingsAddr&zap.FSTValEncodingMask != zap.FSTValEncodingGeneral {
|
||||||
|
return fmt.Errorf("unknown fst val encoding")
|
||||||
|
}
|
||||||
|
|
||||||
var n uint64
|
var n uint64
|
||||||
freqAddr, read := binary.Uvarint(data[postingsAddr : postingsAddr+binary.MaxVarintLen64])
|
freqAddr, read := binary.Uvarint(data[postingsAddr : postingsAddr+binary.MaxVarintLen64])
|
||||||
|
|
|
@ -28,7 +28,7 @@ import (
|
||||||
"github.com/golang/snappy"
|
"github.com/golang/snappy"
|
||||||
)
|
)
|
||||||
|
|
||||||
const version uint32 = 3
|
const version uint32 = 4
|
||||||
|
|
||||||
const fieldNotUninverted = math.MaxUint64
|
const fieldNotUninverted = math.MaxUint64
|
||||||
|
|
||||||
|
|
|
@ -130,3 +130,7 @@ func (c *chunkedIntCoder) Write(w io.Writer) (int, error) {
|
||||||
}
|
}
|
||||||
return tw, nil
|
return tw, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *chunkedIntCoder) FinalSize() int {
|
||||||
|
return len(c.final)
|
||||||
|
}
|
||||||
|
|
|
@ -225,6 +225,21 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
newRoaring := roaring.NewBitmap()
|
newRoaring := roaring.NewBitmap()
|
||||||
newRoaringLocs := roaring.NewBitmap()
|
newRoaringLocs := roaring.NewBitmap()
|
||||||
|
|
||||||
|
var lastDocNum, lastFreq, lastNorm uint64
|
||||||
|
|
||||||
|
// determines whether to use "1-hit" encoding optimization
|
||||||
|
// when a term appears in only 1 doc, with no loc info,
|
||||||
|
// has freq of 1, and the docNum fits into 31-bits
|
||||||
|
use1HitEncoding := func(termCardinality uint64) (bool, uint64, uint64) {
|
||||||
|
if termCardinality == uint64(1) && locEncoder.FinalSize() <= 0 {
|
||||||
|
docNum := uint64(newRoaring.Minimum())
|
||||||
|
if under32Bits(docNum) && docNum == lastDocNum && lastFreq == 1 {
|
||||||
|
return true, docNum, lastNorm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, 0, 0
|
||||||
|
}
|
||||||
|
|
||||||
finishTerm := func(term []byte) error {
|
finishTerm := func(term []byte) error {
|
||||||
if term == nil {
|
if term == nil {
|
||||||
return nil
|
return nil
|
||||||
|
@ -233,8 +248,16 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
tfEncoder.Close()
|
tfEncoder.Close()
|
||||||
locEncoder.Close()
|
locEncoder.Close()
|
||||||
|
|
||||||
if newRoaring.GetCardinality() > 0 {
|
termCardinality := newRoaring.GetCardinality()
|
||||||
// this field/term actually has hits in the new segment, lets write it down
|
|
||||||
|
encodeAs1Hit, docNum1Hit, normBits1Hit := use1HitEncoding(termCardinality)
|
||||||
|
if encodeAs1Hit {
|
||||||
|
err = newVellum.Insert(term, FSTValEncode1Hit(docNum1Hit, normBits1Hit))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else if termCardinality > 0 {
|
||||||
|
// this field/term has hits in the new segment
|
||||||
freqOffset := uint64(w.Count())
|
freqOffset := uint64(w.Count())
|
||||||
_, err := tfEncoder.Write(w)
|
_, err := tfEncoder.Write(w)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -251,7 +274,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
postingOffset := uint64(w.Count())
|
postingOffset := uint64(w.Count())
|
||||||
|
|
||||||
// write out the start of the term info
|
// write out the start of the term info
|
||||||
n := binary.PutUvarint(bufMaxVarintLen64, freqOffset)
|
n := binary.PutUvarint(bufMaxVarintLen64, freqOffset)
|
||||||
_, err = w.Write(bufMaxVarintLen64[:n])
|
_, err = w.Write(bufMaxVarintLen64[:n])
|
||||||
|
@ -287,6 +309,10 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
tfEncoder.Reset()
|
tfEncoder.Reset()
|
||||||
locEncoder.Reset()
|
locEncoder.Reset()
|
||||||
|
|
||||||
|
lastDocNum = 0
|
||||||
|
lastFreq = 0
|
||||||
|
lastNorm = 0
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -315,7 +341,8 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
|
|
||||||
postItr = postings.iterator(postItr)
|
postItr = postings.iterator(postItr)
|
||||||
|
|
||||||
nextDocNum, nextFreqNormBytes, nextLocBytes, err2 := postItr.nextBytes()
|
nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err2 :=
|
||||||
|
postItr.nextBytes()
|
||||||
for err2 == nil && len(nextFreqNormBytes) > 0 {
|
for err2 == nil && len(nextFreqNormBytes) > 0 {
|
||||||
hitNewDocNum := newDocNumsI[nextDocNum]
|
hitNewDocNum := newDocNumsI[nextDocNum]
|
||||||
if hitNewDocNum == docDropped {
|
if hitNewDocNum == docDropped {
|
||||||
|
@ -339,7 +366,12 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
docTermMap[hitNewDocNum] =
|
docTermMap[hitNewDocNum] =
|
||||||
append(append(docTermMap[hitNewDocNum], term...), termSeparator)
|
append(append(docTermMap[hitNewDocNum], term...), termSeparator)
|
||||||
|
|
||||||
nextDocNum, nextFreqNormBytes, nextLocBytes, err2 = postItr.nextBytes()
|
lastDocNum = hitNewDocNum
|
||||||
|
lastFreq = nextFreq
|
||||||
|
lastNorm = nextNorm
|
||||||
|
|
||||||
|
nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err2 =
|
||||||
|
postItr.nextBytes()
|
||||||
}
|
}
|
||||||
if err2 != nil {
|
if err2 != nil {
|
||||||
return nil, 0, err2
|
return nil, 0, err2
|
||||||
|
|
|
@ -859,3 +859,12 @@ func TestMergeBytesWritten(t *testing.T) {
|
||||||
|
|
||||||
testMergeWithSelf(t, seg3, 4)
|
testMergeWithSelf(t, seg3, 4)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestUnder32Bits(t *testing.T) {
|
||||||
|
if !under32Bits(0) || !under32Bits(uint64(0x7fffffff)) {
|
||||||
|
t.Errorf("under32Bits bad")
|
||||||
|
}
|
||||||
|
if under32Bits(uint64(0x80000000)) || under32Bits(uint64(0x80000001)) {
|
||||||
|
t.Errorf("under32Bits wrong")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -43,6 +43,55 @@ func init() {
|
||||||
reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
|
reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FST or vellum value (uint64) encoding is determined by the top two
|
||||||
|
// highest-order or most significant bits...
|
||||||
|
//
|
||||||
|
// encoding : MSB
|
||||||
|
// name : 63 62 61...to...bit #0 (LSB)
|
||||||
|
// ----------+---+---+---------------------------------------------------
|
||||||
|
// general : 0 | 0 | 62-bits of postingsOffset.
|
||||||
|
// ~ : 0 | 1 | reserved for future.
|
||||||
|
// 1-hit : 1 | 0 | 31-bits of positive float31 norm | 31-bits docNum.
|
||||||
|
// ~ : 1 | 1 | reserved for future.
|
||||||
|
//
|
||||||
|
// Encoding "general" is able to handle all cases, where the
|
||||||
|
// postingsOffset points to more information about the postings for
|
||||||
|
// the term.
|
||||||
|
//
|
||||||
|
// Encoding "1-hit" is used to optimize a commonly seen case when a
|
||||||
|
// term has only a single hit. For example, a term in the _id field
|
||||||
|
// will have only 1 hit. The "1-hit" encoding is used for a term
|
||||||
|
// in a field when...
|
||||||
|
//
|
||||||
|
// - term vector info is disabled for that field;
|
||||||
|
// - and, the term appears in only a single doc for that field;
|
||||||
|
// - and, the term's freq is exactly 1 in that single doc for that field;
|
||||||
|
// - and, the docNum must fit into 31-bits;
|
||||||
|
//
|
||||||
|
// Otherwise, the "general" encoding is used instead.
|
||||||
|
//
|
||||||
|
// In the "1-hit" encoding, the field in that single doc may have
|
||||||
|
// other terms, which is supported in the "1-hit" encoding by the
|
||||||
|
// positive float31 norm.
|
||||||
|
|
||||||
|
const FSTValEncodingMask = uint64(0xc000000000000000)
|
||||||
|
const FSTValEncodingGeneral = uint64(0x0000000000000000)
|
||||||
|
const FSTValEncoding1Hit = uint64(0x8000000000000000)
|
||||||
|
|
||||||
|
func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64 {
|
||||||
|
return FSTValEncoding1Hit | ((mask31Bits & normBits) << 31) | (mask31Bits & docNum)
|
||||||
|
}
|
||||||
|
|
||||||
|
func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64) {
|
||||||
|
return (mask31Bits & v), (mask31Bits & (v >> 31))
|
||||||
|
}
|
||||||
|
|
||||||
|
const mask31Bits = uint64(0x000000007fffffff)
|
||||||
|
|
||||||
|
func under32Bits(x uint64) bool {
|
||||||
|
return x <= mask31Bits
|
||||||
|
}
|
||||||
|
|
||||||
// PostingsList is an in-memory representation of a postings list
|
// PostingsList is an in-memory representation of a postings list
|
||||||
type PostingsList struct {
|
type PostingsList struct {
|
||||||
sb *SegmentBase
|
sb *SegmentBase
|
||||||
|
@ -52,6 +101,10 @@ type PostingsList struct {
|
||||||
locBitmap *roaring.Bitmap
|
locBitmap *roaring.Bitmap
|
||||||
postings *roaring.Bitmap
|
postings *roaring.Bitmap
|
||||||
except *roaring.Bitmap
|
except *roaring.Bitmap
|
||||||
|
|
||||||
|
// when postingsOffset == freqOffset == 0, then the postings list
|
||||||
|
// represents a "1-hit" encoding, and has the following norm
|
||||||
|
normBits1Hit uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *PostingsList) Size() int {
|
func (p *PostingsList) Size() int {
|
||||||
|
@ -85,6 +138,8 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
|
||||||
}
|
}
|
||||||
locDecoder := rv.locDecoder
|
locDecoder := rv.locDecoder
|
||||||
|
|
||||||
|
buf := rv.buf
|
||||||
|
|
||||||
*rv = PostingsIterator{} // clear the struct
|
*rv = PostingsIterator{} // clear the struct
|
||||||
|
|
||||||
rv.freqNormReader = freqNormReader
|
rv.freqNormReader = freqNormReader
|
||||||
|
@ -92,11 +147,17 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
|
||||||
|
|
||||||
rv.locReader = locReader
|
rv.locReader = locReader
|
||||||
rv.locDecoder = locDecoder
|
rv.locDecoder = locDecoder
|
||||||
|
|
||||||
|
rv.buf = buf
|
||||||
}
|
}
|
||||||
rv.postings = p
|
rv.postings = p
|
||||||
|
|
||||||
if p.postings != nil {
|
if p.postings == nil {
|
||||||
// prepare the freq chunk details
|
return rv
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.freqOffset > 0 && p.locOffset > 0 {
|
||||||
|
// "general" encoding, so prepare the freq chunk details
|
||||||
var n uint64
|
var n uint64
|
||||||
var read int
|
var read int
|
||||||
var numFreqChunks uint64
|
var numFreqChunks uint64
|
||||||
|
@ -120,15 +181,19 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
|
||||||
n += uint64(read)
|
n += uint64(read)
|
||||||
}
|
}
|
||||||
rv.locChunkStart = p.locOffset + n
|
rv.locChunkStart = p.locOffset + n
|
||||||
rv.locBitmap = p.locBitmap
|
} else {
|
||||||
|
// "1-hit" encoding
|
||||||
|
rv.normBits1Hit = p.normBits1Hit
|
||||||
|
}
|
||||||
|
|
||||||
rv.all = p.postings.Iterator()
|
rv.locBitmap = p.locBitmap
|
||||||
if p.except != nil {
|
|
||||||
allExcept := roaring.AndNot(p.postings, p.except)
|
rv.all = p.postings.Iterator()
|
||||||
rv.actual = allExcept.Iterator()
|
if p.except != nil {
|
||||||
} else {
|
allExcept := roaring.AndNot(p.postings, p.except)
|
||||||
rv.actual = p.postings.Iterator()
|
rv.actual = allExcept.Iterator()
|
||||||
}
|
} else {
|
||||||
|
rv.actual = p.postings.Iterator()
|
||||||
}
|
}
|
||||||
|
|
||||||
return rv
|
return rv
|
||||||
|
@ -153,6 +218,11 @@ func (p *PostingsList) Count() uint64 {
|
||||||
func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
|
func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
|
||||||
rv.postingsOffset = postingsOffset
|
rv.postingsOffset = postingsOffset
|
||||||
|
|
||||||
|
// handle "1-hit" encoding special case
|
||||||
|
if rv.postingsOffset&FSTValEncodingMask == FSTValEncoding1Hit {
|
||||||
|
return rv.init1Hit(postingsOffset)
|
||||||
|
}
|
||||||
|
|
||||||
// read the location of the freq/norm details
|
// read the location of the freq/norm details
|
||||||
var n uint64
|
var n uint64
|
||||||
var read int
|
var read int
|
||||||
|
@ -193,6 +263,24 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var emptyRoaring = roaring.NewBitmap()
|
||||||
|
|
||||||
|
func (rv *PostingsList) init1Hit(fstVal uint64) error {
|
||||||
|
docNum, normBits := FSTValDecode1Hit(fstVal)
|
||||||
|
|
||||||
|
rv.locBitmap = emptyRoaring
|
||||||
|
|
||||||
|
rv.postings = roaring.NewBitmap()
|
||||||
|
rv.postings.Add(uint32(docNum))
|
||||||
|
|
||||||
|
// TODO: we can likely do better than allocating a roaring bitmap
|
||||||
|
// with just 1 entry, but for now reuse existing machinery
|
||||||
|
|
||||||
|
rv.normBits1Hit = normBits
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// PostingsIterator provides a way to iterate through the postings list
|
// PostingsIterator provides a way to iterate through the postings list
|
||||||
type PostingsIterator struct {
|
type PostingsIterator struct {
|
||||||
postings *PostingsList
|
postings *PostingsList
|
||||||
|
@ -219,6 +307,10 @@ type PostingsIterator struct {
|
||||||
|
|
||||||
next Posting // reused across Next() calls
|
next Posting // reused across Next() calls
|
||||||
nextLocs []Location // reused across Next() calls
|
nextLocs []Location // reused across Next() calls
|
||||||
|
|
||||||
|
normBits1Hit uint64
|
||||||
|
|
||||||
|
buf []byte
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *PostingsIterator) Size() int {
|
func (i *PostingsIterator) Size() int {
|
||||||
|
@ -244,7 +336,8 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
|
||||||
if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) {
|
if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) {
|
||||||
return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens))
|
return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens))
|
||||||
}
|
}
|
||||||
// load correct chunk bytes
|
|
||||||
|
// load freq chunk bytes
|
||||||
start := i.freqChunkStart
|
start := i.freqChunkStart
|
||||||
for j := 0; j < chunk; j++ {
|
for j := 0; j < chunk; j++ {
|
||||||
start += i.freqChunkLens[j]
|
start += i.freqChunkLens[j]
|
||||||
|
@ -258,6 +351,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
|
||||||
i.freqNormReader.Reset(i.currChunkFreqNorm)
|
i.freqNormReader.Reset(i.currChunkFreqNorm)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// load loc chunk bytes
|
||||||
start = i.locChunkStart
|
start = i.locChunkStart
|
||||||
for j := 0; j < chunk; j++ {
|
for j := 0; j < chunk; j++ {
|
||||||
start += i.locChunkLens[j]
|
start += i.locChunkLens[j]
|
||||||
|
@ -270,11 +364,16 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
|
||||||
} else {
|
} else {
|
||||||
i.locReader.Reset(i.currChunkLoc)
|
i.locReader.Reset(i.currChunkLoc)
|
||||||
}
|
}
|
||||||
|
|
||||||
i.currChunk = uint32(chunk)
|
i.currChunk = uint32(chunk)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) {
|
func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) {
|
||||||
|
if i.normBits1Hit != 0 {
|
||||||
|
return 1, i.normBits1Hit, nil
|
||||||
|
}
|
||||||
|
|
||||||
freq, err := i.freqNormDecoder.GetU64()
|
freq, err := i.freqNormDecoder.GetU64()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, 0, fmt.Errorf("error reading frequency: %v", err)
|
return 0, 0, fmt.Errorf("error reading frequency: %v", err)
|
||||||
|
@ -360,6 +459,7 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
rv.norm = math.Float32frombits(uint32(normBits))
|
rv.norm = math.Float32frombits(uint32(normBits))
|
||||||
|
|
||||||
if i.locBitmap.Contains(uint32(docNum)) {
|
if i.locBitmap.Contains(uint32(docNum)) {
|
||||||
// read off 'freq' locations, into reused slices
|
// read off 'freq' locations, into reused slices
|
||||||
if cap(i.nextLocs) >= int(rv.freq) {
|
if cap(i.nextLocs) >= int(rv.freq) {
|
||||||
|
@ -386,33 +486,40 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
|
||||||
|
|
||||||
// nextBytes returns the docNum and the encoded freq & loc bytes for
|
// nextBytes returns the docNum and the encoded freq & loc bytes for
|
||||||
// the next posting
|
// the next posting, along with that posting's decoded freq and norm bits
|
||||||
func (i *PostingsIterator) nextBytes() (uint64, []byte, []byte, error) {
|
func (i *PostingsIterator) nextBytes() (
|
||||||
|
docNumOut uint64, freq uint64, normBits uint64,
|
||||||
|
bytesFreqNorm []byte, bytesLoc []byte, err error) {
|
||||||
docNum, exists, err := i.nextDocNum()
|
docNum, exists, err := i.nextDocNum()
|
||||||
if err != nil {
|
if err != nil || !exists {
|
||||||
return 0, nil, nil, err
|
return 0, 0, 0, nil, nil, err
|
||||||
}
|
}
|
||||||
if !exists {
|
|
||||||
return 0, nil, nil, nil
|
if i.normBits1Hit != 0 {
|
||||||
|
if i.buf == nil {
|
||||||
|
i.buf = make([]byte, binary.MaxVarintLen64*2)
|
||||||
|
}
|
||||||
|
// Synthesize the freq/norm bytes for the single hit: the freq (always 1)
// as a uvarint, immediately followed by the norm bits as a uvarint. The
// norm must be written at offset n; writing it at the start of the
// buffer would overwrite the freq while n still counted both lengths.
n := binary.PutUvarint(i.buf, uint64(1))
n += binary.PutUvarint(i.buf[n:], i.normBits1Hit)
return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
||||||
|
|
||||||
freq, _, err := i.readFreqNorm()
|
freq, normBits, err = i.readFreqNorm()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, nil, nil, err
|
return 0, 0, 0, nil, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
||||||
bytesFreqNorm := i.currChunkFreqNorm[startFreqNorm:endFreqNorm]
|
bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm]
|
||||||
|
|
||||||
var bytesLoc []byte
|
|
||||||
if i.locBitmap.Contains(uint32(docNum)) {
|
if i.locBitmap.Contains(uint32(docNum)) {
|
||||||
startLoc := len(i.currChunkLoc) - i.locReader.Len()
|
startLoc := len(i.currChunkLoc) - i.locReader.Len()
|
||||||
|
|
||||||
for j := uint64(0); j < freq; j++ {
|
for j := uint64(0); j < freq; j++ {
|
||||||
err := i.readLocation(nil)
|
err := i.readLocation(nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, nil, nil, err
|
return 0, 0, 0, nil, nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -420,7 +527,7 @@ func (i *PostingsIterator) nextBytes() (uint64, []byte, []byte, error) {
|
||||||
bytesLoc = i.currChunkLoc[startLoc:endLoc]
|
bytesLoc = i.currChunkLoc[startLoc:endLoc]
|
||||||
}
|
}
|
||||||
|
|
||||||
return docNum, bytesFreqNorm, bytesLoc, nil
|
return docNum, freq, normBits, bytesFreqNorm, bytesLoc, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// nextDocNum returns the next docNum on the postings list, and also
|
// nextDocNum returns the next docNum on the postings list, and also
|
||||||
|
@ -431,8 +538,13 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
n := i.actual.Next()
|
n := i.actual.Next()
|
||||||
nChunk := n / i.postings.sb.chunkFactor
|
|
||||||
allN := i.all.Next()
|
allN := i.all.Next()
|
||||||
|
|
||||||
|
if i.normBits1Hit != 0 {
|
||||||
|
return uint64(n), true, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
nChunk := n / i.postings.sb.chunkFactor
|
||||||
allNChunk := allN / i.postings.sb.chunkFactor
|
allNChunk := allN / i.postings.sb.chunkFactor
|
||||||
|
|
||||||
// n is the next actual hit (excluding some postings)
|
// n is the next actual hit (excluding some postings)
|
||||||
|
|
Loading…
Reference in New Issue