Merge pull request #857 from steveyen/replace-locsBitmap-attempt2
optimization to replace locations bitmap, attempt #2
This commit is contained in:
commit
33b1f065dc
|
@ -81,10 +81,6 @@ var exploreCmd = &cobra.Command{
|
||||||
locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
||||||
n += uint64(read)
|
n += uint64(read)
|
||||||
|
|
||||||
var locBitmapAddr uint64
|
|
||||||
locBitmapAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
|
||||||
n += uint64(read)
|
|
||||||
|
|
||||||
var postingListLen uint64
|
var postingListLen uint64
|
||||||
postingListLen, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
postingListLen, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
||||||
n += uint64(read)
|
n += uint64(read)
|
||||||
|
@ -131,8 +127,6 @@ var exploreCmd = &cobra.Command{
|
||||||
running2 += offset
|
running2 += offset
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("Loc Bitmap at: %d (%x)\n", locBitmapAddr, locBitmapAddr)
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
fmt.Printf("dictionary does not contain term '%s'\n", args[2])
|
fmt.Printf("dictionary does not contain term '%s'\n", args[2])
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,7 +22,7 @@ import (
|
||||||
"github.com/Smerity/govarint"
|
"github.com/Smerity/govarint"
|
||||||
)
|
)
|
||||||
|
|
||||||
const version uint32 = 6
|
const version uint32 = 8
|
||||||
|
|
||||||
const fieldNotUninverted = math.MaxUint64
|
const fieldNotUninverted = math.MaxUint64
|
||||||
|
|
||||||
|
|
|
@ -72,15 +72,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap)
|
||||||
if postings != nil {
|
if postings != nil {
|
||||||
postings.Clear()
|
postings.Clear()
|
||||||
}
|
}
|
||||||
locBitmap := rv.locBitmap
|
|
||||||
if locBitmap != nil {
|
|
||||||
locBitmap.Clear()
|
|
||||||
}
|
|
||||||
|
|
||||||
*rv = PostingsList{} // clear the struct
|
*rv = PostingsList{} // clear the struct
|
||||||
|
|
||||||
rv.postings = postings
|
rv.postings = postings
|
||||||
rv.locBitmap = locBitmap
|
|
||||||
}
|
}
|
||||||
rv.sb = d.sb
|
rv.sb = d.sb
|
||||||
rv.except = except
|
rv.except = except
|
||||||
|
|
|
@ -188,7 +188,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
newRoaring := roaring.NewBitmap()
|
newRoaring := roaring.NewBitmap()
|
||||||
newRoaringLocs := roaring.NewBitmap()
|
|
||||||
|
|
||||||
// for each field
|
// for each field
|
||||||
for fieldID, fieldName := range fieldsInv {
|
for fieldID, fieldName := range fieldsInv {
|
||||||
|
@ -234,7 +233,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
var prevTerm []byte
|
var prevTerm []byte
|
||||||
|
|
||||||
newRoaring.Clear()
|
newRoaring.Clear()
|
||||||
newRoaringLocs.Clear()
|
|
||||||
|
|
||||||
var lastDocNum, lastFreq, lastNorm uint64
|
var lastDocNum, lastFreq, lastNorm uint64
|
||||||
|
|
||||||
|
@ -259,9 +257,8 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
tfEncoder.Close()
|
tfEncoder.Close()
|
||||||
locEncoder.Close()
|
locEncoder.Close()
|
||||||
|
|
||||||
postingsOffset, err := writePostings(
|
postingsOffset, err := writePostings(newRoaring,
|
||||||
newRoaring, newRoaringLocs, tfEncoder, locEncoder,
|
tfEncoder, locEncoder, use1HitEncoding, w, bufMaxVarintLen64)
|
||||||
use1HitEncoding, w, bufMaxVarintLen64)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -274,7 +271,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
newRoaring.Clear()
|
newRoaring.Clear()
|
||||||
newRoaringLocs.Clear()
|
|
||||||
|
|
||||||
tfEncoder.Reset()
|
tfEncoder.Reset()
|
||||||
locEncoder.Reset()
|
locEncoder.Reset()
|
||||||
|
@ -312,11 +308,11 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
if fieldsSame {
|
if fieldsSame {
|
||||||
// can optimize by copying freq/norm/loc bytes directly
|
// can optimize by copying freq/norm/loc bytes directly
|
||||||
lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying(
|
lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying(
|
||||||
term, postItr, newDocNums[itrI], newRoaring, newRoaringLocs,
|
term, postItr, newDocNums[itrI], newRoaring,
|
||||||
tfEncoder, locEncoder, docTermMap)
|
tfEncoder, locEncoder, docTermMap)
|
||||||
} else {
|
} else {
|
||||||
lastDocNum, lastFreq, lastNorm, bufLoc, err = mergeTermFreqNormLocs(
|
lastDocNum, lastFreq, lastNorm, bufLoc, err = mergeTermFreqNormLocs(
|
||||||
fieldsMap, term, postItr, newDocNums[itrI], newRoaring, newRoaringLocs,
|
fieldsMap, term, postItr, newDocNums[itrI], newRoaring,
|
||||||
tfEncoder, locEncoder, docTermMap, bufLoc)
|
tfEncoder, locEncoder, docTermMap, bufLoc)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -407,7 +403,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *PostingsIterator,
|
func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *PostingsIterator,
|
||||||
newDocNums []uint64, newRoaring *roaring.Bitmap, newRoaringLocs *roaring.Bitmap,
|
newDocNums []uint64, newRoaring *roaring.Bitmap,
|
||||||
tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte,
|
tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte,
|
||||||
bufLoc []uint64) (
|
bufLoc []uint64) (
|
||||||
lastDocNum uint64, lastFreq uint64, lastNorm uint64, bufLocOut []uint64, err error) {
|
lastDocNum uint64, lastFreq uint64, lastNorm uint64, bufLocOut []uint64, err error) {
|
||||||
|
@ -423,15 +419,15 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po
|
||||||
nextFreq := next.Frequency()
|
nextFreq := next.Frequency()
|
||||||
nextNorm := uint64(math.Float32bits(float32(next.Norm())))
|
nextNorm := uint64(math.Float32bits(float32(next.Norm())))
|
||||||
|
|
||||||
err = tfEncoder.Add(hitNewDocNum, nextFreq, nextNorm)
|
locs := next.Locations()
|
||||||
|
|
||||||
|
err = tfEncoder.Add(hitNewDocNum,
|
||||||
|
encodeFreqHasLocs(nextFreq, len(locs) > 0), nextNorm)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, 0, 0, nil, err
|
return 0, 0, 0, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
locs := next.Locations()
|
|
||||||
if len(locs) > 0 {
|
if len(locs) > 0 {
|
||||||
newRoaringLocs.Add(uint32(hitNewDocNum))
|
|
||||||
|
|
||||||
for _, loc := range locs {
|
for _, loc := range locs {
|
||||||
if cap(bufLoc) < 5+len(loc.ArrayPositions()) {
|
if cap(bufLoc) < 5+len(loc.ArrayPositions()) {
|
||||||
bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions()))
|
bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions()))
|
||||||
|
@ -464,7 +460,7 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po
|
||||||
}
|
}
|
||||||
|
|
||||||
func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator,
|
func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator,
|
||||||
newDocNums []uint64, newRoaring *roaring.Bitmap, newRoaringLocs *roaring.Bitmap,
|
newDocNums []uint64, newRoaring *roaring.Bitmap,
|
||||||
tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte) (
|
tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte) (
|
||||||
lastDocNum uint64, lastFreq uint64, lastNorm uint64, err error) {
|
lastDocNum uint64, lastFreq uint64, lastNorm uint64, err error) {
|
||||||
nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err :=
|
nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err :=
|
||||||
|
@ -482,7 +478,6 @@ func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator,
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(nextLocBytes) > 0 {
|
if len(nextLocBytes) > 0 {
|
||||||
newRoaringLocs.Add(uint32(hitNewDocNum))
|
|
||||||
err = locEncoder.AddBytes(hitNewDocNum, nextLocBytes)
|
err = locEncoder.AddBytes(hitNewDocNum, nextLocBytes)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, 0, 0, err
|
return 0, 0, 0, err
|
||||||
|
@ -503,8 +498,7 @@ func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator,
|
||||||
return lastDocNum, lastFreq, lastNorm, err
|
return lastDocNum, lastFreq, lastNorm, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func writePostings(postings, postingLocs *roaring.Bitmap,
|
func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCoder,
|
||||||
tfEncoder, locEncoder *chunkedIntCoder,
|
|
||||||
use1HitEncoding func(uint64) (bool, uint64, uint64),
|
use1HitEncoding func(uint64) (bool, uint64, uint64),
|
||||||
w *CountHashWriter, bufMaxVarintLen64 []byte) (
|
w *CountHashWriter, bufMaxVarintLen64 []byte) (
|
||||||
offset uint64, err error) {
|
offset uint64, err error) {
|
||||||
|
@ -532,12 +526,6 @@ func writePostings(postings, postingLocs *roaring.Bitmap,
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
postingLocsOffset := uint64(w.Count())
|
|
||||||
_, err = writeRoaringWithLen(postingLocs, w, bufMaxVarintLen64)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
postingsOffset := uint64(w.Count())
|
postingsOffset := uint64(w.Count())
|
||||||
|
|
||||||
n := binary.PutUvarint(bufMaxVarintLen64, tfOffset)
|
n := binary.PutUvarint(bufMaxVarintLen64, tfOffset)
|
||||||
|
@ -552,12 +540,6 @@ func writePostings(postings, postingLocs *roaring.Bitmap,
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
n = binary.PutUvarint(bufMaxVarintLen64, postingLocsOffset)
|
|
||||||
_, err = w.Write(bufMaxVarintLen64[:n])
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = writeRoaringWithLen(postings, w, bufMaxVarintLen64)
|
_, err = writeRoaringWithLen(postings, w, bufMaxVarintLen64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
|
|
|
@ -103,9 +103,6 @@ type interim struct {
|
||||||
// postings id -> bitmap of docNums
|
// postings id -> bitmap of docNums
|
||||||
Postings []*roaring.Bitmap
|
Postings []*roaring.Bitmap
|
||||||
|
|
||||||
// postings id -> bitmap of docNums that have locations
|
|
||||||
PostingsLocs []*roaring.Bitmap
|
|
||||||
|
|
||||||
// postings id -> freq/norm's, one for each docNum in postings
|
// postings id -> freq/norm's, one for each docNum in postings
|
||||||
FreqNorms [][]interimFreqNorm
|
FreqNorms [][]interimFreqNorm
|
||||||
freqNormsBacking []interimFreqNorm
|
freqNormsBacking []interimFreqNorm
|
||||||
|
@ -151,10 +148,6 @@ func (s *interim) reset() (err error) {
|
||||||
idn.Clear()
|
idn.Clear()
|
||||||
}
|
}
|
||||||
s.Postings = s.Postings[:0]
|
s.Postings = s.Postings[:0]
|
||||||
for _, idn := range s.PostingsLocs {
|
|
||||||
idn.Clear()
|
|
||||||
}
|
|
||||||
s.PostingsLocs = s.PostingsLocs[:0]
|
|
||||||
s.FreqNorms = s.FreqNorms[:0]
|
s.FreqNorms = s.FreqNorms[:0]
|
||||||
for i := range s.freqNormsBacking {
|
for i := range s.freqNormsBacking {
|
||||||
s.freqNormsBacking[i] = interimFreqNorm{}
|
s.freqNormsBacking[i] = interimFreqNorm{}
|
||||||
|
@ -196,8 +189,9 @@ type interimStoredField struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type interimFreqNorm struct {
|
type interimFreqNorm struct {
|
||||||
freq uint64
|
freq uint64
|
||||||
norm float32
|
norm float32
|
||||||
|
hasLocs bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type interimLoc struct {
|
type interimLoc struct {
|
||||||
|
@ -356,19 +350,6 @@ func (s *interim) prepareDicts() {
|
||||||
s.Postings = postings
|
s.Postings = postings
|
||||||
}
|
}
|
||||||
|
|
||||||
if cap(s.PostingsLocs) >= numPostingsLists {
|
|
||||||
s.PostingsLocs = s.PostingsLocs[:numPostingsLists]
|
|
||||||
} else {
|
|
||||||
postingsLocs := make([]*roaring.Bitmap, numPostingsLists)
|
|
||||||
copy(postingsLocs, s.PostingsLocs[:cap(s.PostingsLocs)])
|
|
||||||
for i := 0; i < numPostingsLists; i++ {
|
|
||||||
if postingsLocs[i] == nil {
|
|
||||||
postingsLocs[i] = roaring.New()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s.PostingsLocs = postingsLocs
|
|
||||||
}
|
|
||||||
|
|
||||||
if cap(s.FreqNorms) >= numPostingsLists {
|
if cap(s.FreqNorms) >= numPostingsLists {
|
||||||
s.FreqNorms = s.FreqNorms[:numPostingsLists]
|
s.FreqNorms = s.FreqNorms[:numPostingsLists]
|
||||||
} else {
|
} else {
|
||||||
|
@ -464,14 +445,12 @@ func (s *interim) processDocument(docNum uint64,
|
||||||
|
|
||||||
s.FreqNorms[pid] = append(s.FreqNorms[pid],
|
s.FreqNorms[pid] = append(s.FreqNorms[pid],
|
||||||
interimFreqNorm{
|
interimFreqNorm{
|
||||||
freq: uint64(tf.Frequency()),
|
freq: uint64(tf.Frequency()),
|
||||||
norm: norm,
|
norm: norm,
|
||||||
|
hasLocs: len(tf.Locations) > 0,
|
||||||
})
|
})
|
||||||
|
|
||||||
if len(tf.Locations) > 0 {
|
if len(tf.Locations) > 0 {
|
||||||
locBS := s.PostingsLocs[pid]
|
|
||||||
locBS.Add(uint32(docNum))
|
|
||||||
|
|
||||||
locs := s.Locs[pid]
|
locs := s.Locs[pid]
|
||||||
|
|
||||||
for _, loc := range tf.Locations {
|
for _, loc := range tf.Locations {
|
||||||
|
@ -625,7 +604,6 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err
|
||||||
pid := dict[term] - 1
|
pid := dict[term] - 1
|
||||||
|
|
||||||
postingsBS := s.Postings[pid]
|
postingsBS := s.Postings[pid]
|
||||||
postingsLocsBS := s.PostingsLocs[pid]
|
|
||||||
|
|
||||||
freqNorms := s.FreqNorms[pid]
|
freqNorms := s.FreqNorms[pid]
|
||||||
freqNormOffset := 0
|
freqNormOffset := 0
|
||||||
|
@ -639,7 +617,8 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err
|
||||||
|
|
||||||
freqNorm := freqNorms[freqNormOffset]
|
freqNorm := freqNorms[freqNormOffset]
|
||||||
|
|
||||||
err = tfEncoder.Add(docNum, freqNorm.freq,
|
err = tfEncoder.Add(docNum,
|
||||||
|
encodeFreqHasLocs(freqNorm.freq, freqNorm.hasLocs),
|
||||||
uint64(math.Float32bits(freqNorm.norm)))
|
uint64(math.Float32bits(freqNorm.norm)))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, nil, err
|
return 0, nil, err
|
||||||
|
@ -675,9 +654,8 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err
|
||||||
tfEncoder.Close()
|
tfEncoder.Close()
|
||||||
locEncoder.Close()
|
locEncoder.Close()
|
||||||
|
|
||||||
postingsOffset, err := writePostings(
|
postingsOffset, err :=
|
||||||
postingsBS, postingsLocsBS, tfEncoder, locEncoder,
|
writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf)
|
||||||
nil, s.w, buf)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, nil, err
|
return 0, nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -100,7 +100,6 @@ type PostingsList struct {
|
||||||
postingsOffset uint64
|
postingsOffset uint64
|
||||||
freqOffset uint64
|
freqOffset uint64
|
||||||
locOffset uint64
|
locOffset uint64
|
||||||
locBitmap *roaring.Bitmap
|
|
||||||
postings *roaring.Bitmap
|
postings *roaring.Bitmap
|
||||||
except *roaring.Bitmap
|
except *roaring.Bitmap
|
||||||
|
|
||||||
|
@ -222,8 +221,6 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
|
||||||
}
|
}
|
||||||
rv.locChunkStart = p.locOffset + n
|
rv.locChunkStart = p.locOffset + n
|
||||||
|
|
||||||
rv.locBitmap = p.locBitmap
|
|
||||||
|
|
||||||
rv.all = p.postings.Iterator()
|
rv.all = p.postings.Iterator()
|
||||||
if p.except != nil {
|
if p.except != nil {
|
||||||
allExcept := roaring.AndNot(p.postings, p.except)
|
allExcept := roaring.AndNot(p.postings, p.except)
|
||||||
|
@ -271,23 +268,6 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
|
||||||
rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
|
rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
|
||||||
n += uint64(read)
|
n += uint64(read)
|
||||||
|
|
||||||
var locBitmapOffset uint64
|
|
||||||
locBitmapOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
|
|
||||||
n += uint64(read)
|
|
||||||
|
|
||||||
var locBitmapLen uint64
|
|
||||||
locBitmapLen, read = binary.Uvarint(d.sb.mem[locBitmapOffset : locBitmapOffset+binary.MaxVarintLen64])
|
|
||||||
|
|
||||||
locRoaringBytes := d.sb.mem[locBitmapOffset+uint64(read) : locBitmapOffset+uint64(read)+locBitmapLen]
|
|
||||||
|
|
||||||
if rv.locBitmap == nil {
|
|
||||||
rv.locBitmap = roaring.NewBitmap()
|
|
||||||
}
|
|
||||||
_, err := rv.locBitmap.FromBuffer(locRoaringBytes)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("error loading roaring bitmap of locations with hits: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var postingsLen uint64
|
var postingsLen uint64
|
||||||
postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
|
postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
|
||||||
n += uint64(read)
|
n += uint64(read)
|
||||||
|
@ -297,7 +277,7 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
|
||||||
if rv.postings == nil {
|
if rv.postings == nil {
|
||||||
rv.postings = roaring.NewBitmap()
|
rv.postings = roaring.NewBitmap()
|
||||||
}
|
}
|
||||||
_, err = rv.postings.FromBuffer(roaringBytes)
|
_, err := rv.postings.FromBuffer(roaringBytes)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error loading roaring bitmap: %v", err)
|
return fmt.Errorf("error loading roaring bitmap: %v", err)
|
||||||
}
|
}
|
||||||
|
@ -334,8 +314,6 @@ type PostingsIterator struct {
|
||||||
locChunkOffsets []uint64
|
locChunkOffsets []uint64
|
||||||
locChunkStart uint64
|
locChunkStart uint64
|
||||||
|
|
||||||
locBitmap *roaring.Bitmap
|
|
||||||
|
|
||||||
next Posting // reused across Next() calls
|
next Posting // reused across Next() calls
|
||||||
nextLocs []Location // reused across Next() calls
|
nextLocs []Location // reused across Next() calls
|
||||||
|
|
||||||
|
@ -353,10 +331,6 @@ func (i *PostingsIterator) Size() int {
|
||||||
len(i.locChunkOffsets)*size.SizeOfUint64 +
|
len(i.locChunkOffsets)*size.SizeOfUint64 +
|
||||||
i.next.Size()
|
i.next.Size()
|
||||||
|
|
||||||
if i.locBitmap != nil {
|
|
||||||
sizeInBytes += int(i.locBitmap.GetSizeInBytes())
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, entry := range i.nextLocs {
|
for _, entry := range i.nextLocs {
|
||||||
sizeInBytes += entry.Size()
|
sizeInBytes += entry.Size()
|
||||||
}
|
}
|
||||||
|
@ -397,20 +371,37 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) {
|
func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
|
||||||
if i.normBits1Hit != 0 {
|
if i.normBits1Hit != 0 {
|
||||||
return 1, i.normBits1Hit, nil
|
return 1, i.normBits1Hit, false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
freq, err := i.freqNormDecoder.GetU64()
|
freqHasLocs, err := i.freqNormDecoder.GetU64()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, 0, fmt.Errorf("error reading frequency: %v", err)
|
return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
|
||||||
}
|
}
|
||||||
|
freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
|
||||||
|
|
||||||
normBits, err := i.freqNormDecoder.GetU64()
|
normBits, err := i.freqNormDecoder.GetU64()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, 0, fmt.Errorf("error reading norm: %v", err)
|
return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
|
||||||
}
|
}
|
||||||
return freq, normBits, err
|
|
||||||
|
return freq, normBits, hasLocs, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
|
||||||
|
rv := freq << 1
|
||||||
|
if hasLocs {
|
||||||
|
rv = rv | 0x01 // 0'th LSB encodes whether there are locations
|
||||||
|
}
|
||||||
|
return rv
|
||||||
|
}
|
||||||
|
|
||||||
|
func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
|
||||||
|
freq := freqHasLocs >> 1
|
||||||
|
hasLocs := freqHasLocs&0x01 != 0
|
||||||
|
return freq, hasLocs
|
||||||
}
|
}
|
||||||
|
|
||||||
// readLocation processes all the integers on the stream representing a single
|
// readLocation processes all the integers on the stream representing a single
|
||||||
|
@ -484,13 +475,16 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
|
||||||
rv.docNum = docNum
|
rv.docNum = docNum
|
||||||
|
|
||||||
var normBits uint64
|
var normBits uint64
|
||||||
rv.freq, normBits, err = i.readFreqNorm()
|
var hasLocs bool
|
||||||
|
|
||||||
|
rv.freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
rv.norm = math.Float32frombits(uint32(normBits))
|
rv.norm = math.Float32frombits(uint32(normBits))
|
||||||
|
|
||||||
if i.locBitmap != nil && i.locBitmap.Contains(uint32(docNum)) {
|
if hasLocs {
|
||||||
// read off 'freq' locations, into reused slices
|
// read off 'freq' locations, into reused slices
|
||||||
if cap(i.nextLocs) >= int(rv.freq) {
|
if cap(i.nextLocs) >= int(rv.freq) {
|
||||||
i.nextLocs = i.nextLocs[0:rv.freq]
|
i.nextLocs = i.nextLocs[0:rv.freq]
|
||||||
|
@ -514,6 +508,8 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
|
||||||
return rv, nil
|
return rv, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var freqHasLocs1Hit = encodeFreqHasLocs(1, false)
|
||||||
|
|
||||||
// nextBytes returns the docNum and the encoded freq & loc bytes for
|
// nextBytes returns the docNum and the encoded freq & loc bytes for
|
||||||
// the next posting
|
// the next posting
|
||||||
func (i *PostingsIterator) nextBytes() (
|
func (i *PostingsIterator) nextBytes() (
|
||||||
|
@ -528,14 +524,16 @@ func (i *PostingsIterator) nextBytes() (
|
||||||
if i.buf == nil {
|
if i.buf == nil {
|
||||||
i.buf = make([]byte, binary.MaxVarintLen64*2)
|
i.buf = make([]byte, binary.MaxVarintLen64*2)
|
||||||
}
|
}
|
||||||
n := binary.PutUvarint(i.buf, uint64(1))
|
n := binary.PutUvarint(i.buf, freqHasLocs1Hit)
|
||||||
n += binary.PutUvarint(i.buf, i.normBits1Hit)
|
n += binary.PutUvarint(i.buf[n:], i.normBits1Hit)
|
||||||
return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil
|
return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
||||||
|
|
||||||
freq, normBits, err = i.readFreqNorm()
|
var hasLocs bool
|
||||||
|
|
||||||
|
freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, 0, 0, nil, nil, err
|
return 0, 0, 0, nil, nil, err
|
||||||
}
|
}
|
||||||
|
@ -543,7 +541,7 @@ func (i *PostingsIterator) nextBytes() (
|
||||||
endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
||||||
bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm]
|
bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm]
|
||||||
|
|
||||||
if i.locBitmap != nil && i.locBitmap.Contains(uint32(docNum)) {
|
if hasLocs {
|
||||||
startLoc := len(i.currChunkLoc) - i.locReader.Len()
|
startLoc := len(i.currChunkLoc) - i.locReader.Len()
|
||||||
|
|
||||||
for j := uint64(0); j < freq; j++ {
|
for j := uint64(0); j < freq; j++ {
|
||||||
|
@ -596,11 +594,12 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// read off freq/offsets even though we don't care about them
|
// read off freq/offsets even though we don't care about them
|
||||||
freq, _, err := i.readFreqNorm()
|
freq, _, hasLocs, err := i.readFreqNormHasLocs()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, false, err
|
return 0, false, err
|
||||||
}
|
}
|
||||||
if i.locBitmap.Contains(allN) {
|
|
||||||
|
if hasLocs {
|
||||||
for j := 0; j < int(freq); j++ {
|
for j := 0; j < int(freq); j++ {
|
||||||
err := i.readLocation(nil)
|
err := i.readLocation(nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
Loading…
Reference in New Issue