scorch zap postingsIter skips freq/norm/locs parsing if allowed
With this optimization, the zap PostingsIterator skips parsing of the freq/norm/locs chunks according to the includeFreq/includeNorm/includeLocs flags. In a bleve-query microbenchmark on a dev MacBook Pro, with 50K en-wiki docs and a medium-frequency term search that does not ask for term vectors, throughput went from ~750 q/sec before the change to ~1400 q/sec after.
parent 192621f402
commit 1cab701f85
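For context, a minimal caller-side sketch (not part of this commit) of how the flags are meant to be used: a search that only needs matching doc numbers passes false for all three flags, and with this change the freq/norm and loc chunk tables are never even decoded. This assumes the segment.Posting interface exposes Number() and that Next() returns nil once the iterator is exhausted; postingsList is a *PostingsList obtained elsewhere.

	// Hypothetical usage sketch, not from the diff below.
	itr := postingsList.Iterator(false, false, false) // no freq, norm, or locations
	for p, err := itr.Next(); err == nil && p != nil; p, err = itr.Next() {
		_ = p.Number() // doc number is all this iterator pays for
	}

	// A reader that does ask for term vectors would request everything:
	full := postingsList.Iterator(true, true, true)
	_ = full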
@@ -131,11 +131,11 @@ func (p *PostingsList) OrInto(receiver *roaring.Bitmap) {
 }
 
 // Iterator returns an iterator for this postings list
-func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocations bool) segment.PostingsIterator {
-	return p.iterator(includeFreq, includeNorm, includeLocations, nil)
+func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool) segment.PostingsIterator {
+	return p.iterator(includeFreq, includeNorm, includeLocs, nil)
 }
 
-func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocations bool,
+func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
 	rv *PostingsIterator) *PostingsIterator {
 	if rv == nil {
 		rv = &PostingsIterator{}
@@ -195,9 +195,12 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocations bool,
 		return rv
 	}
 
-	// prepare the freq chunk details
 	var n uint64
 	var read int
+
+	// prepare the freq chunk details
+	rv.includeFreqNorm = includeFreq || includeNorm
+	if rv.includeFreqNorm {
 	var numFreqChunks uint64
 	numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
 	n += uint64(read)
@@ -211,8 +214,11 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocations bool,
 		n += uint64(read)
 	}
 	rv.freqChunkStart = p.freqOffset + n
+	}
 
 	// prepare the loc chunk details
+	rv.includeLocs = includeLocs
+	if rv.includeLocs {
 	n = 0
 	var numLocChunks uint64
 	numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
@@ -227,6 +233,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocations bool,
 		n += uint64(read)
 	}
 	rv.locChunkStart = p.locOffset + n
+	}
 
 	rv.all = p.postings.Iterator()
 	if p.except != nil {
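Aside, not part of the diff: a self-contained sketch of the pattern the iterator() changes above follow. Each chunk table is a uvarint count followed by that many uvarint offsets, and it is now decoded only when the corresponding include flag is set. The function and variable names here are illustrative, not the zap ones.

	package main

	import (
		"encoding/binary"
		"fmt"
	)

	// decodeChunkOffsets reads a uvarint chunk count followed by that many
	// uvarint offsets, mirroring the freq/norm and loc chunk table layout.
	func decodeChunkOffsets(mem []byte) ([]uint64, uint64) {
		var n uint64
		numChunks, read := binary.Uvarint(mem[n:])
		n += uint64(read)
		offsets := make([]uint64, int(numChunks))
		for i := 0; i < int(numChunks); i++ {
			offsets[i], read = binary.Uvarint(mem[n:])
			n += uint64(read)
		}
		return offsets, n
	}

	func main() {
		// Encode a tiny table: 3 chunks at offsets 10, 25, 40.
		tmp := make([]byte, binary.MaxVarintLen64)
		var mem []byte
		for _, v := range []uint64{3, 10, 25, 40} {
			mem = append(mem, tmp[:binary.PutUvarint(tmp, v)]...)
		}

		includeFreqNorm := false // caller did not ask for freq or norm
		if includeFreqNorm {
			offsets, _ := decodeChunkOffsets(mem)
			fmt.Println("freq/norm chunk offsets:", offsets)
		} else {
			fmt.Println("freq/norm chunk table left unparsed")
		}
	}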
@@ -329,6 +336,9 @@ type PostingsIterator struct {
 	normBits1Hit uint64
 
 	buf []byte
+
+	includeFreqNorm bool
+	includeLocs     bool
 }
 
 func (i *PostingsIterator) Size() int {
@@ -347,8 +357,10 @@ func (i *PostingsIterator) Size() int {
 }
 
 func (i *PostingsIterator) loadChunk(chunk int) error {
-	if chunk >= len(i.freqChunkOffsets) || chunk >= len(i.locChunkOffsets) {
-		return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkOffsets), len(i.locChunkOffsets))
+	if i.includeFreqNorm {
+		if chunk >= len(i.freqChunkOffsets) {
+			return fmt.Errorf("tried to load freq chunk that doesn't exist %d/(%d)",
+				chunk, len(i.freqChunkOffsets))
 	}
 
 	end, start := i.freqChunkStart, i.freqChunkStart
@@ -362,9 +374,16 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
 	} else {
 		i.freqNormReader.Reset(i.currChunkFreqNorm)
 	}
+	}
 
-	end, start = i.locChunkStart, i.locChunkStart
-	s, e = readChunkBoundary(chunk, i.locChunkOffsets)
+	if i.includeLocs {
+		if chunk >= len(i.locChunkOffsets) {
+			return fmt.Errorf("tried to load loc chunk that doesn't exist %d/(%d)",
+				chunk, len(i.locChunkOffsets))
+		}
+
+		end, start := i.locChunkStart, i.locChunkStart
+		s, e := readChunkBoundary(chunk, i.locChunkOffsets)
 	start += s
 	end += e
 	i.currChunkLoc = i.postings.sb.mem[start:end]
@@ -374,6 +393,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
 	} else {
 		i.locReader.Reset(i.currChunkLoc)
 	}
+	}
 
 	i.currChunk = uint32(chunk)
 	return nil
@@ -481,6 +501,10 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
 	rv := &i.next
 	rv.docNum = docNum
 
+	if !i.includeFreqNorm {
+		return rv, nil
+	}
+
 	var normBits uint64
 	var hasLocs bool
 
@@ -491,7 +515,7 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
 
 	rv.norm = math.Float32frombits(uint32(normBits))
 
-	if hasLocs {
+	if i.includeLocs && hasLocs {
 		// read off 'freq' locations, into reused slices
 		if cap(i.nextLocs) >= int(rv.freq) {
 			i.nextLocs = i.nextLocs[0:rv.freq]
@@ -591,7 +615,7 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) {
 	// if they don't match, move 'all' forwards until they do
 	for allN != n {
 		// in the same chunk, so move the freq/norm/loc decoders forward
-		if allNChunk == nChunk {
+		if i.includeFreqNorm && allNChunk == nChunk {
 			if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
 				err := i.loadChunk(int(nChunk))
 				if err != nil {
@@ -605,7 +629,7 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) {
 				return 0, false, err
 			}
 
-			if hasLocs {
+			if i.includeLocs && hasLocs {
 				for j := 0; j < int(freq); j++ {
 					err := i.readLocation(nil)
 					if err != nil {
@@ -619,7 +643,7 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) {
 			allNChunk = allN / i.postings.sb.chunkFactor
 		}
 
-		if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
+		if i.includeFreqNorm && (i.currChunk != nChunk || i.currChunkFreqNorm == nil) {
			err := i.loadChunk(int(nChunk))
 			if err != nil {
 				return 0, false, fmt.Errorf("error loading chunk: %v", err)
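A rough sketch, not from the commit, of how the before/after throughput comparison in the message might be framed with Go's testing package. openWikiPostings is a hypothetical helper that loads a zap segment built from the 50K en-wiki corpus and returns one medium-frequency term's *PostingsList; Next() is assumed to return nil when the iterator is exhausted.

	// Hypothetical benchmark pair; only the Iterator flags differ.
	func BenchmarkPostingsDocNumsOnly(b *testing.B) {
		pl := openWikiPostings(b) // hypothetical setup helper
		b.ResetTimer()
		for n := 0; n < b.N; n++ {
			// false/false/false: freq, norm, and loc chunks are never parsed
			itr := pl.Iterator(false, false, false)
			for p, err := itr.Next(); err == nil && p != nil; p, err = itr.Next() {
				_ = p
			}
		}
	}

	func BenchmarkPostingsWithTermVectors(b *testing.B) {
		pl := openWikiPostings(b)
		b.ResetTimer()
		for n := 0; n < b.N; n++ {
			itr := pl.Iterator(true, true, true) // full freq/norm/locations decoding
			for p, err := itr.Next(); err == nil && p != nil; p, err = itr.Next() {
				_ = p
			}
		}
	}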