0
0
Fork 0

Merge pull request #855 from blevesearch/tfr_advance

TermFieldReader Advance optimisation
This commit is contained in:
Sreekanth Sivasankaran 2018-03-27 22:49:48 +05:30 committed by GitHub
commit 6c6c1419b5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 82 additions and 15 deletions

View File

@ -105,3 +105,7 @@ func (e *EmptyPostingsIterator) Next() (Posting, error) {
// Size always returns 0 for the empty postings iterator.
func (e *EmptyPostingsIterator) Size() int {
	const emptySize = 0
	return emptySize
}
// Advance ignores the requested document number and always returns a nil
// Posting together with a nil error.
func (e *EmptyPostingsIterator) Advance(_ uint64) (Posting, error) {
	return nil, nil
}

View File

@ -155,6 +155,27 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
return &i.reuse, nil
}
// Advance returns the posting numbered docNumber, or the first posting
// produced by Next whose number is not smaller than docNumber.
//
// When the iterator's reusable posting already carries docNumber it is
// returned as-is, without calling Next. Otherwise Next is called repeatedly;
// as soon as it reports an error or a nil posting, that result is handed back
// to the caller unchanged.
func (i *PostingsIterator) Advance(docNumber uint64) (segment.Posting, error) {
	// Already positioned on the requested document.
	if i.reuse.Number() == docNumber {
		return &i.reuse, nil
	}

	for {
		candidate, err := i.Next()
		if err != nil || candidate == nil {
			return candidate, err
		}
		// Stop at the first posting at or beyond the target.
		if candidate.Number() >= docNumber {
			return candidate, nil
		}
	}
}
// Posting is a single entry in a postings list
type Posting struct {
iterator *PostingsIterator

View File

@ -75,6 +75,10 @@ type PostingsIterator interface {
Next() (Posting, error)
Size() int
// Advance will return the respective posting of the
// specified doc number or its immediate follower.
Advance(docNum uint64) (Posting, error)
}
type Posting interface {

View File

@ -588,6 +588,37 @@ func (i *PostingsIterator) nextBytes() (
return docNum, freq, normBits, bytesFreqNorm, bytesLoc, nil
}
// Advance returns the posting numbered docNumber, or the first posting
// produced by Next whose number is not smaller than docNumber.
//
// If the iterator's current posting already carries docNumber it is returned
// without reading further. Otherwise the chunk index derived from docNumber
// is loaded (when it differs from the current chunk) and Next is called until
// a posting numbered >= docNumber is found.
//
// It returns (nil, nil) when Next yields a nil posting and (nil, err) on any
// failure; a posting is never returned alongside a non-nil error.
func (i *PostingsIterator) Advance(docNumber uint64) (segment.Posting, error) {
	// check if we are already there
	if i.next.Number() == docNumber {
		return &i.next, nil
	}

	// NOTE(review): docNumber is narrowed to uint32 before the division;
	// presumably segment-local doc numbers always fit in 32 bits — confirm.
	nChunk := uint32(docNumber) / i.postings.sb.chunkFactor
	if i.currChunk != nChunk {
		if err := i.loadChunk(int(nChunk)); err != nil {
			return nil, fmt.Errorf("Advance, error loading chunk: %v", err)
		}
	}

	// Walk forward until we reach or pass the target. Every failure path
	// returns a nil posting so callers get the same result regardless of
	// which call to Next failed.
	for {
		next, err := i.Next()
		if err != nil || next == nil {
			return nil, err
		}
		if next.Number() >= docNumber {
			return next, nil
		}
	}
}
// nextDocNum returns the next docNum on the postings list, and also
// sets up the currChunk / loc related fields of the iterator.
func (i *PostingsIterator) nextDocNum() (uint64, bool, error) {

View File

@ -115,7 +115,8 @@ func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Postin
}
}
func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID,
preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
// FIXME do something better
// for now, if we need to seek backwards, then restart from the beginning
if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
@ -126,24 +127,30 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo
}
*i = *(i2.(*IndexSnapshotTermFieldReader))
}
// FIXME do something better
next, err := i.Next(preAlloced)
num, err := docInternalToNumber(ID)
if err != nil {
return nil, err
}
if next == nil {
return nil, nil
}
for bytes.Compare(next.ID, ID) < 0 {
next, err = i.Next(preAlloced)
if err != nil {
return nil, err
}
if next == nil {
break
}
segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num)
if segIndex > len(i.snapshot.segment) {
return nil, nil
}
return next, nil
// skip directly to the target segment
next, err := i.iterators[segIndex].Advance(ldocNum)
if err != nil || next == nil {
return nil, err
}
if preAlloced == nil {
preAlloced = &index.TermFieldDoc{}
}
preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+
i.snapshot.offsets[segIndex])
i.postingToTermFieldDoc(next, preAlloced)
i.currID = preAlloced.ID
i.currPosting = next
return preAlloced, nil
}
func (i *IndexSnapshotTermFieldReader) Count() uint64 {