bleve/index/scorch/segment/mem/posting.go

package mem

import (
	"github.com/RoaringBitmap/roaring"
	"github.com/blevesearch/bleve/index/scorch/segment"
)

// PostingsList is an in-memory representation of a postings list
type PostingsList struct {
	// dictionary is the dictionary this list came from; the postings data
	// itself is read through dictionary.segment.
	dictionary *Dictionary
	// NOTE(review): presumably the term this list was looked up for; it is
	// not read anywhere in this file — confirm against the code that builds it.
	term       string
	// postingsID is a 1-based index into the segment's per-postings slices
	// (it is always used as postingsID-1); 0 means there are no postings.
	postingsID uint64
	// except, when non-nil, holds document numbers to leave out of the
	// results produced by Count and Iterator.
	except     *roaring.Bitmap
}

// Count returns the number of items on this postings list, leaving out
// any item that is also present in the except bitmap. The result matches
// the number of postings that Iterator will yield.
func (p *PostingsList) Count() uint64 {
	if p.postingsID == 0 {
		// no postings recorded for this list
		return 0
	}
	all := p.dictionary.segment.postings[p.postingsID-1]
	rv := all.GetCardinality()
	if p.except != nil {
		// Only excluded documents that actually appear on this list reduce
		// the count. Subtracting the full cardinality of except would
		// undercount whenever except also holds documents that are not on
		// this list. The intersection can never be larger than rv, so the
		// subtraction cannot underflow.
		rv -= all.AndCardinality(p.except)
	}
	return rv
}

// Iterator returns an iterator for this postings list. When the list has
// no postings (postingsID is 0) the returned iterator has no underlying
// bitmap iterators and its Next immediately reports the end.
func (p *PostingsList) Iterator() segment.PostingsIterator {
	it := &PostingsIterator{postings: p}
	if p.postingsID == 0 {
		return it
	}

	full := p.dictionary.segment.postings[p.postingsID-1]
	it.all = full.Iterator()

	// visible is what the caller gets to see: the full postings, minus
	// anything listed in except (computed on a copy so the segment's
	// bitmap is left untouched).
	visible := full
	if p.except != nil {
		visible = full.Clone()
		visible.AndNot(p.except)
	}
	it.actual = visible.Iterator()

	return it
}

// PostingsIterator provides a way to iterate through the postings list
type PostingsIterator struct {
	// postings is the list being iterated.
	postings  *PostingsList
	// all walks every entry of the underlying postings bitmap, including
	// entries excluded from the results; Next advances it in step with
	// actual so that offset and locoffset stay aligned.
	all       roaring.IntIterable
	// offset is the index, within this postings list's freqs/norms slices,
	// of the next entry that all will produce.
	offset    int
	// locoffset is the running sum of the frequencies of all entries
	// consumed so far; it is the index of the next entry's first location
	// in the per-postings location slices.
	locoffset int
	// actual walks only the entries that are returned to the caller
	// (the postings with the except bitmap removed). It is nil when the
	// list has no postings.
	actual    roaring.IntIterable
}

// Next returns the next posting on the postings list, or nil at the end
func (i *PostingsIterator) Next() segment.Posting {
	if i.actual == nil || !i.actual.HasNext() {
		return nil
	}

	// hit is the next posting to hand back (exclusions already removed),
	// cur is the next entry of the complete, unfiltered postings.
	hit := i.actual.Next()
	cur := i.all.Next()

	freqs := i.postings.dictionary.segment.freqs[i.postings.postingsID-1]

	// Walk the unfiltered iterator forward until it reaches hit. Every
	// entry passed over on the way is an excluded posting, but it still
	// occupies a slot in the frequency and location data, so both offsets
	// have to move past it.
	for ; cur != hit; cur = i.all.Next() {
		i.locoffset += int(freqs[i.offset])
		i.offset++
	}

	posting := &Posting{
		iterator:  i,
		docNum:    uint64(hit),
		offset:    i.offset,
		locoffset: i.locoffset,
	}

	// step over the posting just captured so the next call starts after it
	i.locoffset += int(freqs[i.offset])
	i.offset++

	return posting
}

// Posting is a single entry in a postings list
type Posting struct {
	// iterator is the iterator that produced this posting; the segment
	// data is reached through it.
	iterator  *PostingsIterator
	// docNum is the document number of this posting.
	docNum    uint64
	// offset is this posting's index in the postings list's freqs and
	// norms slices.
	offset    int
	// locoffset is the index of this posting's first location in the
	// postings list's location slices.
	locoffset int
}

// Number returns the document number of this posting in this segment.
// It is the value captured from the postings iterator when the posting
// was created.
func (p *Posting) Number() uint64 {
	return p.docNum
}

// Frequency returns the frequency of occurrence of this term in this doc/field
func (p *Posting) Frequency() uint64 {
	list := p.iterator.postings
	freqs := list.dictionary.segment.freqs[list.postingsID-1]
	return freqs[p.offset]
}

// Norm returns the normalization factor for this posting, widened to
// float64 from the value stored in the segment.
func (p *Posting) Norm() float64 {
	list := p.iterator.postings
	norms := list.dictionary.segment.norms[list.postingsID-1]
	return float64(norms[p.offset])
}

// Locations returns the location information for each occurrence. It
// returns nil when the segment's fieldsLoc entry for this dictionary's
// field is false; otherwise it returns one Location per occurrence, as
// counted by Frequency.
func (p *Posting) Locations() []segment.Location {
	dict := p.iterator.postings.dictionary
	if !dict.segment.fieldsLoc[dict.fieldID] {
		return nil
	}

	locs := make([]segment.Location, int(p.Frequency()))
	for n := range locs {
		// this posting's locations are stored contiguously, starting at
		// locoffset
		locs[n] = &Location{p: p, offset: p.locoffset + n}
	}
	return locs
}

// Location represents the location of a single occurrence
type Location struct {
	// p is the posting this occurrence belongs to.
	p      *Posting
	// offset is the index of this occurrence in the postings list's
	// location slices (locfields, locstarts, locends, locpos, locarraypos).
	offset int
}

// Field returns the name of the field (useful in composite fields to know
// which original field the value came from). The field ID recorded for
// this occurrence is translated to a name through the segment's fieldsInv.
func (l *Location) Field() string {
	list := l.p.iterator.postings
	seg := list.dictionary.segment
	fieldID := seg.locfields[list.postingsID-1][l.offset]
	return seg.fieldsInv[fieldID]
}

// Start returns the start byte offset of this occurrence
func (l *Location) Start() uint64 {
	list := l.p.iterator.postings
	starts := list.dictionary.segment.locstarts[list.postingsID-1]
	return starts[l.offset]
}

// End returns the end byte offset of this occurrence
func (l *Location) End() uint64 {
	list := l.p.iterator.postings
	ends := list.dictionary.segment.locends[list.postingsID-1]
	return ends[l.offset]
}

// Pos returns the 1-based phrase position of this occurrence
func (l *Location) Pos() uint64 {
	list := l.p.iterator.postings
	positions := list.dictionary.segment.locpos[list.postingsID-1]
	return positions[l.offset]
}

// ArrayPositions returns the array position vector associated with this
// occurrence. The returned slice is the one stored in the segment, not a
// copy.
func (l *Location) ArrayPositions() []uint64 {
	list := l.p.iterator.postings
	arrayPositions := list.dictionary.segment.locarraypos[list.postingsID-1]
	return arrayPositions[l.offset]
}