0
0
Fork 0
bleve/index/scorch/segment/mem/segment.go

133 lines
3.6 KiB
Go

package mem
import (
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment"
)
// KNOWN ISSUES
// - LIMITATION - we decide whether or not to store term vectors for a field
// at the segment level, based on the first definition of a
// field we see. in normal bleve usage this is fine, all
// instances of a field definition will be the same. however,
// advanced users may violate this and provide unique field
// definitions with each document. this segment does not
// support this usage.
// TODO
// - need better testing of multiple docs, iterating freqs, locations and
// verifying the correct results are returned
// - need tests for term dictionary iteration
// Segment is an in-memory implementation of scorch.Segment.
//
// Data is held in slices indexed by small integer ids. Two maps (fields and
// the per-field dicts) store their ids offset by one, as noted on each field
// below. DocNumbers relies on this for dicts: it treats a looked-up value of
// 0 as "term not present" and subtracts one before indexing postings.
type Segment struct {
// fields maps a field name to its field id + 1.
// NOTE(review): the +1 offset presumably lets a missing key (zero value)
// mean "not defined" - confirm against getOrDefineField.
fields map[string]uint16
// fieldsInv is the inverse of fields: field id -> field name.
fieldsInv []string
// fieldsLoc records, per field id, whether the field has location info.
fieldsLoc []bool
// dicts is the term dictionary:
// field id -> term -> postings list id + 1 (0 means the term is absent).
dicts []map[string]uint64
// dictKeys holds the term dictionary keys:
// field id -> []dictionary keys
dictKeys [][]string
// postings holds the postings lists:
// postings list id -> bitmap of doc numbers
postings []*roaring.Bitmap
// freqs holds the term frequencies:
// postings list id -> freqs (one for each hit in the bitmap)
freqs [][]uint64
// norms holds the field norms:
// postings list id -> norms (one for each hit in the bitmap)
norms [][]float32
// Location info, split into one slice per attribute
// (field/start/end/pos/locarraypos):
// postings list id -> attribute values (one for each freq)
locfields [][]uint16
locstarts [][]uint64
locends [][]uint64
locpos [][]uint64
locarraypos [][][]uint64
// stored holds the stored field values:
// docNum -> field id -> slice of values (each value is a []byte).
// Its length is what Count reports as the number of documents.
stored []map[uint16][][]byte
// storedTypes holds the stored field types:
// docNum -> field id -> slice of types (each type is a byte);
// VisitDocument reads it with the same indexes as stored.
storedTypes []map[uint16][]byte
// storedPos holds the stored field array positions:
// docNum -> field id -> slice of array positions (each is a []uint64);
// VisitDocument reads it with the same indexes as stored.
storedPos []map[uint16][][]uint64
}
// New returns an empty Segment whose field-name map is allocated and ready
// for writes; every other member is left at its zero value.
func New() *Segment {
	seg := new(Segment)
	seg.fields = make(map[string]uint16)
	return seg
}
// Fields reports the names of the fields used in this segment, indexed by
// field id. The returned slice is the segment's own slice, not a copy.
func (s *Segment) Fields() []string {
	names := s.fieldsInv
	return names
}
// VisitDocument invokes the DocumentFieldValueVisitor for each stored field
// value of the specified doc number.
//
// For every stored value the visitor receives the field name, the value's
// type byte, the raw value, and its array positions. Iteration stops as soon
// as the visitor returns false. Fields are visited in Go map iteration order,
// which is unspecified. A doc number that is not present in the segment is
// not an error: nothing is visited and nil is returned. The returned error is
// always nil.
func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
	// Compare in the unsigned domain. Converting num to int first would wrap
	// large values (negative on 64-bit, truncated on 32-bit platforms), letting
	// them slip past the check and then panic on, or mis-address, the index.
	if num >= uint64(len(s.stored)) {
		return nil
	}
	for field, values := range s.stored[num] {
		for i, value := range values {
			// types and positions are looked up with the same doc/field/i
			// indexes as the value itself
			if !visitor(s.fieldsInv[field], s.storedTypes[num][field][i], value, s.storedPos[num][field][i]) {
				return nil
			}
		}
	}
	return nil
}
// Dictionary builds a term dictionary view over the named field. The field
// id is resolved through getOrDefineField (without location info).
func (s *Segment) Dictionary(field string) segment.TermDictionary {
	id := s.getOrDefineField(field, false)
	dict := &Dictionary{
		segment: s,
		field:   field,
		fieldID: uint16(id),
	}
	return dict
}
// Count reports how many documents this segment holds, taken as the length
// of the stored-fields slice. Deleted docs are not accounted for.
func (s *Segment) Count() uint64 {
	n := len(s.stored)
	return uint64(n)
}
// DocNumbers collects, into a single bitmap, the doc numbers of every
// provided _id string that is present in the "_id" term dictionary. Ids that
// are not found contribute nothing.
func (s *Segment) DocNumbers(ids []string) *roaring.Bitmap {
	idField := s.getOrDefineField("_id", false)
	idDict := s.dicts[idField]
	result := roaring.New()
	for i := range ids {
		// dictionary values are postings list id + 1; zero means "no such term"
		pid := idDict[ids[i]]
		if pid == 0 {
			continue
		}
		result.Or(s.postings[pid-1])
	}
	return result
}