Merge branch 'master' into compaction_bytes_stats
This commit is contained in:
commit
e0369a3553
|
@ -14,7 +14,19 @@
|
||||||
|
|
||||||
package document
|
package document
|
||||||
|
|
||||||
import "fmt"
|
import (
|
||||||
|
"fmt"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeDocument int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var d Document
|
||||||
|
reflectStaticSizeDocument = int(reflect.TypeOf(d).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type Document struct {
|
type Document struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
|
@ -30,6 +42,13 @@ func NewDocument(id string) *Document {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (d *Document) Size() int {
|
||||||
|
return reflectStaticSizeDocument + size.SizeOfPtr +
|
||||||
|
len(d.ID) +
|
||||||
|
len(d.Fields)*size.SizeOfPtr +
|
||||||
|
len(d.CompositeFields)*(size.SizeOfPtr+reflectStaticSizeCompositeField)
|
||||||
|
}
|
||||||
|
|
||||||
func (d *Document) AddField(f Field) *Document {
|
func (d *Document) AddField(f Field) *Document {
|
||||||
switch f := f.(type) {
|
switch f := f.(type) {
|
||||||
case *CompositeField:
|
case *CompositeField:
|
||||||
|
|
|
@ -15,9 +15,18 @@
|
||||||
package document
|
package document
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/analysis"
|
"github.com/blevesearch/bleve/analysis"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeCompositeField int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var cf CompositeField
|
||||||
|
reflectStaticSizeCompositeField = int(reflect.TypeOf(cf).Size())
|
||||||
|
}
|
||||||
|
|
||||||
const DefaultCompositeIndexingOptions = IndexField
|
const DefaultCompositeIndexingOptions = IndexField
|
||||||
|
|
||||||
type CompositeField struct {
|
type CompositeField struct {
|
||||||
|
|
|
@ -18,11 +18,23 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/document"
|
"github.com/blevesearch/bleve/document"
|
||||||
"github.com/blevesearch/bleve/index/store"
|
"github.com/blevesearch/bleve/index/store"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeTermFieldDoc int
|
||||||
|
var reflectStaticSizeTermFieldVector int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var tfd TermFieldDoc
|
||||||
|
reflectStaticSizeTermFieldDoc = int(reflect.TypeOf(tfd).Size())
|
||||||
|
var tfv TermFieldVector
|
||||||
|
reflectStaticSizeTermFieldVector = int(reflect.TypeOf(tfv).Size())
|
||||||
|
}
|
||||||
|
|
||||||
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
|
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
|
||||||
|
|
||||||
type Index interface {
|
type Index interface {
|
||||||
|
@ -82,6 +94,8 @@ type IndexReader interface {
|
||||||
DumpFields() chan interface{}
|
DumpFields() chan interface{}
|
||||||
|
|
||||||
Close() error
|
Close() error
|
||||||
|
|
||||||
|
Size() int
|
||||||
}
|
}
|
||||||
|
|
||||||
// FieldTerms contains the terms used by a document, keyed by field
|
// FieldTerms contains the terms used by a document, keyed by field
|
||||||
|
@ -115,6 +129,11 @@ type TermFieldVector struct {
|
||||||
End uint64
|
End uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (tfv *TermFieldVector) Size() int {
|
||||||
|
return reflectStaticSizeTermFieldVector + size.SizeOfPtr +
|
||||||
|
len(tfv.Field) + len(tfv.ArrayPositions)*size.SizeOfUint64
|
||||||
|
}
|
||||||
|
|
||||||
// IndexInternalID is an opaque document identifier interal to the index impl
|
// IndexInternalID is an opaque document identifier interal to the index impl
|
||||||
type IndexInternalID []byte
|
type IndexInternalID []byte
|
||||||
|
|
||||||
|
@ -134,6 +153,17 @@ type TermFieldDoc struct {
|
||||||
Vectors []*TermFieldVector
|
Vectors []*TermFieldVector
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (tfd *TermFieldDoc) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeTermFieldDoc + size.SizeOfPtr +
|
||||||
|
len(tfd.Term) + len(tfd.ID)
|
||||||
|
|
||||||
|
for _, entry := range tfd.Vectors {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
// Reset allows an already allocated TermFieldDoc to be reused
|
// Reset allows an already allocated TermFieldDoc to be reused
|
||||||
func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
|
func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
|
||||||
// remember the []byte used for the ID
|
// remember the []byte used for the ID
|
||||||
|
@ -161,6 +191,8 @@ type TermFieldReader interface {
|
||||||
// Count returns the number of documents contains the term in this field.
|
// Count returns the number of documents contains the term in this field.
|
||||||
Count() uint64
|
Count() uint64
|
||||||
Close() error
|
Close() error
|
||||||
|
|
||||||
|
Size() int
|
||||||
}
|
}
|
||||||
|
|
||||||
type DictEntry struct {
|
type DictEntry struct {
|
||||||
|
@ -185,6 +217,9 @@ type DocIDReader interface {
|
||||||
// will start there instead. If ID is greater than or equal to the end of
|
// will start there instead. If ID is greater than or equal to the end of
|
||||||
// the range, Next() call will return io.EOF.
|
// the range, Next() call will return io.EOF.
|
||||||
Advance(ID IndexInternalID) (IndexInternalID, error)
|
Advance(ID IndexInternalID) (IndexInternalID, error)
|
||||||
|
|
||||||
|
Size() int
|
||||||
|
|
||||||
Close() error
|
Close() error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -179,11 +179,21 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
||||||
filename := zapFileName(newSegmentID)
|
filename := zapFileName(newSegmentID)
|
||||||
s.markIneligibleForRemoval(filename)
|
s.markIneligibleForRemoval(filename)
|
||||||
path := s.path + string(os.PathSeparator) + filename
|
path := s.path + string(os.PathSeparator) + filename
|
||||||
|
|
||||||
|
fileMergeZapStartTime := time.Now()
|
||||||
|
|
||||||
atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
|
atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
|
||||||
newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024)
|
newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024)
|
||||||
atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
|
atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
|
||||||
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes)
|
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes)
|
||||||
if err != nil {
|
|
||||||
|
fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
|
||||||
|
atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
|
||||||
|
if atomic.LoadUint64(&s.stats.MaxFileMergeZapTime) < fileMergeZapTime {
|
||||||
|
atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
s.unmarkIneligibleForRemoval(filename)
|
s.unmarkIneligibleForRemoval(filename)
|
||||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
||||||
return fmt.Errorf("merging failed: %v", err)
|
return fmt.Errorf("merging failed: %v", err)
|
||||||
|
@ -259,11 +269,20 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
|
||||||
|
|
||||||
cr := zap.NewCountHashWriter(&br)
|
cr := zap.NewCountHashWriter(&br)
|
||||||
|
|
||||||
|
memMergeZapStartTime := time.Now()
|
||||||
|
|
||||||
atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1)
|
atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1)
|
||||||
newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset,
|
newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset,
|
||||||
docValueOffset, dictLocs, fieldsInv, fieldsMap, err :=
|
docValueOffset, dictLocs, fieldsInv, fieldsMap, err :=
|
||||||
zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr)
|
zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr)
|
||||||
atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
|
atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
|
||||||
|
|
||||||
|
memMergeZapTime := uint64(time.Since(memMergeZapStartTime))
|
||||||
|
atomic.AddUint64(&s.stats.TotMemMergeZapTime, memMergeZapTime)
|
||||||
|
if atomic.LoadUint64(&s.stats.MaxMemMergeZapTime) < memMergeZapTime {
|
||||||
|
atomic.StoreUint64(&s.stats.MaxMemMergeZapTime, memMergeZapTime)
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
|
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
|
||||||
return 0, nil, 0, err
|
return 0, nil, 0, err
|
||||||
|
|
|
@ -365,7 +365,7 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
|
||||||
introTime := uint64(time.Since(introStartTime))
|
introTime := uint64(time.Since(introStartTime))
|
||||||
atomic.AddUint64(&s.stats.TotBatchIntroTime, introTime)
|
atomic.AddUint64(&s.stats.TotBatchIntroTime, introTime)
|
||||||
if atomic.LoadUint64(&s.stats.MaxBatchIntroTime) < introTime {
|
if atomic.LoadUint64(&s.stats.MaxBatchIntroTime) < introTime {
|
||||||
atomic.AddUint64(&s.stats.MaxBatchIntroTime, introTime)
|
atomic.StoreUint64(&s.stats.MaxBatchIntroTime, introTime)
|
||||||
}
|
}
|
||||||
|
|
||||||
return err
|
return err
|
||||||
|
@ -473,20 +473,20 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Scorch) MemoryUsed() uint64 {
|
func (s *Scorch) MemoryUsed() uint64 {
|
||||||
var memUsed uint64
|
var memUsed int
|
||||||
s.rootLock.RLock()
|
s.rootLock.RLock()
|
||||||
if s.root != nil {
|
if s.root != nil {
|
||||||
for _, segmentSnapshot := range s.root.segment {
|
for _, segmentSnapshot := range s.root.segment {
|
||||||
memUsed += 8 /* size of id -> uint64 */ +
|
memUsed += 8 /* size of id -> uint64 */ +
|
||||||
segmentSnapshot.segment.SizeInBytes()
|
segmentSnapshot.segment.Size()
|
||||||
if segmentSnapshot.deleted != nil {
|
if segmentSnapshot.deleted != nil {
|
||||||
memUsed += segmentSnapshot.deleted.GetSizeInBytes()
|
memUsed += int(segmentSnapshot.deleted.GetSizeInBytes())
|
||||||
}
|
}
|
||||||
memUsed += segmentSnapshot.cachedDocs.sizeInBytes()
|
memUsed += segmentSnapshot.cachedDocs.size()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s.rootLock.RUnlock()
|
s.rootLock.RUnlock()
|
||||||
return memUsed
|
return uint64(memUsed)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Scorch) markIneligibleForRemoval(filename string) {
|
func (s *Scorch) markIneligibleForRemoval(filename string) {
|
||||||
|
|
|
@ -46,6 +46,10 @@ func (e *EmptySegment) Close() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *EmptySegment) Size() uint64 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
func (e *EmptySegment) AddRef() {
|
func (e *EmptySegment) AddRef() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,6 +88,10 @@ func (e *EmptyPostingsList) Iterator() PostingsIterator {
|
||||||
return &EmptyPostingsIterator{}
|
return &EmptyPostingsIterator{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *EmptyPostingsList) Size() int {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
func (e *EmptyPostingsList) Count() uint64 {
|
func (e *EmptyPostingsList) Count() uint64 {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
@ -93,3 +101,7 @@ type EmptyPostingsIterator struct{}
|
||||||
func (e *EmptyPostingsIterator) Next() (Posting, error) {
|
func (e *EmptyPostingsIterator) Next() (Posting, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *EmptyPostingsIterator) Size() int {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
|
@ -45,7 +45,7 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment {
|
||||||
}
|
}
|
||||||
|
|
||||||
// compute memory usage of segment
|
// compute memory usage of segment
|
||||||
s.updateSizeInBytes()
|
s.updateSize()
|
||||||
|
|
||||||
// professional debugging
|
// professional debugging
|
||||||
//
|
//
|
||||||
|
@ -222,12 +222,6 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
storeField := func(docNum uint64, field uint16, typ byte, val []byte, pos []uint64) {
|
|
||||||
s.Stored[docNum][field] = append(s.Stored[docNum][field], val)
|
|
||||||
s.StoredTypes[docNum][field] = append(s.StoredTypes[docNum][field], typ)
|
|
||||||
s.StoredPos[docNum][field] = append(s.StoredPos[docNum][field], pos)
|
|
||||||
}
|
|
||||||
|
|
||||||
// walk each composite field
|
// walk each composite field
|
||||||
for _, field := range result.Document.CompositeFields {
|
for _, field := range result.Document.CompositeFields {
|
||||||
fieldID := uint16(s.getOrDefineField(field.Name()))
|
fieldID := uint16(s.getOrDefineField(field.Name()))
|
||||||
|
@ -235,6 +229,10 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
|
||||||
processField(fieldID, field.Name(), l, tf)
|
processField(fieldID, field.Name(), l, tf)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
docStored := s.Stored[docNum]
|
||||||
|
docStoredTypes := s.StoredTypes[docNum]
|
||||||
|
docStoredPos := s.StoredPos[docNum]
|
||||||
|
|
||||||
// walk each field
|
// walk each field
|
||||||
for i, field := range result.Document.Fields {
|
for i, field := range result.Document.Fields {
|
||||||
fieldID := uint16(s.getOrDefineField(field.Name()))
|
fieldID := uint16(s.getOrDefineField(field.Name()))
|
||||||
|
@ -242,7 +240,9 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
|
||||||
tf := result.Analyzed[i]
|
tf := result.Analyzed[i]
|
||||||
processField(fieldID, field.Name(), l, tf)
|
processField(fieldID, field.Name(), l, tf)
|
||||||
if field.Options().IsStored() {
|
if field.Options().IsStored() {
|
||||||
storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions())
|
docStored[fieldID] = append(docStored[fieldID], field.Value())
|
||||||
|
docStoredTypes[fieldID] = append(docStoredTypes[fieldID], encodeFieldType(field))
|
||||||
|
docStoredPos[fieldID] = append(docStoredPos[fieldID], field.ArrayPositions())
|
||||||
}
|
}
|
||||||
|
|
||||||
if field.Options().IncludeDocValues() {
|
if field.Options().IncludeDocValues() {
|
||||||
|
@ -252,12 +252,14 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
|
||||||
|
|
||||||
// now that its been rolled up into docMap, walk that
|
// now that its been rolled up into docMap, walk that
|
||||||
for fieldID, tokenFrequencies := range docMap {
|
for fieldID, tokenFrequencies := range docMap {
|
||||||
|
dict := s.Dicts[fieldID]
|
||||||
|
norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))
|
||||||
for term, tokenFreq := range tokenFrequencies {
|
for term, tokenFreq := range tokenFrequencies {
|
||||||
pid := s.Dicts[fieldID][term] - 1
|
pid := dict[term] - 1
|
||||||
bs := s.Postings[pid]
|
bs := s.Postings[pid]
|
||||||
bs.AddInt(int(docNum))
|
bs.AddInt(int(docNum))
|
||||||
s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency()))
|
s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency()))
|
||||||
s.Norms[pid] = append(s.Norms[pid], float32(1.0/math.Sqrt(float64(fieldLens[fieldID]))))
|
s.Norms[pid] = append(s.Norms[pid], norm)
|
||||||
locationBS := s.PostingsLocs[pid]
|
locationBS := s.PostingsLocs[pid]
|
||||||
if len(tokenFreq.Locations) > 0 {
|
if len(tokenFreq.Locations) > 0 {
|
||||||
locationBS.AddInt(int(docNum))
|
locationBS.AddInt(int(docNum))
|
||||||
|
|
|
@ -15,14 +15,23 @@
|
||||||
package mem
|
package mem
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/RoaringBitmap/roaring"
|
"github.com/RoaringBitmap/roaring"
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeDictionary int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var d Dictionary
|
||||||
|
reflectStaticSizeDictionary = int(reflect.TypeOf(d).Size())
|
||||||
|
}
|
||||||
|
|
||||||
// Dictionary is the in-memory representation of the term dictionary
|
// Dictionary is the in-memory representation of the term dictionary
|
||||||
type Dictionary struct {
|
type Dictionary struct {
|
||||||
segment *Segment
|
segment *Segment
|
||||||
|
@ -30,15 +39,34 @@ type Dictionary struct {
|
||||||
fieldID uint16
|
fieldID uint16
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (d *Dictionary) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeDictionary + size.SizeOfPtr +
|
||||||
|
len(d.field)
|
||||||
|
|
||||||
|
if d.segment != nil {
|
||||||
|
sizeInBytes += int(d.segment.Size())
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
// PostingsList returns the postings list for the specified term
|
// PostingsList returns the postings list for the specified term
|
||||||
func (d *Dictionary) PostingsList(term string,
|
func (d *Dictionary) PostingsList(term string,
|
||||||
except *roaring.Bitmap) (segment.PostingsList, error) {
|
except *roaring.Bitmap) (segment.PostingsList, error) {
|
||||||
return &PostingsList{
|
return d.InitPostingsList(term, except, nil)
|
||||||
dictionary: d,
|
}
|
||||||
term: term,
|
|
||||||
postingsID: d.segment.Dicts[d.fieldID][term],
|
func (d *Dictionary) InitPostingsList(term string, except *roaring.Bitmap,
|
||||||
except: except,
|
prealloc *PostingsList) (*PostingsList, error) {
|
||||||
}, nil
|
rv := prealloc
|
||||||
|
if rv == nil {
|
||||||
|
rv = &PostingsList{}
|
||||||
|
}
|
||||||
|
rv.dictionary = d
|
||||||
|
rv.term = term
|
||||||
|
rv.postingsID = d.segment.Dicts[d.fieldID][term]
|
||||||
|
rv.except = except
|
||||||
|
return rv, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Iterator returns an iterator for this dictionary
|
// Iterator returns an iterator for this dictionary
|
||||||
|
|
|
@ -15,10 +15,29 @@
|
||||||
package mem
|
package mem
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/RoaringBitmap/roaring"
|
"github.com/RoaringBitmap/roaring"
|
||||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizePostingsList int
|
||||||
|
var reflectStaticSizePostingsIterator int
|
||||||
|
var reflectStaticSizePosting int
|
||||||
|
var reflectStaticSizeLocation int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var pl PostingsList
|
||||||
|
reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size())
|
||||||
|
var pi PostingsIterator
|
||||||
|
reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size())
|
||||||
|
var p Posting
|
||||||
|
reflectStaticSizePosting = int(reflect.TypeOf(p).Size())
|
||||||
|
var l Location
|
||||||
|
reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
|
||||||
|
}
|
||||||
|
|
||||||
// PostingsList is an in-memory represenation of a postings list
|
// PostingsList is an in-memory represenation of a postings list
|
||||||
type PostingsList struct {
|
type PostingsList struct {
|
||||||
dictionary *Dictionary
|
dictionary *Dictionary
|
||||||
|
@ -27,6 +46,20 @@ type PostingsList struct {
|
||||||
except *roaring.Bitmap
|
except *roaring.Bitmap
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *PostingsList) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr
|
||||||
|
|
||||||
|
if p.dictionary != nil {
|
||||||
|
sizeInBytes += p.dictionary.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.except != nil {
|
||||||
|
sizeInBytes += int(p.except.GetSizeInBytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
// Count returns the number of items on this postings list
|
// Count returns the number of items on this postings list
|
||||||
func (p *PostingsList) Count() uint64 {
|
func (p *PostingsList) Count() uint64 {
|
||||||
var rv uint64
|
var rv uint64
|
||||||
|
@ -46,9 +79,16 @@ func (p *PostingsList) Count() uint64 {
|
||||||
|
|
||||||
// Iterator returns an iterator for this postings list
|
// Iterator returns an iterator for this postings list
|
||||||
func (p *PostingsList) Iterator() segment.PostingsIterator {
|
func (p *PostingsList) Iterator() segment.PostingsIterator {
|
||||||
rv := &PostingsIterator{
|
return p.InitIterator(nil)
|
||||||
postings: p,
|
}
|
||||||
|
func (p *PostingsList) InitIterator(prealloc *PostingsIterator) *PostingsIterator {
|
||||||
|
rv := prealloc
|
||||||
|
if rv == nil {
|
||||||
|
rv = &PostingsIterator{postings: p}
|
||||||
|
} else {
|
||||||
|
*rv = PostingsIterator{postings: p}
|
||||||
}
|
}
|
||||||
|
|
||||||
if p.postingsID > 0 {
|
if p.postingsID > 0 {
|
||||||
allbits := p.dictionary.segment.Postings[p.postingsID-1]
|
allbits := p.dictionary.segment.Postings[p.postingsID-1]
|
||||||
rv.locations = p.dictionary.segment.PostingsLocs[p.postingsID-1]
|
rv.locations = p.dictionary.segment.PostingsLocs[p.postingsID-1]
|
||||||
|
@ -73,6 +113,17 @@ type PostingsIterator struct {
|
||||||
offset int
|
offset int
|
||||||
locoffset int
|
locoffset int
|
||||||
actual roaring.IntIterable
|
actual roaring.IntIterable
|
||||||
|
reuse Posting
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *PostingsIterator) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr
|
||||||
|
|
||||||
|
if i.locations != nil {
|
||||||
|
sizeInBytes += int(i.locations.GetSizeInBytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
}
|
}
|
||||||
|
|
||||||
// Next returns the next posting on the postings list, or nil at the end
|
// Next returns the next posting on the postings list, or nil at the end
|
||||||
|
@ -92,17 +143,16 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
|
||||||
i.offset++
|
i.offset++
|
||||||
allN = i.all.Next()
|
allN = i.all.Next()
|
||||||
}
|
}
|
||||||
rv := &Posting{
|
i.reuse = Posting{
|
||||||
iterator: i,
|
iterator: i,
|
||||||
docNum: uint64(n),
|
docNum: uint64(n),
|
||||||
offset: i.offset,
|
offset: i.offset,
|
||||||
locoffset: i.locoffset,
|
locoffset: i.locoffset,
|
||||||
hasLoc: i.locations.Contains(n),
|
hasLoc: i.locations.Contains(n),
|
||||||
}
|
}
|
||||||
|
|
||||||
i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset])
|
i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset])
|
||||||
i.offset++
|
i.offset++
|
||||||
return rv, nil
|
return &i.reuse, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Posting is a single entry in a postings list
|
// Posting is a single entry in a postings list
|
||||||
|
@ -114,6 +164,16 @@ type Posting struct {
|
||||||
hasLoc bool
|
hasLoc bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *Posting) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizePosting + size.SizeOfPtr
|
||||||
|
|
||||||
|
if p.iterator != nil {
|
||||||
|
sizeInBytes += p.iterator.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
// Number returns the document number of this posting in this segment
|
// Number returns the document number of this posting in this segment
|
||||||
func (p *Posting) Number() uint64 {
|
func (p *Posting) Number() uint64 {
|
||||||
return p.docNum
|
return p.docNum
|
||||||
|
@ -151,6 +211,15 @@ type Location struct {
|
||||||
offset int
|
offset int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (l *Location) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeLocation
|
||||||
|
if l.p != nil {
|
||||||
|
sizeInBytes += l.p.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
// Field returns the name of the field (useful in composite fields to know
|
// Field returns the name of the field (useful in composite fields to know
|
||||||
// which original field the value came from)
|
// which original field the value came from)
|
||||||
func (l *Location) Field() string {
|
func (l *Location) Field() string {
|
||||||
|
|
|
@ -16,11 +16,20 @@ package mem
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/RoaringBitmap/roaring"
|
"github.com/RoaringBitmap/roaring"
|
||||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeSegment int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var s Segment
|
||||||
|
reflectStaticSizeSegment = int(reflect.TypeOf(s).Size())
|
||||||
|
}
|
||||||
|
|
||||||
// _id field is always guaranteed to have fieldID of 0
|
// _id field is always guaranteed to have fieldID of 0
|
||||||
const idFieldID uint16 = 0
|
const idFieldID uint16 = 0
|
||||||
|
|
||||||
|
@ -96,7 +105,7 @@ type Segment struct {
|
||||||
|
|
||||||
// Footprint of the segment, updated when analyzed document mutations
|
// Footprint of the segment, updated when analyzed document mutations
|
||||||
// are added into the segment
|
// are added into the segment
|
||||||
sizeInBytes uint64
|
sizeInBytes int
|
||||||
}
|
}
|
||||||
|
|
||||||
// New builds a new empty Segment
|
// New builds a new empty Segment
|
||||||
|
@ -107,99 +116,87 @@ func New() *Segment {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Segment) updateSizeInBytes() {
|
func (s *Segment) updateSize() {
|
||||||
var sizeInBytes uint64
|
sizeInBytes := reflectStaticSizeSegment
|
||||||
|
|
||||||
// FieldsMap, FieldsInv
|
// FieldsMap, FieldsInv
|
||||||
for k, _ := range s.FieldsMap {
|
for k, _ := range s.FieldsMap {
|
||||||
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
|
sizeInBytes += (len(k)+size.SizeOfString)*2 +
|
||||||
2 /* size of uint16 */)
|
size.SizeOfUint16
|
||||||
}
|
}
|
||||||
// overhead from the data structures
|
|
||||||
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
|
|
||||||
|
|
||||||
// Dicts, DictKeys
|
// Dicts, DictKeys
|
||||||
for _, entry := range s.Dicts {
|
for _, entry := range s.Dicts {
|
||||||
for k, _ := range entry {
|
for k, _ := range entry {
|
||||||
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
|
sizeInBytes += (len(k)+size.SizeOfString)*2 +
|
||||||
8 /* size of uint64 */)
|
size.SizeOfUint64
|
||||||
}
|
}
|
||||||
// overhead from the data structures
|
// overhead from the data structures
|
||||||
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
|
sizeInBytes += (size.SizeOfMap + size.SizeOfSlice)
|
||||||
}
|
}
|
||||||
sizeInBytes += (segment.SizeOfSlice * 2)
|
|
||||||
|
|
||||||
// Postings, PostingsLocs
|
// Postings, PostingsLocs
|
||||||
for i := 0; i < len(s.Postings); i++ {
|
for i := 0; i < len(s.Postings); i++ {
|
||||||
sizeInBytes += (s.Postings[i].GetSizeInBytes() + segment.SizeOfPointer) +
|
sizeInBytes += (int(s.Postings[i].GetSizeInBytes()) + size.SizeOfPtr) +
|
||||||
(s.PostingsLocs[i].GetSizeInBytes() + segment.SizeOfPointer)
|
(int(s.PostingsLocs[i].GetSizeInBytes()) + size.SizeOfPtr)
|
||||||
}
|
}
|
||||||
sizeInBytes += (segment.SizeOfSlice * 2)
|
|
||||||
|
|
||||||
// Freqs, Norms
|
// Freqs, Norms
|
||||||
for i := 0; i < len(s.Freqs); i++ {
|
for i := 0; i < len(s.Freqs); i++ {
|
||||||
sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ +
|
sizeInBytes += (len(s.Freqs[i])*size.SizeOfUint64 +
|
||||||
len(s.Norms[i])*4 /* size of float32 */) +
|
len(s.Norms[i])*size.SizeOfFloat32) +
|
||||||
(segment.SizeOfSlice * 2)
|
(size.SizeOfSlice * 2)
|
||||||
}
|
}
|
||||||
sizeInBytes += (segment.SizeOfSlice * 2)
|
|
||||||
|
|
||||||
// Location data
|
// Location data
|
||||||
for i := 0; i < len(s.Locfields); i++ {
|
for i := 0; i < len(s.Locfields); i++ {
|
||||||
sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ +
|
sizeInBytes += len(s.Locfields[i])*size.SizeOfUint16 +
|
||||||
len(s.Locstarts[i])*8 /* size of uint64 */ +
|
len(s.Locstarts[i])*size.SizeOfUint64 +
|
||||||
len(s.Locends[i])*8 /* size of uint64 */ +
|
len(s.Locends[i])*size.SizeOfUint64 +
|
||||||
len(s.Locpos[i])*8 /* size of uint64 */)
|
len(s.Locpos[i])*size.SizeOfUint64
|
||||||
|
|
||||||
for j := 0; j < len(s.Locarraypos[i]); j++ {
|
for j := 0; j < len(s.Locarraypos[i]); j++ {
|
||||||
sizeInBytes += uint64(len(s.Locarraypos[i][j])*8 /* size of uint64 */) +
|
sizeInBytes += len(s.Locarraypos[i][j])*size.SizeOfUint64 +
|
||||||
segment.SizeOfSlice
|
size.SizeOfSlice
|
||||||
}
|
}
|
||||||
|
|
||||||
sizeInBytes += (segment.SizeOfSlice * 5)
|
sizeInBytes += (size.SizeOfSlice * 5)
|
||||||
}
|
}
|
||||||
sizeInBytes += (segment.SizeOfSlice * 5)
|
|
||||||
|
|
||||||
// Stored data
|
// Stored data
|
||||||
for i := 0; i < len(s.Stored); i++ {
|
for i := 0; i < len(s.Stored); i++ {
|
||||||
for _, v := range s.Stored[i] {
|
for _, v := range s.Stored[i] {
|
||||||
sizeInBytes += uint64(2 /* size of uint16 */)
|
sizeInBytes += size.SizeOfUint16
|
||||||
for _, arr := range v {
|
for _, arr := range v {
|
||||||
sizeInBytes += uint64(len(arr)) + segment.SizeOfSlice
|
sizeInBytes += len(arr) + size.SizeOfSlice
|
||||||
}
|
}
|
||||||
sizeInBytes += segment.SizeOfSlice
|
sizeInBytes += size.SizeOfSlice
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, v := range s.StoredTypes[i] {
|
for _, v := range s.StoredTypes[i] {
|
||||||
sizeInBytes += uint64(2 /* size of uint16 */ +len(v)) + segment.SizeOfSlice
|
sizeInBytes += size.SizeOfUint16 + len(v) + size.SizeOfSlice
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, v := range s.StoredPos[i] {
|
for _, v := range s.StoredPos[i] {
|
||||||
sizeInBytes += uint64(2 /* size of uint16 */)
|
sizeInBytes += size.SizeOfUint16
|
||||||
for _, arr := range v {
|
for _, arr := range v {
|
||||||
sizeInBytes += uint64(len(arr)*8 /* size of uint64 */) +
|
sizeInBytes += len(arr)*size.SizeOfUint64 +
|
||||||
segment.SizeOfSlice
|
size.SizeOfSlice
|
||||||
}
|
}
|
||||||
sizeInBytes += segment.SizeOfSlice
|
sizeInBytes += size.SizeOfSlice
|
||||||
}
|
}
|
||||||
|
|
||||||
// overhead from map(s) within Stored, StoredTypes, StoredPos
|
// overhead from map(s) within Stored, StoredTypes, StoredPos
|
||||||
sizeInBytes += (segment.SizeOfMap * 3)
|
sizeInBytes += (size.SizeOfMap * 3)
|
||||||
}
|
}
|
||||||
// overhead from data structures: Stored, StoredTypes, StoredPos
|
|
||||||
sizeInBytes += (segment.SizeOfSlice * 3)
|
|
||||||
|
|
||||||
// DocValueFields
|
// DocValueFields
|
||||||
sizeInBytes += uint64(len(s.DocValueFields)*3 /* size of uint16 + bool */) +
|
sizeInBytes += len(s.DocValueFields) * (size.SizeOfUint16 + size.SizeOfBool)
|
||||||
segment.SizeOfMap
|
|
||||||
|
|
||||||
// SizeInBytes
|
|
||||||
sizeInBytes += uint64(8)
|
|
||||||
|
|
||||||
s.sizeInBytes = sizeInBytes
|
s.sizeInBytes = sizeInBytes
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Segment) SizeInBytes() uint64 {
|
func (s *Segment) Size() int {
|
||||||
return s.sizeInBytes
|
return s.sizeInBytes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -169,7 +169,7 @@ func TestSingle(t *testing.T) {
|
||||||
t.Fatalf("segment nil, not expected")
|
t.Fatalf("segment nil, not expected")
|
||||||
}
|
}
|
||||||
|
|
||||||
if segment.SizeInBytes() <= 0 {
|
if segment.Size() <= 0 {
|
||||||
t.Fatalf("segment size not updated")
|
t.Fatalf("segment size not updated")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,12 +19,6 @@ import (
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Overhead from go data structures when deployed on a 64-bit system.
|
|
||||||
const SizeOfMap uint64 = 8
|
|
||||||
const SizeOfPointer uint64 = 8
|
|
||||||
const SizeOfSlice uint64 = 24
|
|
||||||
const SizeOfString uint64 = 16
|
|
||||||
|
|
||||||
// DocumentFieldValueVisitor defines a callback to be visited for each
|
// DocumentFieldValueVisitor defines a callback to be visited for each
|
||||||
// stored field value. The return value determines if the visitor
|
// stored field value. The return value determines if the visitor
|
||||||
// should keep going. Returning true continues visiting, false stops.
|
// should keep going. Returning true continues visiting, false stops.
|
||||||
|
@ -42,7 +36,7 @@ type Segment interface {
|
||||||
|
|
||||||
Close() error
|
Close() error
|
||||||
|
|
||||||
SizeInBytes() uint64
|
Size() int
|
||||||
|
|
||||||
AddRef()
|
AddRef()
|
||||||
DecRef() error
|
DecRef() error
|
||||||
|
@ -63,6 +57,8 @@ type DictionaryIterator interface {
|
||||||
type PostingsList interface {
|
type PostingsList interface {
|
||||||
Iterator() PostingsIterator
|
Iterator() PostingsIterator
|
||||||
|
|
||||||
|
Size() int
|
||||||
|
|
||||||
Count() uint64
|
Count() uint64
|
||||||
|
|
||||||
// NOTE deferred for future work
|
// NOTE deferred for future work
|
||||||
|
@ -77,6 +73,8 @@ type PostingsIterator interface {
|
||||||
// implementations may return a shared instance to reduce memory
|
// implementations may return a shared instance to reduce memory
|
||||||
// allocations.
|
// allocations.
|
||||||
Next() (Posting, error)
|
Next() (Posting, error)
|
||||||
|
|
||||||
|
Size() int
|
||||||
}
|
}
|
||||||
|
|
||||||
type Posting interface {
|
type Posting interface {
|
||||||
|
@ -86,6 +84,8 @@ type Posting interface {
|
||||||
Norm() float64
|
Norm() float64
|
||||||
|
|
||||||
Locations() []Location
|
Locations() []Location
|
||||||
|
|
||||||
|
Size() int
|
||||||
}
|
}
|
||||||
|
|
||||||
type Location interface {
|
type Location interface {
|
||||||
|
@ -94,6 +94,7 @@ type Location interface {
|
||||||
End() uint64
|
End() uint64
|
||||||
Pos() uint64
|
Pos() uint64
|
||||||
ArrayPositions() []uint64
|
ArrayPositions() []uint64
|
||||||
|
Size() int
|
||||||
}
|
}
|
||||||
|
|
||||||
// DocumentFieldTermVisitable is implemented by various scorch segment
|
// DocumentFieldTermVisitable is implemented by various scorch segment
|
||||||
|
|
|
@ -308,7 +308,7 @@ func persistStoredFieldValues(fieldID int,
|
||||||
}
|
}
|
||||||
|
|
||||||
func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) {
|
func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) {
|
||||||
var freqOffsets, locOfffsets []uint64
|
freqOffsets := make([]uint64, 0, len(memSegment.Postings))
|
||||||
tfEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
|
tfEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
|
||||||
for postingID := range memSegment.Postings {
|
for postingID := range memSegment.Postings {
|
||||||
if postingID != 0 {
|
if postingID != 0 {
|
||||||
|
@ -319,19 +319,10 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
|
||||||
postingsListItr := memSegment.Postings[postingID].Iterator()
|
postingsListItr := memSegment.Postings[postingID].Iterator()
|
||||||
var offset int
|
var offset int
|
||||||
for postingsListItr.HasNext() {
|
for postingsListItr.HasNext() {
|
||||||
|
|
||||||
docNum := uint64(postingsListItr.Next())
|
docNum := uint64(postingsListItr.Next())
|
||||||
|
|
||||||
// put freq
|
// put freq & norm
|
||||||
err := tfEncoder.Add(docNum, freqs[offset])
|
err := tfEncoder.Add(docNum, freqs[offset], uint64(math.Float32bits(norms[offset])))
|
||||||
if err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// put norm
|
|
||||||
norm := norms[offset]
|
|
||||||
normBits := math.Float32bits(norm)
|
|
||||||
err = tfEncoder.Add(docNum, uint64(normBits))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
|
@ -347,10 +338,10 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// now do it again for the locations
|
// now do it again for the locations
|
||||||
|
locOffsets := make([]uint64, 0, len(memSegment.Postings))
|
||||||
locEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
|
locEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
|
||||||
for postingID := range memSegment.Postings {
|
for postingID := range memSegment.Postings {
|
||||||
if postingID != 0 {
|
if postingID != 0 {
|
||||||
|
@ -367,45 +358,20 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
|
||||||
var locOffset int
|
var locOffset int
|
||||||
for postingsListItr.HasNext() {
|
for postingsListItr.HasNext() {
|
||||||
docNum := uint64(postingsListItr.Next())
|
docNum := uint64(postingsListItr.Next())
|
||||||
for i := 0; i < int(freqs[offset]); i++ {
|
n := int(freqs[offset])
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
if len(locfields) > 0 {
|
if len(locfields) > 0 {
|
||||||
// put field
|
err := locEncoder.Add(docNum, uint64(locfields[locOffset]),
|
||||||
err := locEncoder.Add(docNum, uint64(locfields[locOffset]))
|
locpos[locOffset], locstarts[locOffset], locends[locOffset],
|
||||||
if err != nil {
|
uint64(len(locarraypos[locOffset])))
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// put pos
|
|
||||||
err = locEncoder.Add(docNum, locpos[locOffset])
|
|
||||||
if err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// put start
|
|
||||||
err = locEncoder.Add(docNum, locstarts[locOffset])
|
|
||||||
if err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// put end
|
|
||||||
err = locEncoder.Add(docNum, locends[locOffset])
|
|
||||||
if err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// put the number of array positions to follow
|
|
||||||
num := len(locarraypos[locOffset])
|
|
||||||
err = locEncoder.Add(docNum, uint64(num))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// put each array position
|
// put each array position
|
||||||
for _, pos := range locarraypos[locOffset] {
|
err = locEncoder.Add(docNum, locarraypos[locOffset]...)
|
||||||
err = locEncoder.Add(docNum, pos)
|
if err != nil {
|
||||||
if err != nil {
|
return nil, nil, err
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
locOffset++
|
locOffset++
|
||||||
|
@ -414,14 +380,16 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
|
||||||
}
|
}
|
||||||
|
|
||||||
// record where this postings loc info starts
|
// record where this postings loc info starts
|
||||||
locOfffsets = append(locOfffsets, uint64(w.Count()))
|
locOffsets = append(locOffsets, uint64(w.Count()))
|
||||||
|
|
||||||
locEncoder.Close()
|
locEncoder.Close()
|
||||||
_, err := locEncoder.Write(w)
|
_, err := locEncoder.Write(w)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return freqOffsets, locOfffsets, nil
|
|
||||||
|
return freqOffsets, locOffsets, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) {
|
func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) {
|
||||||
|
@ -532,6 +500,9 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
|
||||||
fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.FieldsInv))
|
fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.FieldsInv))
|
||||||
fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
|
fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
|
||||||
|
|
||||||
|
var postings *mem.PostingsList
|
||||||
|
var postingsItr *mem.PostingsIterator
|
||||||
|
|
||||||
for fieldID := range memSegment.DocValueFields {
|
for fieldID := range memSegment.DocValueFields {
|
||||||
field := memSegment.FieldsInv[fieldID]
|
field := memSegment.FieldsInv[fieldID]
|
||||||
docTermMap := make(map[uint64][]byte, 0)
|
docTermMap := make(map[uint64][]byte, 0)
|
||||||
|
@ -543,17 +514,17 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
|
||||||
dictItr := dict.Iterator()
|
dictItr := dict.Iterator()
|
||||||
next, err := dictItr.Next()
|
next, err := dictItr.Next()
|
||||||
for err == nil && next != nil {
|
for err == nil && next != nil {
|
||||||
postings, err1 := dict.PostingsList(next.Term, nil)
|
var err1 error
|
||||||
|
postings, err1 = dict.(*mem.Dictionary).InitPostingsList(next.Term, nil, postings)
|
||||||
if err1 != nil {
|
if err1 != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
postingsItr := postings.Iterator()
|
postingsItr = postings.InitIterator(postingsItr)
|
||||||
nextPosting, err2 := postingsItr.Next()
|
nextPosting, err2 := postingsItr.Next()
|
||||||
for err2 == nil && nextPosting != nil {
|
for err2 == nil && nextPosting != nil {
|
||||||
docNum := nextPosting.Number()
|
docNum := nextPosting.Number()
|
||||||
docTermMap[docNum] = append(docTermMap[docNum], []byte(next.Term)...)
|
docTermMap[docNum] = append(append(docTermMap[docNum], []byte(next.Term)...), termSeparator)
|
||||||
docTermMap[docNum] = append(docTermMap[docNum], termSeparator)
|
|
||||||
nextPosting, err2 = postingsItr.Next()
|
nextPosting, err2 = postingsItr.Next()
|
||||||
}
|
}
|
||||||
if err2 != nil {
|
if err2 != nil {
|
||||||
|
@ -562,12 +533,12 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
|
||||||
|
|
||||||
next, err = dictItr.Next()
|
next, err = dictItr.Next()
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// sort wrt to docIDs
|
// sort wrt to docIDs
|
||||||
var docNumbers docIDRange
|
docNumbers := make(docIDRange, 0, len(docTermMap))
|
||||||
for k := range docTermMap {
|
for k := range docTermMap {
|
||||||
docNumbers = append(docNumbers, k)
|
docNumbers = append(docNumbers, k)
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,10 +18,18 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"io"
|
"io"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/golang/snappy"
|
"github.com/golang/snappy"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeMetaData int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var md MetaData
|
||||||
|
reflectStaticSizeMetaData = int(reflect.TypeOf(md).Size())
|
||||||
|
}
|
||||||
|
|
||||||
var termSeparator byte = 0xff
|
var termSeparator byte = 0xff
|
||||||
var termSeparatorSplitSlice = []byte{termSeparator}
|
var termSeparatorSplitSlice = []byte{termSeparator}
|
||||||
|
|
||||||
|
|
|
@ -19,13 +19,21 @@ import (
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
"github.com/blevesearch/bleve/size"
|
||||||
"github.com/golang/snappy"
|
"github.com/golang/snappy"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizedocValueIterator int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var dvi docValueIterator
|
||||||
|
reflectStaticSizedocValueIterator = int(reflect.TypeOf(dvi).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type docValueIterator struct {
|
type docValueIterator struct {
|
||||||
field string
|
field string
|
||||||
curChunkNum uint64
|
curChunkNum uint64
|
||||||
|
@ -36,21 +44,12 @@ type docValueIterator struct {
|
||||||
curChunkData []byte // compressed data cache
|
curChunkData []byte // compressed data cache
|
||||||
}
|
}
|
||||||
|
|
||||||
func (di *docValueIterator) sizeInBytes() uint64 {
|
func (di *docValueIterator) size() int {
|
||||||
// curChunkNum, numChunks, dvDataLoc --> uint64
|
return reflectStaticSizedocValueIterator + size.SizeOfPtr +
|
||||||
sizeInBytes := 24
|
len(di.field) +
|
||||||
|
len(di.chunkLens)*size.SizeOfUint64 +
|
||||||
// field
|
len(di.curChunkHeader)*reflectStaticSizeMetaData +
|
||||||
sizeInBytes += (len(di.field) + int(segment.SizeOfString))
|
len(di.curChunkData)
|
||||||
|
|
||||||
// chunkLens, curChunkHeader
|
|
||||||
sizeInBytes += len(di.chunkLens)*8 +
|
|
||||||
len(di.curChunkHeader)*24 +
|
|
||||||
int(segment.SizeOfSlice*2) /* overhead from slices */
|
|
||||||
|
|
||||||
// curChunkData is mmap'ed, not included
|
|
||||||
|
|
||||||
return uint64(sizeInBytes)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (di *docValueIterator) fieldName() string {
|
func (di *docValueIterator) fieldName() string {
|
||||||
|
|
|
@ -82,6 +82,19 @@ func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *chunkedIntCoder) AddBytes(docNum uint64, buf []byte) error {
|
||||||
|
chunk := docNum / c.chunkSize
|
||||||
|
if chunk != c.currChunk {
|
||||||
|
// starting a new chunk
|
||||||
|
c.Close()
|
||||||
|
c.chunkBuf.Reset()
|
||||||
|
c.currChunk = chunk
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err := c.chunkBuf.Write(buf)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// Close indicates you are done calling Add(); this allows the final chunk
|
// Close indicates you are done calling Add(); this allows the final chunk
|
||||||
// to be encoded.
|
// to be encoded.
|
||||||
func (c *chunkedIntCoder) Close() {
|
func (c *chunkedIntCoder) Close() {
|
||||||
|
|
|
@ -162,7 +162,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
|
|
||||||
var bufReuse bytes.Buffer
|
var bufReuse bytes.Buffer
|
||||||
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
|
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
|
||||||
var bufLoc []uint64
|
|
||||||
|
|
||||||
var postings *PostingsList
|
var postings *PostingsList
|
||||||
var postItr *PostingsIterator
|
var postItr *PostingsIterator
|
||||||
|
@ -316,45 +315,32 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
newDocNumsI := newDocNums[itrI]
|
newDocNumsI := newDocNums[itrI]
|
||||||
|
|
||||||
postItr = postings.iterator(postItr)
|
postItr = postings.iterator(postItr)
|
||||||
next, err2 := postItr.Next()
|
|
||||||
for next != nil && err2 == nil {
|
nextDocNum, nextFreqNormBytes, nextLocBytes, err2 := postItr.nextBytes()
|
||||||
hitNewDocNum := newDocNumsI[next.Number()]
|
for err2 == nil && len(nextFreqNormBytes) > 0 {
|
||||||
|
hitNewDocNum := newDocNumsI[nextDocNum]
|
||||||
if hitNewDocNum == docDropped {
|
if hitNewDocNum == docDropped {
|
||||||
return nil, 0, fmt.Errorf("see hit with dropped doc num")
|
return nil, 0, fmt.Errorf("see hit with dropped doc num")
|
||||||
}
|
}
|
||||||
|
|
||||||
newRoaring.Add(uint32(hitNewDocNum))
|
newRoaring.Add(uint32(hitNewDocNum))
|
||||||
// encode norm bits
|
err2 = tfEncoder.AddBytes(hitNewDocNum, nextFreqNormBytes)
|
||||||
norm := next.Norm()
|
if err2 != nil {
|
||||||
normBits := math.Float32bits(float32(norm))
|
return nil, 0, err2
|
||||||
err = tfEncoder.Add(hitNewDocNum, next.Frequency(), uint64(normBits))
|
|
||||||
if err != nil {
|
|
||||||
return nil, 0, err
|
|
||||||
}
|
}
|
||||||
locs := next.Locations()
|
|
||||||
if len(locs) > 0 {
|
if len(nextLocBytes) > 0 {
|
||||||
newRoaringLocs.Add(uint32(hitNewDocNum))
|
newRoaringLocs.Add(uint32(hitNewDocNum))
|
||||||
for _, loc := range locs {
|
err2 = locEncoder.AddBytes(hitNewDocNum, nextLocBytes)
|
||||||
if cap(bufLoc) < 5+len(loc.ArrayPositions()) {
|
if err2 != nil {
|
||||||
bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions()))
|
return nil, 0, err2
|
||||||
}
|
|
||||||
args := bufLoc[0:5]
|
|
||||||
args[0] = uint64(fieldsMap[loc.Field()] - 1)
|
|
||||||
args[1] = loc.Pos()
|
|
||||||
args[2] = loc.Start()
|
|
||||||
args[3] = loc.End()
|
|
||||||
args[4] = uint64(len(loc.ArrayPositions()))
|
|
||||||
args = append(args, loc.ArrayPositions()...)
|
|
||||||
err = locEncoder.Add(hitNewDocNum, args...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, 0, err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
docTermMap[hitNewDocNum] =
|
docTermMap[hitNewDocNum] =
|
||||||
append(append(docTermMap[hitNewDocNum], term...), termSeparator)
|
append(append(docTermMap[hitNewDocNum], term...), termSeparator)
|
||||||
|
|
||||||
next, err2 = postItr.Next()
|
nextDocNum, nextFreqNormBytes, nextLocBytes, err2 = postItr.nextBytes()
|
||||||
}
|
}
|
||||||
if err2 != nil {
|
if err2 != nil {
|
||||||
return nil, 0, err2
|
return nil, 0, err2
|
||||||
|
|
|
@ -19,12 +19,30 @@ import (
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/RoaringBitmap/roaring"
|
"github.com/RoaringBitmap/roaring"
|
||||||
"github.com/Smerity/govarint"
|
"github.com/Smerity/govarint"
|
||||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizePostingsList int
|
||||||
|
var reflectStaticSizePostingsIterator int
|
||||||
|
var reflectStaticSizePosting int
|
||||||
|
var reflectStaticSizeLocation int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var pl PostingsList
|
||||||
|
reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size())
|
||||||
|
var pi PostingsIterator
|
||||||
|
reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size())
|
||||||
|
var p Posting
|
||||||
|
reflectStaticSizePosting = int(reflect.TypeOf(p).Size())
|
||||||
|
var l Location
|
||||||
|
reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
|
||||||
|
}
|
||||||
|
|
||||||
// PostingsList is an in-memory representation of a postings list
|
// PostingsList is an in-memory representation of a postings list
|
||||||
type PostingsList struct {
|
type PostingsList struct {
|
||||||
sb *SegmentBase
|
sb *SegmentBase
|
||||||
|
@ -36,6 +54,28 @@ type PostingsList struct {
|
||||||
except *roaring.Bitmap
|
except *roaring.Bitmap
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *PostingsList) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr
|
||||||
|
|
||||||
|
if p.sb != nil {
|
||||||
|
sizeInBytes += (p.sb.Size() - len(p.sb.mem)) // do not include the mmap'ed part
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.locBitmap != nil {
|
||||||
|
sizeInBytes += int(p.locBitmap.GetSizeInBytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.postings != nil {
|
||||||
|
sizeInBytes += int(p.postings.GetSizeInBytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.except != nil {
|
||||||
|
sizeInBytes += int(p.except.GetSizeInBytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
// Iterator returns an iterator for this postings list
|
// Iterator returns an iterator for this postings list
|
||||||
func (p *PostingsList) Iterator() segment.PostingsIterator {
|
func (p *PostingsList) Iterator() segment.PostingsIterator {
|
||||||
return p.iterator(nil)
|
return p.iterator(nil)
|
||||||
|
@ -45,7 +85,25 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
|
||||||
if rv == nil {
|
if rv == nil {
|
||||||
rv = &PostingsIterator{}
|
rv = &PostingsIterator{}
|
||||||
} else {
|
} else {
|
||||||
|
freqNormReader := rv.freqNormReader
|
||||||
|
if freqNormReader != nil {
|
||||||
|
freqNormReader.Reset([]byte(nil))
|
||||||
|
}
|
||||||
|
freqNormDecoder := rv.freqNormDecoder
|
||||||
|
|
||||||
|
locReader := rv.locReader
|
||||||
|
if locReader != nil {
|
||||||
|
locReader.Reset([]byte(nil))
|
||||||
|
}
|
||||||
|
locDecoder := rv.locDecoder
|
||||||
|
|
||||||
*rv = PostingsIterator{} // clear the struct
|
*rv = PostingsIterator{} // clear the struct
|
||||||
|
|
||||||
|
rv.freqNormReader = freqNormReader
|
||||||
|
rv.freqNormDecoder = freqNormDecoder
|
||||||
|
|
||||||
|
rv.locReader = locReader
|
||||||
|
rv.locDecoder = locDecoder
|
||||||
}
|
}
|
||||||
rv.postings = p
|
rv.postings = p
|
||||||
|
|
||||||
|
@ -175,6 +233,25 @@ type PostingsIterator struct {
|
||||||
nextLocs []Location // reused across Next() calls
|
nextLocs []Location // reused across Next() calls
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (i *PostingsIterator) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr +
|
||||||
|
len(i.currChunkFreqNorm) +
|
||||||
|
len(i.currChunkLoc) +
|
||||||
|
len(i.freqChunkLens)*size.SizeOfUint64 +
|
||||||
|
len(i.locChunkLens)*size.SizeOfUint64 +
|
||||||
|
i.next.Size()
|
||||||
|
|
||||||
|
if i.locBitmap != nil {
|
||||||
|
sizeInBytes += int(i.locBitmap.GetSizeInBytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, entry := range i.nextLocs {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (i *PostingsIterator) loadChunk(chunk int) error {
|
func (i *PostingsIterator) loadChunk(chunk int) error {
|
||||||
if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) {
|
if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) {
|
||||||
return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens))
|
return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens))
|
||||||
|
@ -279,75 +356,23 @@ func (i *PostingsIterator) readLocation(l *Location) error {
|
||||||
|
|
||||||
// Next returns the next posting on the postings list, or nil at the end
|
// Next returns the next posting on the postings list, or nil at the end
|
||||||
func (i *PostingsIterator) Next() (segment.Posting, error) {
|
func (i *PostingsIterator) Next() (segment.Posting, error) {
|
||||||
if i.actual == nil || !i.actual.HasNext() {
|
docNum, exists, err := i.nextDocNum()
|
||||||
return nil, nil
|
if err != nil || !exists {
|
||||||
}
|
return nil, err
|
||||||
n := i.actual.Next()
|
|
||||||
nChunk := n / i.postings.sb.chunkFactor
|
|
||||||
allN := i.all.Next()
|
|
||||||
allNChunk := allN / i.postings.sb.chunkFactor
|
|
||||||
|
|
||||||
// n is the next actual hit (excluding some postings)
|
|
||||||
// allN is the next hit in the full postings
|
|
||||||
// if they don't match, adjust offsets to factor in item we're skipping over
|
|
||||||
// incr the all iterator, and check again
|
|
||||||
for allN != n {
|
|
||||||
|
|
||||||
// in different chunks, reset offsets
|
|
||||||
if allNChunk != nChunk {
|
|
||||||
i.locoffset = 0
|
|
||||||
i.offset = 0
|
|
||||||
} else {
|
|
||||||
|
|
||||||
if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
|
|
||||||
err := i.loadChunk(int(nChunk))
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("error loading chunk: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// read off freq/offsets even though we don't care about them
|
|
||||||
freq, _, err := i.readFreqNorm()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if i.locBitmap.Contains(allN) {
|
|
||||||
for j := 0; j < int(freq); j++ {
|
|
||||||
err := i.readLocation(nil)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// in same chunk, need to account for offsets
|
|
||||||
i.offset++
|
|
||||||
}
|
|
||||||
|
|
||||||
allN = i.all.Next()
|
|
||||||
}
|
|
||||||
|
|
||||||
if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
|
|
||||||
err := i.loadChunk(int(nChunk))
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("error loading chunk: %v", err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
reuseLocs := i.next.locs // hold for reuse before struct clearing
|
reuseLocs := i.next.locs // hold for reuse before struct clearing
|
||||||
i.next = Posting{} // clear the struct
|
i.next = Posting{} // clear the struct
|
||||||
rv := &i.next
|
rv := &i.next
|
||||||
rv.iterator = i
|
rv.docNum = docNum
|
||||||
rv.docNum = uint64(n)
|
|
||||||
|
|
||||||
var err error
|
|
||||||
var normBits uint64
|
var normBits uint64
|
||||||
rv.freq, normBits, err = i.readFreqNorm()
|
rv.freq, normBits, err = i.readFreqNorm()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
rv.norm = math.Float32frombits(uint32(normBits))
|
rv.norm = math.Float32frombits(uint32(normBits))
|
||||||
if i.locBitmap.Contains(n) {
|
if i.locBitmap.Contains(uint32(docNum)) {
|
||||||
// read off 'freq' locations, into reused slices
|
// read off 'freq' locations, into reused slices
|
||||||
if cap(i.nextLocs) >= int(rv.freq) {
|
if cap(i.nextLocs) >= int(rv.freq) {
|
||||||
i.nextLocs = i.nextLocs[0:rv.freq]
|
i.nextLocs = i.nextLocs[0:rv.freq]
|
||||||
|
@ -371,14 +396,121 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
|
||||||
return rv, nil
|
return rv, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// nextBytes returns the docNum and the encoded freq & loc bytes for
|
||||||
|
// the next posting
|
||||||
|
func (i *PostingsIterator) nextBytes() (uint64, []byte, []byte, error) {
|
||||||
|
docNum, exists, err := i.nextDocNum()
|
||||||
|
if err != nil {
|
||||||
|
return 0, nil, nil, err
|
||||||
|
}
|
||||||
|
if !exists {
|
||||||
|
return 0, nil, nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
||||||
|
|
||||||
|
freq, _, err := i.readFreqNorm()
|
||||||
|
if err != nil {
|
||||||
|
return 0, nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
||||||
|
bytesFreqNorm := i.currChunkFreqNorm[startFreqNorm:endFreqNorm]
|
||||||
|
|
||||||
|
var bytesLoc []byte
|
||||||
|
if i.locBitmap.Contains(uint32(docNum)) {
|
||||||
|
startLoc := len(i.currChunkLoc) - i.locReader.Len()
|
||||||
|
|
||||||
|
for j := uint64(0); j < freq; j++ {
|
||||||
|
err := i.readLocation(nil)
|
||||||
|
if err != nil {
|
||||||
|
return 0, nil, nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
endLoc := len(i.currChunkLoc) - i.locReader.Len()
|
||||||
|
bytesLoc = i.currChunkLoc[startLoc:endLoc]
|
||||||
|
}
|
||||||
|
|
||||||
|
return docNum, bytesFreqNorm, bytesLoc, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// nextDocNum returns the next docNum on the postings list, and also
|
||||||
|
// sets up the currChunk / loc related fields of the iterator.
|
||||||
|
func (i *PostingsIterator) nextDocNum() (uint64, bool, error) {
|
||||||
|
if i.actual == nil || !i.actual.HasNext() {
|
||||||
|
return 0, false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
n := i.actual.Next()
|
||||||
|
nChunk := n / i.postings.sb.chunkFactor
|
||||||
|
allN := i.all.Next()
|
||||||
|
allNChunk := allN / i.postings.sb.chunkFactor
|
||||||
|
|
||||||
|
// n is the next actual hit (excluding some postings)
|
||||||
|
// allN is the next hit in the full postings
|
||||||
|
// if they don't match, adjust offsets to factor in item we're skipping over
|
||||||
|
// incr the all iterator, and check again
|
||||||
|
for allN != n {
|
||||||
|
// in different chunks, reset offsets
|
||||||
|
if allNChunk != nChunk {
|
||||||
|
i.locoffset = 0
|
||||||
|
i.offset = 0
|
||||||
|
} else {
|
||||||
|
if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
|
||||||
|
err := i.loadChunk(int(nChunk))
|
||||||
|
if err != nil {
|
||||||
|
return 0, false, fmt.Errorf("error loading chunk: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// read off freq/offsets even though we don't care about them
|
||||||
|
freq, _, err := i.readFreqNorm()
|
||||||
|
if err != nil {
|
||||||
|
return 0, false, err
|
||||||
|
}
|
||||||
|
if i.locBitmap.Contains(allN) {
|
||||||
|
for j := 0; j < int(freq); j++ {
|
||||||
|
err := i.readLocation(nil)
|
||||||
|
if err != nil {
|
||||||
|
return 0, false, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// in same chunk, need to account for offsets
|
||||||
|
i.offset++
|
||||||
|
}
|
||||||
|
|
||||||
|
allN = i.all.Next()
|
||||||
|
}
|
||||||
|
|
||||||
|
if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
|
||||||
|
err := i.loadChunk(int(nChunk))
|
||||||
|
if err != nil {
|
||||||
|
return 0, false, fmt.Errorf("error loading chunk: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return uint64(n), true, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Posting is a single entry in a postings list
|
// Posting is a single entry in a postings list
|
||||||
type Posting struct {
|
type Posting struct {
|
||||||
iterator *PostingsIterator
|
docNum uint64
|
||||||
docNum uint64
|
freq uint64
|
||||||
|
norm float32
|
||||||
|
locs []segment.Location
|
||||||
|
}
|
||||||
|
|
||||||
freq uint64
|
func (p *Posting) Size() int {
|
||||||
norm float32
|
sizeInBytes := reflectStaticSizePosting
|
||||||
locs []segment.Location
|
|
||||||
|
for _, entry := range p.locs {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
}
|
}
|
||||||
|
|
||||||
// Number returns the document number of this posting in this segment
|
// Number returns the document number of this posting in this segment
|
||||||
|
@ -410,6 +542,12 @@ type Location struct {
|
||||||
ap []uint64
|
ap []uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (l *Location) Size() int {
|
||||||
|
return reflectStaticSizeLocation +
|
||||||
|
len(l.field) +
|
||||||
|
len(l.ap)*size.SizeOfUint64
|
||||||
|
}
|
||||||
|
|
||||||
// Field returns the name of the field (useful in composite fields to know
|
// Field returns the name of the field (useful in composite fields to know
|
||||||
// which original field the value came from)
|
// which original field the value came from)
|
||||||
func (l *Location) Field() string {
|
func (l *Location) Field() string {
|
||||||
|
|
|
@ -20,16 +20,25 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
|
"reflect"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/RoaringBitmap/roaring"
|
"github.com/RoaringBitmap/roaring"
|
||||||
"github.com/Smerity/govarint"
|
"github.com/Smerity/govarint"
|
||||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
"github.com/couchbase/vellum"
|
"github.com/couchbase/vellum"
|
||||||
mmap "github.com/edsrzf/mmap-go"
|
mmap "github.com/edsrzf/mmap-go"
|
||||||
"github.com/golang/snappy"
|
"github.com/golang/snappy"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeSegmentBase int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var sb SegmentBase
|
||||||
|
reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size())
|
||||||
|
}
|
||||||
|
|
||||||
// Open returns a zap impl of a segment
|
// Open returns a zap impl of a segment
|
||||||
func Open(path string) (segment.Segment, error) {
|
func Open(path string) (segment.Segment, error) {
|
||||||
f, err := os.Open(path)
|
f, err := os.Open(path)
|
||||||
|
@ -92,6 +101,32 @@ type SegmentBase struct {
|
||||||
fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field
|
fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (sb *SegmentBase) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeSegmentBase +
|
||||||
|
len(sb.mem)
|
||||||
|
|
||||||
|
// fieldsMap
|
||||||
|
for k, _ := range sb.fieldsMap {
|
||||||
|
sizeInBytes += (len(k) + size.SizeOfString) + size.SizeOfUint16
|
||||||
|
}
|
||||||
|
|
||||||
|
// fieldsInv, dictLocs
|
||||||
|
for _, entry := range sb.fieldsInv {
|
||||||
|
sizeInBytes += len(entry) + size.SizeOfString
|
||||||
|
}
|
||||||
|
sizeInBytes += len(sb.dictLocs) * size.SizeOfUint64
|
||||||
|
|
||||||
|
// fieldDvIterMap
|
||||||
|
for _, v := range sb.fieldDvIterMap {
|
||||||
|
sizeInBytes += size.SizeOfUint16 + size.SizeOfPtr
|
||||||
|
if v != nil {
|
||||||
|
sizeInBytes += v.size()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (sb *SegmentBase) AddRef() {}
|
func (sb *SegmentBase) AddRef() {}
|
||||||
func (sb *SegmentBase) DecRef() (err error) { return nil }
|
func (sb *SegmentBase) DecRef() (err error) { return nil }
|
||||||
func (sb *SegmentBase) Close() (err error) { return nil }
|
func (sb *SegmentBase) Close() (err error) { return nil }
|
||||||
|
@ -111,56 +146,19 @@ type Segment struct {
|
||||||
refs int64
|
refs int64
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Segment) SizeInBytes() uint64 {
|
func (s *Segment) Size() int {
|
||||||
// 8 /* size of file pointer */
|
// 8 /* size of file pointer */
|
||||||
// 4 /* size of version -> uint32 */
|
// 4 /* size of version -> uint32 */
|
||||||
// 4 /* size of crc -> uint32 */
|
// 4 /* size of crc -> uint32 */
|
||||||
sizeOfUints := 16
|
sizeOfUints := 16
|
||||||
|
|
||||||
sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints
|
sizeInBytes := (len(s.path) + size.SizeOfString) + sizeOfUints
|
||||||
|
|
||||||
// mutex, refs -> int64
|
// mutex, refs -> int64
|
||||||
sizeInBytes += 16
|
sizeInBytes += 16
|
||||||
|
|
||||||
// do not include the mmap'ed part
|
// do not include the mmap'ed part
|
||||||
return uint64(sizeInBytes) + s.SegmentBase.SizeInBytes() - uint64(len(s.mem))
|
return sizeInBytes + s.SegmentBase.Size() - len(s.mem)
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SegmentBase) SizeInBytes() uint64 {
|
|
||||||
// 4 /* size of memCRC -> uint32 */
|
|
||||||
// 4 /* size of chunkFactor -> uint32 */
|
|
||||||
// 8 /* size of numDocs -> uint64 */
|
|
||||||
// 8 /* size of storedIndexOffset -> uint64 */
|
|
||||||
// 8 /* size of fieldsIndexOffset -> uint64 */
|
|
||||||
// 8 /* size of docValueOffset -> uint64 */
|
|
||||||
sizeInBytes := 40
|
|
||||||
|
|
||||||
sizeInBytes += len(s.mem) + int(segment.SizeOfSlice)
|
|
||||||
|
|
||||||
// fieldsMap
|
|
||||||
for k, _ := range s.fieldsMap {
|
|
||||||
sizeInBytes += (len(k) + int(segment.SizeOfString)) + 2 /* size of uint16 */
|
|
||||||
}
|
|
||||||
sizeInBytes += int(segment.SizeOfMap) /* overhead from map */
|
|
||||||
|
|
||||||
// fieldsInv, dictLocs
|
|
||||||
for _, entry := range s.fieldsInv {
|
|
||||||
sizeInBytes += (len(entry) + int(segment.SizeOfString))
|
|
||||||
}
|
|
||||||
sizeInBytes += len(s.dictLocs) * 8 /* size of uint64 */
|
|
||||||
sizeInBytes += int(segment.SizeOfSlice) * 3 /* overhead from slices */
|
|
||||||
|
|
||||||
// fieldDvIterMap
|
|
||||||
sizeInBytes += len(s.fieldDvIterMap) *
|
|
||||||
int(segment.SizeOfPointer+2 /* size of uint16 */)
|
|
||||||
for _, entry := range s.fieldDvIterMap {
|
|
||||||
if entry != nil {
|
|
||||||
sizeInBytes += int(entry.sizeInBytes())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sizeInBytes += int(segment.SizeOfMap)
|
|
||||||
|
|
||||||
return uint64(sizeInBytes)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Segment) AddRef() {
|
func (s *Segment) AddRef() {
|
||||||
|
|
|
@ -27,6 +27,7 @@ import (
|
||||||
"github.com/blevesearch/bleve/document"
|
"github.com/blevesearch/bleve/document"
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
type asynchSegmentResult struct {
|
type asynchSegmentResult struct {
|
||||||
|
@ -89,6 +90,12 @@ func (i *IndexSnapshot) Close() error {
|
||||||
return i.DecRef()
|
return i.DecRef()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (i *IndexSnapshot) Size() int {
|
||||||
|
// Just return the size of the pointer for estimating the overhead
|
||||||
|
// during Search, a reference of the IndexSnapshot serves as the reader.
|
||||||
|
return size.SizeOfPtr
|
||||||
|
}
|
||||||
|
|
||||||
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
|
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
|
||||||
|
|
||||||
results := make(chan *asynchSegmentResult)
|
results := make(chan *asynchSegmentResult)
|
||||||
|
|
|
@ -16,17 +16,30 @@ package scorch
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/RoaringBitmap/roaring"
|
"github.com/RoaringBitmap/roaring"
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeIndexSnapshotDocIDReader int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var isdr IndexSnapshotDocIDReader
|
||||||
|
reflectStaticSizeIndexSnapshotDocIDReader = int(reflect.TypeOf(isdr).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type IndexSnapshotDocIDReader struct {
|
type IndexSnapshotDocIDReader struct {
|
||||||
snapshot *IndexSnapshot
|
snapshot *IndexSnapshot
|
||||||
iterators []roaring.IntIterable
|
iterators []roaring.IntIterable
|
||||||
segmentOffset int
|
segmentOffset int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (i *IndexSnapshotDocIDReader) Size() int {
|
||||||
|
return reflectStaticSizeIndexSnapshotDocIDReader + size.SizeOfPtr
|
||||||
|
}
|
||||||
|
|
||||||
func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) {
|
func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) {
|
||||||
for i.segmentOffset < len(i.iterators) {
|
for i.segmentOffset < len(i.iterators) {
|
||||||
if !i.iterators[i.segmentOffset].HasNext() {
|
if !i.iterators[i.segmentOffset].HasNext() {
|
||||||
|
|
|
@ -16,12 +16,21 @@ package scorch
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"reflect"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeIndexSnapshotTermFieldReader int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var istfr IndexSnapshotTermFieldReader
|
||||||
|
reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type IndexSnapshotTermFieldReader struct {
|
type IndexSnapshotTermFieldReader struct {
|
||||||
term []byte
|
term []byte
|
||||||
field string
|
field string
|
||||||
|
@ -36,6 +45,27 @@ type IndexSnapshotTermFieldReader struct {
|
||||||
currID index.IndexInternalID
|
currID index.IndexInternalID
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (i *IndexSnapshotTermFieldReader) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr +
|
||||||
|
len(i.term) +
|
||||||
|
len(i.field) +
|
||||||
|
len(i.currID)
|
||||||
|
|
||||||
|
for _, entry := range i.postings {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, entry := range i.iterators {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
if i.currPosting != nil {
|
||||||
|
sizeInBytes += i.currPosting.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
||||||
rv := preAlloced
|
rv := preAlloced
|
||||||
if rv == nil {
|
if rv == nil {
|
||||||
|
|
|
@ -213,7 +213,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *cachedDocs) sizeInBytes() uint64 {
|
func (c *cachedDocs) size() int {
|
||||||
sizeInBytes := 0
|
sizeInBytes := 0
|
||||||
c.m.Lock()
|
c.m.Lock()
|
||||||
for k, v := range c.cache { // cachedFieldDocs
|
for k, v := range c.cache { // cachedFieldDocs
|
||||||
|
@ -225,5 +225,5 @@ func (c *cachedDocs) sizeInBytes() uint64 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
c.m.Unlock()
|
c.m.Unlock()
|
||||||
return uint64(sizeInBytes)
|
return sizeInBytes
|
||||||
}
|
}
|
||||||
|
|
|
@ -89,8 +89,10 @@ type Stats struct {
|
||||||
TotFileMergeSegments uint64
|
TotFileMergeSegments uint64
|
||||||
TotFileMergeWrittenBytes uint64
|
TotFileMergeWrittenBytes uint64
|
||||||
|
|
||||||
TotFileMergeZapBeg uint64
|
TotFileMergeZapBeg uint64
|
||||||
TotFileMergeZapEnd uint64
|
TotFileMergeZapEnd uint64
|
||||||
|
TotFileMergeZapTime uint64
|
||||||
|
MaxFileMergeZapTime uint64
|
||||||
|
|
||||||
TotFileMergeIntroductions uint64
|
TotFileMergeIntroductions uint64
|
||||||
TotFileMergeIntroductionsDone uint64
|
TotFileMergeIntroductionsDone uint64
|
||||||
|
@ -100,6 +102,8 @@ type Stats struct {
|
||||||
TotMemMergeDone uint64
|
TotMemMergeDone uint64
|
||||||
TotMemMergeZapBeg uint64
|
TotMemMergeZapBeg uint64
|
||||||
TotMemMergeZapEnd uint64
|
TotMemMergeZapEnd uint64
|
||||||
|
TotMemMergeZapTime uint64
|
||||||
|
MaxMemMergeZapTime uint64
|
||||||
TotMemMergeSegments uint64
|
TotMemMergeSegments uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -15,17 +15,31 @@
|
||||||
package upsidedown
|
package upsidedown
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/document"
|
"github.com/blevesearch/bleve/document"
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/index/store"
|
"github.com/blevesearch/bleve/index/store"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeIndexReader int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var ir IndexReader
|
||||||
|
reflectStaticSizeIndexReader = int(reflect.TypeOf(ir).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type IndexReader struct {
|
type IndexReader struct {
|
||||||
index *UpsideDownCouch
|
index *UpsideDownCouch
|
||||||
kvreader store.KVReader
|
kvreader store.KVReader
|
||||||
docCount uint64
|
docCount uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (i *IndexReader) Size() int {
|
||||||
|
return reflectStaticSizeIndexReader + size.SizeOfPtr
|
||||||
|
}
|
||||||
|
|
||||||
func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
||||||
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
|
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
|
||||||
if fieldExists {
|
if fieldExists {
|
||||||
|
|
|
@ -16,13 +16,27 @@ package upsidedown
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/index/store"
|
"github.com/blevesearch/bleve/index/store"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeUpsideDownCouchTermFieldReader int
|
||||||
|
var reflectStaticSizeUpsideDownCouchDocIDReader int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var tfr UpsideDownCouchTermFieldReader
|
||||||
|
reflectStaticSizeUpsideDownCouchTermFieldReader =
|
||||||
|
int(reflect.TypeOf(tfr).Size())
|
||||||
|
var cdr UpsideDownCouchDocIDReader
|
||||||
|
reflectStaticSizeUpsideDownCouchDocIDReader =
|
||||||
|
int(reflect.TypeOf(cdr).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type UpsideDownCouchTermFieldReader struct {
|
type UpsideDownCouchTermFieldReader struct {
|
||||||
count uint64
|
count uint64
|
||||||
indexReader *IndexReader
|
indexReader *IndexReader
|
||||||
|
@ -35,6 +49,19 @@ type UpsideDownCouchTermFieldReader struct {
|
||||||
includeTermVectors bool
|
includeTermVectors bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *UpsideDownCouchTermFieldReader) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr +
|
||||||
|
len(r.term) +
|
||||||
|
r.tfrPrealloc.Size() +
|
||||||
|
len(r.keyBuf)
|
||||||
|
|
||||||
|
if r.tfrNext != nil {
|
||||||
|
sizeInBytes += r.tfrNext.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
|
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
|
||||||
bufNeeded := termFrequencyRowKeySize(term, nil)
|
bufNeeded := termFrequencyRowKeySize(term, nil)
|
||||||
if bufNeeded < dictionaryRowKeySize(term) {
|
if bufNeeded < dictionaryRowKeySize(term) {
|
||||||
|
@ -174,8 +201,18 @@ type UpsideDownCouchDocIDReader struct {
|
||||||
onlyMode bool
|
onlyMode bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
|
func (r *UpsideDownCouchDocIDReader) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader +
|
||||||
|
r.indexReader.Size()
|
||||||
|
|
||||||
|
for _, entry := range r.only {
|
||||||
|
sizeInBytes += size.SizeOfString + len(entry)
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
|
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
|
||||||
startBytes := []byte{0x0}
|
startBytes := []byte{0x0}
|
||||||
endBytes := []byte{0xff}
|
endBytes := []byte{0xff}
|
||||||
|
|
||||||
|
|
|
@ -20,10 +20,22 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"math"
|
"math"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
"github.com/golang/protobuf/proto"
|
"github.com/golang/protobuf/proto"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeTermFrequencyRow int
|
||||||
|
var reflectStaticSizeTermVector int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var tfr TermFrequencyRow
|
||||||
|
reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(tfr).Size())
|
||||||
|
var tv TermVector
|
||||||
|
reflectStaticSizeTermVector = int(reflect.TypeOf(tv).Size())
|
||||||
|
}
|
||||||
|
|
||||||
const ByteSeparator byte = 0xff
|
const ByteSeparator byte = 0xff
|
||||||
|
|
||||||
type UpsideDownCouchRowStream chan UpsideDownCouchRow
|
type UpsideDownCouchRowStream chan UpsideDownCouchRow
|
||||||
|
@ -358,6 +370,11 @@ type TermVector struct {
|
||||||
end uint64
|
end uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (tv *TermVector) Size() int {
|
||||||
|
return reflectStaticSizeTermVector + size.SizeOfPtr +
|
||||||
|
len(tv.arrayPositions)*size.SizeOfUint64
|
||||||
|
}
|
||||||
|
|
||||||
func (tv *TermVector) String() string {
|
func (tv *TermVector) String() string {
|
||||||
return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
|
return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
|
||||||
}
|
}
|
||||||
|
@ -371,6 +388,18 @@ type TermFrequencyRow struct {
|
||||||
field uint16
|
field uint16
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (tfr *TermFrequencyRow) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeTermFrequencyRow +
|
||||||
|
len(tfr.term) +
|
||||||
|
len(tfr.doc)
|
||||||
|
|
||||||
|
for _, entry := range tfr.vectors {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (tfr *TermFrequencyRow) Term() []byte {
|
func (tfr *TermFrequencyRow) Term() []byte {
|
||||||
return tfr.term
|
return tfr.term
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,6 +50,10 @@ const storePath = "store"
|
||||||
|
|
||||||
var mappingInternalKey = []byte("_mapping")
|
var mappingInternalKey = []byte("_mapping")
|
||||||
|
|
||||||
|
// SearchMemCheckCallbackKey is the context value key under which
// SearchInContext looks up an optional SearchMemCheckCallbackFn.
const SearchMemCheckCallbackKey = "_search_mem_callback_key"
|
||||||
|
|
||||||
|
// SearchMemCheckCallbackFn is called by SearchInContext with the estimated
// number of bytes the search needs; a non-nil error is returned to the caller
// of SearchInContext instead of running the collection.
type SearchMemCheckCallbackFn func(size uint64) error
|
||||||
|
|
||||||
func indexStorePath(path string) string {
|
func indexStorePath(path string) string {
|
||||||
return path + string(os.PathSeparator) + storePath
|
return path + string(os.PathSeparator) + storePath
|
||||||
}
|
}
|
||||||
|
@ -362,8 +366,59 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
|
||||||
return i.SearchInContext(context.Background(), req)
|
return i.SearchInContext(context.Background(), req)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// memNeededForSearch is a helper function that returns an estimate of RAM
|
||||||
|
// needed to execute a search request.
|
||||||
|
func memNeededForSearch(req *SearchRequest,
|
||||||
|
searcher search.Searcher,
|
||||||
|
topnCollector *collector.TopNCollector) uint64 {
|
||||||
|
|
||||||
|
backingSize := req.Size + req.From + 1
|
||||||
|
if req.Size+req.From > collector.PreAllocSizeSkipCap {
|
||||||
|
backingSize = collector.PreAllocSizeSkipCap + 1
|
||||||
|
}
|
||||||
|
numDocMatches := backingSize + searcher.DocumentMatchPoolSize()
|
||||||
|
|
||||||
|
estimate := 0
|
||||||
|
|
||||||
|
// overhead, size in bytes from collector
|
||||||
|
estimate += topnCollector.Size()
|
||||||
|
|
||||||
|
var dm search.DocumentMatch
|
||||||
|
sizeOfDocumentMatch := dm.Size()
|
||||||
|
|
||||||
|
// pre-allocing DocumentMatchPool
|
||||||
|
var sc search.SearchContext
|
||||||
|
estimate += sc.Size() + numDocMatches*sizeOfDocumentMatch
|
||||||
|
|
||||||
|
// searcher overhead
|
||||||
|
estimate += searcher.Size()
|
||||||
|
|
||||||
|
// overhead from results, lowestMatchOutsideResults
|
||||||
|
estimate += (numDocMatches + 1) * sizeOfDocumentMatch
|
||||||
|
|
||||||
|
// additional overhead from SearchResult
|
||||||
|
var sr SearchResult
|
||||||
|
estimate += sr.Size()
|
||||||
|
|
||||||
|
// overhead from facet results
|
||||||
|
if req.Facets != nil {
|
||||||
|
var fr search.FacetResult
|
||||||
|
estimate += len(req.Facets) * fr.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
// highlighting, store
|
||||||
|
var d document.Document
|
||||||
|
if len(req.Fields) > 0 || req.Highlight != nil {
|
||||||
|
for i := 0; i < (req.Size + req.From); i++ { // size + from => number of hits
|
||||||
|
estimate += (req.Size + req.From) * d.Size()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return uint64(estimate)
|
||||||
|
}
|
||||||
|
|
||||||
// SearchInContext executes a search request operation within the provided
|
// SearchInContext executes a search request operation within the provided
|
||||||
// Context. Returns a SearchResult object or an error.
|
// Context. Returns a SearchResult object or an error.
|
||||||
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
|
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
|
||||||
i.mutex.RLock()
|
i.mutex.RLock()
|
||||||
defer i.mutex.RUnlock()
|
defer i.mutex.RUnlock()
|
||||||
|
@ -428,6 +483,15 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
||||||
collector.SetFacetsBuilder(facetsBuilder)
|
collector.SetFacetsBuilder(facetsBuilder)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if memCb := ctx.Value(SearchMemCheckCallbackKey); memCb != nil {
|
||||||
|
if memCbFn, ok := memCb.(SearchMemCheckCallbackFn); ok {
|
||||||
|
err = memCbFn(memNeededForSearch(req, searcher, collector))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
err = collector.Collect(ctx, searcher, indexReader)
|
err = collector.Collect(ctx, searcher, indexReader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|
102
index_test.go
102
index_test.go
|
@ -36,6 +36,9 @@ import (
|
||||||
"github.com/blevesearch/bleve/mapping"
|
"github.com/blevesearch/bleve/mapping"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
"github.com/blevesearch/bleve/search/query"
|
"github.com/blevesearch/bleve/search/query"
|
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index/scorch"
|
||||||
|
"github.com/blevesearch/bleve/index/upsidedown"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestCrud(t *testing.T) {
|
func TestCrud(t *testing.T) {
|
||||||
|
@ -1815,3 +1818,102 @@ func TestIndexAdvancedCountMatchSearch(t *testing.T) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func benchmarkSearchOverhead(indexType string, b *testing.B) {
|
||||||
|
defer func() {
|
||||||
|
err := os.RemoveAll("testidx")
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
index, err := NewUsing("testidx", NewIndexMapping(),
|
||||||
|
indexType, Config.DefaultKVStore, nil)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
err := index.Close()
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
elements := []string{"air", "water", "fire", "earth"}
|
||||||
|
for j := 0; j < 10000; j++ {
|
||||||
|
err = index.Index(fmt.Sprintf("%d", j),
|
||||||
|
map[string]interface{}{"name": elements[j%len(elements)]})
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
query1 := NewTermQuery("water")
|
||||||
|
query2 := NewTermQuery("fire")
|
||||||
|
query := NewDisjunctionQuery(query1, query2)
|
||||||
|
req := NewSearchRequest(query)
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
|
||||||
|
for n := 0; n < b.N; n++ {
|
||||||
|
_, err = index.Search(req)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkUpsidedownSearchOverhead(b *testing.B) {
|
||||||
|
benchmarkSearchOverhead(upsidedown.Name, b)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkScorchSearchOverhead(b *testing.B) {
|
||||||
|
benchmarkSearchOverhead(scorch.Name, b)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearchMemCheckCallback(t *testing.T) {
|
||||||
|
defer func() {
|
||||||
|
err := os.RemoveAll("testidx")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
index, err := New("testidx", NewIndexMapping())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
err := index.Close()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
elements := []string{"air", "water", "fire", "earth"}
|
||||||
|
for j := 0; j < 10000; j++ {
|
||||||
|
err = index.Index(fmt.Sprintf("%d", j),
|
||||||
|
map[string]interface{}{"name": elements[j%len(elements)]})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
query := NewTermQuery("water")
|
||||||
|
req := NewSearchRequest(query)
|
||||||
|
|
||||||
|
expErr := fmt.Errorf("MEM_LIMIT_EXCEEDED")
|
||||||
|
f := func(size uint64) error {
|
||||||
|
if size > 1000 {
|
||||||
|
return expErr
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := context.WithValue(context.Background(), SearchMemCheckCallbackKey,
|
||||||
|
SearchMemCheckCallbackFn(f))
|
||||||
|
_, err = index.SearchInContext(ctx, req)
|
||||||
|
if err != expErr {
|
||||||
|
t.Fatalf("Expected: %v, Got: %v", expErr, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
30
search.go
30
search.go
|
@ -17,6 +17,7 @@ package bleve
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"reflect"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/analysis"
|
"github.com/blevesearch/bleve/analysis"
|
||||||
|
@ -24,8 +25,19 @@ import (
|
||||||
"github.com/blevesearch/bleve/registry"
|
"github.com/blevesearch/bleve/registry"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
"github.com/blevesearch/bleve/search/query"
|
"github.com/blevesearch/bleve/search/query"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeSearchResult int
|
||||||
|
var reflectStaticSizeSearchStatus int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var sr SearchResult
|
||||||
|
reflectStaticSizeSearchResult = int(reflect.TypeOf(sr).Size())
|
||||||
|
var ss SearchStatus
|
||||||
|
reflectStaticSizeSearchStatus = int(reflect.TypeOf(ss).Size())
|
||||||
|
}
|
||||||
|
|
||||||
var cache = registry.NewCache()
|
var cache = registry.NewCache()
|
||||||
|
|
||||||
const defaultDateTimeParser = optional.Name
|
const defaultDateTimeParser = optional.Name
|
||||||
|
@ -432,6 +444,24 @@ type SearchResult struct {
|
||||||
Facets search.FacetResults `json:"facets"`
|
Facets search.FacetResults `json:"facets"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (sr *SearchResult) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeSearchResult + size.SizeOfPtr +
|
||||||
|
reflectStaticSizeSearchStatus
|
||||||
|
|
||||||
|
for _, entry := range sr.Hits {
|
||||||
|
if entry != nil {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, v := range sr.Facets {
|
||||||
|
sizeInBytes += size.SizeOfString + len(k) +
|
||||||
|
v.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (sr *SearchResult) String() string {
|
func (sr *SearchResult) String() string {
|
||||||
rv := ""
|
rv := ""
|
||||||
if sr.Total > 0 {
|
if sr.Total > 0 {
|
||||||
|
|
|
@ -15,6 +15,8 @@
|
||||||
package collector
|
package collector
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/document"
|
"github.com/blevesearch/bleve/document"
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
|
@ -25,6 +27,18 @@ type stubSearcher struct {
|
||||||
matches []*search.DocumentMatch
|
matches []*search.DocumentMatch
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ss *stubSearcher) Size() int {
|
||||||
|
sizeInBytes := int(reflect.TypeOf(*ss).Size())
|
||||||
|
|
||||||
|
for _, entry := range ss.matches {
|
||||||
|
if entry != nil {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
||||||
if ss.index < len(ss.matches) {
|
if ss.index < len(ss.matches) {
|
||||||
rv := ctx.DocumentMatchPool.Get()
|
rv := ctx.DocumentMatchPool.Get()
|
||||||
|
@ -76,6 +90,10 @@ func (ss *stubSearcher) DocumentMatchPoolSize() int {
|
||||||
|
|
||||||
type stubReader struct{}
|
type stubReader struct{}
|
||||||
|
|
||||||
|
func (sr *stubReader) Size() int {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
func (sr *stubReader) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
func (sr *stubReader) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,12 +16,21 @@ package collector
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"reflect"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeTopNCollector int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var coll TopNCollector
|
||||||
|
reflectStaticSizeTopNCollector = int(reflect.TypeOf(coll).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type collectorStore interface {
|
type collectorStore interface {
|
||||||
// Add the document, and if the new store size exceeds the provided size
|
// Add the document, and if the new store size exceeds the provided size
|
||||||
// the last element is removed and returned. If the size has not been
|
// the last element is removed and returned. If the size has not been
|
||||||
|
@ -98,6 +107,22 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
|
||||||
return hc
|
return hc
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (hc *TopNCollector) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeTopNCollector + size.SizeOfPtr
|
||||||
|
|
||||||
|
if hc.facetsBuilder != nil {
|
||||||
|
sizeInBytes += hc.facetsBuilder.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, entry := range hc.neededFields {
|
||||||
|
sizeInBytes += len(entry) + size.SizeOfString
|
||||||
|
}
|
||||||
|
|
||||||
|
sizeInBytes += len(hc.cachedScoring) + len(hc.cachedDesc)
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
// Collect goes to the index to find the matching documents
|
// Collect goes to the index to find the matching documents
|
||||||
func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
|
func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
|
|
|
@ -17,8 +17,18 @@ package search
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeExplanation int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var e Explanation
|
||||||
|
reflectStaticSizeExplanation = int(reflect.TypeOf(e).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type Explanation struct {
|
type Explanation struct {
|
||||||
Value float64 `json:"value"`
|
Value float64 `json:"value"`
|
||||||
Message string `json:"message"`
|
Message string `json:"message"`
|
||||||
|
@ -32,3 +42,14 @@ func (expl *Explanation) String() string {
|
||||||
}
|
}
|
||||||
return string(js)
|
return string(js)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (expl *Explanation) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeExplanation + size.SizeOfPtr +
|
||||||
|
len(expl.Message)
|
||||||
|
|
||||||
|
for _, entry := range expl.Children {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
|
@ -15,13 +15,25 @@
|
||||||
package facet
|
package facet
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/numeric"
|
"github.com/blevesearch/bleve/numeric"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeDateTimeFacetBuilder int
|
||||||
|
var reflectStaticSizedateTimeRange int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var dtfb DateTimeFacetBuilder
|
||||||
|
reflectStaticSizeDateTimeFacetBuilder = int(reflect.TypeOf(dtfb).Size())
|
||||||
|
var dtr dateTimeRange
|
||||||
|
reflectStaticSizedateTimeRange = int(reflect.TypeOf(dtr).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type dateTimeRange struct {
|
type dateTimeRange struct {
|
||||||
start time.Time
|
start time.Time
|
||||||
end time.Time
|
end time.Time
|
||||||
|
@ -46,6 +58,23 @@ func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (fb *DateTimeFacetBuilder) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeDateTimeFacetBuilder + size.SizeOfPtr +
|
||||||
|
len(fb.field)
|
||||||
|
|
||||||
|
for k, _ := range fb.termsCount {
|
||||||
|
sizeInBytes += size.SizeOfString + len(k) +
|
||||||
|
size.SizeOfInt
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, _ := range fb.ranges {
|
||||||
|
sizeInBytes += size.SizeOfString + len(k) +
|
||||||
|
size.SizeOfPtr + reflectStaticSizedateTimeRange
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
|
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
|
||||||
r := dateTimeRange{
|
r := dateTimeRange{
|
||||||
start: start,
|
start: start,
|
||||||
|
|
|
@ -15,12 +15,24 @@
|
||||||
package facet
|
package facet
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/numeric"
|
"github.com/blevesearch/bleve/numeric"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeNumericFacetBuilder int
|
||||||
|
var reflectStaticSizenumericRange int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var nfb NumericFacetBuilder
|
||||||
|
reflectStaticSizeNumericFacetBuilder = int(reflect.TypeOf(nfb).Size())
|
||||||
|
var nr numericRange
|
||||||
|
reflectStaticSizenumericRange = int(reflect.TypeOf(nr).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type numericRange struct {
|
type numericRange struct {
|
||||||
min *float64
|
min *float64
|
||||||
max *float64
|
max *float64
|
||||||
|
@ -45,6 +57,23 @@ func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (fb *NumericFacetBuilder) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeNumericFacetBuilder + size.SizeOfPtr +
|
||||||
|
len(fb.field)
|
||||||
|
|
||||||
|
for k, _ := range fb.termsCount {
|
||||||
|
sizeInBytes += size.SizeOfString + len(k) +
|
||||||
|
size.SizeOfInt
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, _ := range fb.ranges {
|
||||||
|
sizeInBytes += size.SizeOfString + len(k) +
|
||||||
|
size.SizeOfPtr + reflectStaticSizenumericRange
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
|
func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
|
||||||
r := numericRange{
|
r := numericRange{
|
||||||
min: min,
|
min: min,
|
||||||
|
|
|
@ -15,11 +15,20 @@
|
||||||
package facet
|
package facet
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeTermsFacetBuilder int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var tfb TermsFacetBuilder
|
||||||
|
reflectStaticSizeTermsFacetBuilder = int(reflect.TypeOf(tfb).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type TermsFacetBuilder struct {
|
type TermsFacetBuilder struct {
|
||||||
size int
|
size int
|
||||||
field string
|
field string
|
||||||
|
@ -37,6 +46,18 @@ func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (fb *TermsFacetBuilder) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr +
|
||||||
|
len(fb.field)
|
||||||
|
|
||||||
|
for k, _ := range fb.termsCount {
|
||||||
|
sizeInBytes += size.SizeOfString + len(k) +
|
||||||
|
size.SizeOfInt
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (fb *TermsFacetBuilder) Field() string {
|
func (fb *TermsFacetBuilder) Field() string {
|
||||||
return fb.field
|
return fb.field
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,11 +15,32 @@
|
||||||
package search
|
package search
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeFacetsBuilder int
|
||||||
|
var reflectStaticSizeFacetResult int
|
||||||
|
var reflectStaticSizeTermFacet int
|
||||||
|
var reflectStaticSizeNumericRangeFacet int
|
||||||
|
var reflectStaticSizeDateRangeFacet int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var fb FacetsBuilder
|
||||||
|
reflectStaticSizeFacetsBuilder = int(reflect.TypeOf(fb).Size())
|
||||||
|
var fr FacetResult
|
||||||
|
reflectStaticSizeFacetResult = int(reflect.TypeOf(fr).Size())
|
||||||
|
var tf TermFacet
|
||||||
|
reflectStaticSizeTermFacet = int(reflect.TypeOf(tf).Size())
|
||||||
|
var nrf NumericRangeFacet
|
||||||
|
reflectStaticSizeNumericRangeFacet = int(reflect.TypeOf(nrf).Size())
|
||||||
|
var drf DateRangeFacet
|
||||||
|
reflectStaticSizeDateRangeFacet = int(reflect.TypeOf(drf).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type FacetBuilder interface {
|
type FacetBuilder interface {
|
||||||
StartDoc()
|
StartDoc()
|
||||||
UpdateVisitor(field string, term []byte)
|
UpdateVisitor(field string, term []byte)
|
||||||
|
@ -27,6 +48,8 @@ type FacetBuilder interface {
|
||||||
|
|
||||||
Result() *FacetResult
|
Result() *FacetResult
|
||||||
Field() string
|
Field() string
|
||||||
|
|
||||||
|
Size() int
|
||||||
}
|
}
|
||||||
|
|
||||||
type FacetsBuilder struct {
|
type FacetsBuilder struct {
|
||||||
|
@ -42,6 +65,22 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (fb *FacetsBuilder) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr +
|
||||||
|
fb.indexReader.Size()
|
||||||
|
|
||||||
|
for k, v := range fb.facets {
|
||||||
|
sizeInBytes += size.SizeOfString + len(k) +
|
||||||
|
v.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, entry := range fb.fields {
|
||||||
|
sizeInBytes += size.SizeOfString + len(entry)
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
|
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
|
||||||
fb.facets[name] = facetBuilder
|
fb.facets[name] = facetBuilder
|
||||||
fb.fields = append(fb.fields, facetBuilder.Field())
|
fb.fields = append(fb.fields, facetBuilder.Field())
|
||||||
|
@ -213,6 +252,14 @@ type FacetResult struct {
|
||||||
DateRanges DateRangeFacets `json:"date_ranges,omitempty"`
|
DateRanges DateRangeFacets `json:"date_ranges,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (fr *FacetResult) Size() int {
|
||||||
|
return reflectStaticSizeFacetResult + size.SizeOfPtr +
|
||||||
|
len(fr.Field) +
|
||||||
|
len(fr.Terms)*(reflectStaticSizeTermFacet+size.SizeOfPtr) +
|
||||||
|
len(fr.NumericRanges)*(reflectStaticSizeNumericRangeFacet+size.SizeOfPtr) +
|
||||||
|
len(fr.DateRanges)*(reflectStaticSizeDateRangeFacet+size.SizeOfPtr)
|
||||||
|
}
|
||||||
|
|
||||||
func (fr *FacetResult) Merge(other *FacetResult) {
|
func (fr *FacetResult) Merge(other *FacetResult) {
|
||||||
fr.Total += other.Total
|
fr.Total += other.Total
|
||||||
fr.Missing += other.Missing
|
fr.Missing += other.Missing
|
||||||
|
|
|
@ -14,6 +14,17 @@
|
||||||
|
|
||||||
package search
|
package search
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeDocumentMatchPool int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var dmp DocumentMatchPool
|
||||||
|
reflectStaticSizeDocumentMatchPool = int(reflect.TypeOf(dmp).Size())
|
||||||
|
}
|
||||||
|
|
||||||
// DocumentMatchPoolTooSmall is a callback function that can be executed
|
// DocumentMatchPoolTooSmall is a callback function that can be executed
|
||||||
// when the DocumentMatchPool does not have sufficient capacity
|
// when the DocumentMatchPool does not have sufficient capacity
|
||||||
// By default we just perform just-in-time allocation, but you could log
|
// By default we just perform just-in-time allocation, but you could log
|
||||||
|
|
|
@ -15,13 +15,27 @@
|
||||||
package scorer
|
package scorer
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeConjunctionQueryScorer int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var cqs ConjunctionQueryScorer
|
||||||
|
reflectStaticSizeConjunctionQueryScorer = int(reflect.TypeOf(cqs).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type ConjunctionQueryScorer struct {
|
type ConjunctionQueryScorer struct {
|
||||||
options search.SearcherOptions
|
options search.SearcherOptions
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *ConjunctionQueryScorer) Size() int {
|
||||||
|
return reflectStaticSizeConjunctionQueryScorer + size.SizeOfPtr
|
||||||
|
}
|
||||||
|
|
||||||
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
|
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
|
||||||
return &ConjunctionQueryScorer{
|
return &ConjunctionQueryScorer{
|
||||||
options: options,
|
options: options,
|
||||||
|
|
|
@ -16,11 +16,20 @@ package scorer
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeConstantScorer int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var cs ConstantScorer
|
||||||
|
reflectStaticSizeConstantScorer = int(reflect.TypeOf(cs).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type ConstantScorer struct {
|
type ConstantScorer struct {
|
||||||
constant float64
|
constant float64
|
||||||
boost float64
|
boost float64
|
||||||
|
@ -30,6 +39,16 @@ type ConstantScorer struct {
|
||||||
queryWeightExplanation *search.Explanation
|
queryWeightExplanation *search.Explanation
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *ConstantScorer) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeConstantScorer + size.SizeOfPtr
|
||||||
|
|
||||||
|
if s.queryWeightExplanation != nil {
|
||||||
|
sizeInBytes += s.queryWeightExplanation.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
|
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
|
||||||
rv := ConstantScorer{
|
rv := ConstantScorer{
|
||||||
options: options,
|
options: options,
|
||||||
|
|
|
@ -16,14 +16,27 @@ package scorer
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeDisjunctionQueryScorer int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var dqs DisjunctionQueryScorer
|
||||||
|
reflectStaticSizeDisjunctionQueryScorer = int(reflect.TypeOf(dqs).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type DisjunctionQueryScorer struct {
|
type DisjunctionQueryScorer struct {
|
||||||
options search.SearcherOptions
|
options search.SearcherOptions
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *DisjunctionQueryScorer) Size() int {
|
||||||
|
return reflectStaticSizeDisjunctionQueryScorer + size.SizeOfPtr
|
||||||
|
}
|
||||||
|
|
||||||
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
|
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
|
||||||
return &DisjunctionQueryScorer{
|
return &DisjunctionQueryScorer{
|
||||||
options: options,
|
options: options,
|
||||||
|
|
|
@ -17,11 +17,20 @@ package scorer
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeTermQueryScorer int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var tqs TermQueryScorer
|
||||||
|
reflectStaticSizeTermQueryScorer = int(reflect.TypeOf(tqs).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type TermQueryScorer struct {
|
type TermQueryScorer struct {
|
||||||
queryTerm []byte
|
queryTerm []byte
|
||||||
queryField string
|
queryField string
|
||||||
|
@ -36,6 +45,21 @@ type TermQueryScorer struct {
|
||||||
queryWeightExplanation *search.Explanation
|
queryWeightExplanation *search.Explanation
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *TermQueryScorer) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeTermQueryScorer + size.SizeOfPtr +
|
||||||
|
len(s.queryTerm) + len(s.queryField)
|
||||||
|
|
||||||
|
if s.idfExplanation != nil {
|
||||||
|
sizeInBytes += s.idfExplanation.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.queryWeightExplanation != nil {
|
||||||
|
sizeInBytes += s.queryWeightExplanation.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
|
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
|
||||||
rv := TermQueryScorer{
|
rv := TermQueryScorer{
|
||||||
queryTerm: queryTerm,
|
queryTerm: queryTerm,
|
||||||
|
|
|
@ -16,11 +16,26 @@ package search
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/document"
|
"github.com/blevesearch/bleve/document"
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeDocumentMatch int
|
||||||
|
var reflectStaticSizeSearchContext int
|
||||||
|
var reflectStaticSizeLocation int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var dm DocumentMatch
|
||||||
|
reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size())
|
||||||
|
var sc SearchContext
|
||||||
|
reflectStaticSizeSearchContext = int(reflect.TypeOf(sc).Size())
|
||||||
|
var l Location
|
||||||
|
reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type ArrayPositions []uint64
|
type ArrayPositions []uint64
|
||||||
|
|
||||||
func (ap ArrayPositions) Equals(other ArrayPositions) bool {
|
func (ap ArrayPositions) Equals(other ArrayPositions) bool {
|
||||||
|
@ -47,6 +62,11 @@ type Location struct {
|
||||||
ArrayPositions ArrayPositions `json:"array_positions"`
|
ArrayPositions ArrayPositions `json:"array_positions"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (l *Location) Size() int {
|
||||||
|
return reflectStaticSizeLocation + size.SizeOfPtr +
|
||||||
|
len(l.ArrayPositions)*size.SizeOfUint64
|
||||||
|
}
|
||||||
|
|
||||||
type Locations []*Location
|
type Locations []*Location
|
||||||
|
|
||||||
type TermLocationMap map[string]Locations
|
type TermLocationMap map[string]Locations
|
||||||
|
@ -117,6 +137,52 @@ func (dm *DocumentMatch) Reset() *DocumentMatch {
|
||||||
return dm
|
return dm
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (dm *DocumentMatch) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeDocumentMatch + size.SizeOfPtr +
|
||||||
|
len(dm.Index) +
|
||||||
|
len(dm.ID) +
|
||||||
|
len(dm.IndexInternalID)
|
||||||
|
|
||||||
|
if dm.Expl != nil {
|
||||||
|
sizeInBytes += dm.Expl.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, v := range dm.Locations {
|
||||||
|
sizeInBytes += size.SizeOfString + len(k)
|
||||||
|
for k1, v1 := range v {
|
||||||
|
sizeInBytes += size.SizeOfString + len(k1) +
|
||||||
|
size.SizeOfSlice
|
||||||
|
for _, entry := range v1 {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, v := range dm.Fragments {
|
||||||
|
sizeInBytes += size.SizeOfString + len(k) +
|
||||||
|
size.SizeOfSlice
|
||||||
|
|
||||||
|
for _, entry := range v {
|
||||||
|
sizeInBytes += size.SizeOfString + len(entry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, entry := range dm.Sort {
|
||||||
|
sizeInBytes += size.SizeOfString + len(entry)
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, _ := range dm.Fields {
|
||||||
|
sizeInBytes += size.SizeOfString + len(k) +
|
||||||
|
size.SizeOfPtr
|
||||||
|
}
|
||||||
|
|
||||||
|
if dm.Document != nil {
|
||||||
|
sizeInBytes += dm.Document.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (dm *DocumentMatch) String() string {
|
func (dm *DocumentMatch) String() string {
|
||||||
return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
|
return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
|
||||||
}
|
}
|
||||||
|
@ -135,6 +201,7 @@ type Searcher interface {
|
||||||
SetQueryNorm(float64)
|
SetQueryNorm(float64)
|
||||||
Count() uint64
|
Count() uint64
|
||||||
Min() int
|
Min() int
|
||||||
|
Size() int
|
||||||
|
|
||||||
DocumentMatchPoolSize() int
|
DocumentMatchPoolSize() int
|
||||||
}
|
}
|
||||||
|
@ -148,3 +215,18 @@ type SearcherOptions struct {
|
||||||
type SearchContext struct {
|
type SearchContext struct {
|
||||||
DocumentMatchPool *DocumentMatchPool
|
DocumentMatchPool *DocumentMatchPool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (sc *SearchContext) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeSearchContext + size.SizeOfPtr +
|
||||||
|
reflectStaticSizeDocumentMatchPool + size.SizeOfPtr
|
||||||
|
|
||||||
|
if sc.DocumentMatchPool != nil {
|
||||||
|
for _, entry := range sc.DocumentMatchPool.avail {
|
||||||
|
if entry != nil {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
|
@ -16,12 +16,21 @@ package searcher
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"math"
|
"math"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
"github.com/blevesearch/bleve/search/scorer"
|
"github.com/blevesearch/bleve/search/scorer"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeBooleanSearcher int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var bs BooleanSearcher
|
||||||
|
reflectStaticSizeBooleanSearcher = int(reflect.TypeOf(bs).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type BooleanSearcher struct {
|
type BooleanSearcher struct {
|
||||||
indexReader index.IndexReader
|
indexReader index.IndexReader
|
||||||
mustSearcher search.Searcher
|
mustSearcher search.Searcher
|
||||||
|
@ -52,6 +61,33 @@ func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searc
|
||||||
return &rv, nil
|
return &rv, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *BooleanSearcher) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeBooleanSearcher + size.SizeOfPtr +
|
||||||
|
s.indexReader.Size()
|
||||||
|
|
||||||
|
if s.mustSearcher != nil {
|
||||||
|
sizeInBytes += s.mustSearcher.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.shouldSearcher != nil {
|
||||||
|
sizeInBytes += s.shouldSearcher.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.mustNotSearcher != nil {
|
||||||
|
sizeInBytes += s.mustNotSearcher.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
sizeInBytes += s.scorer.Size()
|
||||||
|
|
||||||
|
for _, entry := range s.matches {
|
||||||
|
if entry != nil {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (s *BooleanSearcher) computeQueryNorm() {
|
func (s *BooleanSearcher) computeQueryNorm() {
|
||||||
// first calculate sum of squared weights
|
// first calculate sum of squared weights
|
||||||
sumOfSquaredWeights := 0.0
|
sumOfSquaredWeights := 0.0
|
||||||
|
|
|
@ -16,13 +16,22 @@ package searcher
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"math"
|
"math"
|
||||||
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
"github.com/blevesearch/bleve/search/scorer"
|
"github.com/blevesearch/bleve/search/scorer"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeConjunctionSearcher int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var cs ConjunctionSearcher
|
||||||
|
reflectStaticSizeConjunctionSearcher = int(reflect.TypeOf(cs).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type ConjunctionSearcher struct {
|
type ConjunctionSearcher struct {
|
||||||
indexReader index.IndexReader
|
indexReader index.IndexReader
|
||||||
searchers OrderedSearcherList
|
searchers OrderedSearcherList
|
||||||
|
@ -54,6 +63,23 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
|
||||||
return &rv, nil
|
return &rv, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *ConjunctionSearcher) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr +
|
||||||
|
s.scorer.Size()
|
||||||
|
|
||||||
|
for _, entry := range s.searchers {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, entry := range s.currs {
|
||||||
|
if entry != nil {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (s *ConjunctionSearcher) computeQueryNorm() {
|
func (s *ConjunctionSearcher) computeQueryNorm() {
|
||||||
// first calculate sum of squared weights
|
// first calculate sum of squared weights
|
||||||
sumOfSquaredWeights := 0.0
|
sumOfSquaredWeights := 0.0
|
||||||
|
|
|
@ -17,13 +17,22 @@ package searcher
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
"github.com/blevesearch/bleve/search/scorer"
|
"github.com/blevesearch/bleve/search/scorer"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeDisjunctionSearcher int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var ds DisjunctionSearcher
|
||||||
|
reflectStaticSizeDisjunctionSearcher = int(reflect.TypeOf(ds).Size())
|
||||||
|
}
|
||||||
|
|
||||||
// DisjunctionMaxClauseCount is a compile time setting that applications can
|
// DisjunctionMaxClauseCount is a compile time setting that applications can
|
||||||
// adjust to non-zero value to cause the DisjunctionSearcher to return an
|
// adjust to non-zero value to cause the DisjunctionSearcher to return an
|
||||||
// error instead of executing searches when the size exceeds this value.
|
// error instead of executing searches when the size exceeds this value.
|
||||||
|
@ -90,6 +99,32 @@ func newDisjunctionSearcher(indexReader index.IndexReader,
|
||||||
return &rv, nil
|
return &rv, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *DisjunctionSearcher) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizeDisjunctionSearcher + size.SizeOfPtr +
|
||||||
|
s.indexReader.Size() +
|
||||||
|
s.scorer.Size()
|
||||||
|
|
||||||
|
for _, entry := range s.searchers {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, entry := range s.currs {
|
||||||
|
if entry != nil {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, entry := range s.matching {
|
||||||
|
if entry != nil {
|
||||||
|
sizeInBytes += entry.Size()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sizeInBytes += len(s.matchingIdxs) * size.SizeOfInt
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func (s *DisjunctionSearcher) computeQueryNorm() {
|
func (s *DisjunctionSearcher) computeQueryNorm() {
|
||||||
// first calculate sum of squared weights
|
// first calculate sum of squared weights
|
||||||
sumOfSquaredWeights := 0.0
|
sumOfSquaredWeights := 0.0
|
||||||
|
|
|
@ -15,11 +15,21 @@
|
||||||
package searcher
|
package searcher
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
"github.com/blevesearch/bleve/search/scorer"
|
"github.com/blevesearch/bleve/search/scorer"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeDocIDSearcher int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var ds DocIDSearcher
|
||||||
|
reflectStaticSizeDocIDSearcher = int(reflect.TypeOf(ds).Size())
|
||||||
|
}
|
||||||
|
|
||||||
// DocIDSearcher returns documents matching a predefined set of identifiers.
|
// DocIDSearcher returns documents matching a predefined set of identifiers.
|
||||||
type DocIDSearcher struct {
|
type DocIDSearcher struct {
|
||||||
reader index.DocIDReader
|
reader index.DocIDReader
|
||||||
|
@ -42,6 +52,12 @@ func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *DocIDSearcher) Size() int {
|
||||||
|
return reflectStaticSizeDocIDSearcher + size.SizeOfPtr +
|
||||||
|
s.reader.Size() +
|
||||||
|
s.scorer.Size()
|
||||||
|
}
|
||||||
|
|
||||||
func (s *DocIDSearcher) Count() uint64 {
|
func (s *DocIDSearcher) Count() uint64 {
|
||||||
return uint64(s.count)
|
return uint64(s.count)
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,10 +15,20 @@
|
||||||
package searcher
|
package searcher
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeFilteringSearcher int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var fs FilteringSearcher
|
||||||
|
reflectStaticSizeFilteringSearcher = int(reflect.TypeOf(fs).Size())
|
||||||
|
}
|
||||||
|
|
||||||
// FilterFunc defines a function which can filter documents
|
// FilterFunc defines a function which can filter documents
|
||||||
// returning true means keep the document
|
// returning true means keep the document
|
||||||
// returning false means do not keep the document
|
// returning false means do not keep the document
|
||||||
|
@ -38,6 +48,11 @@ func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearch
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (f *FilteringSearcher) Size() int {
|
||||||
|
return reflectStaticSizeFilteringSearcher + size.SizeOfPtr +
|
||||||
|
f.child.Size()
|
||||||
|
}
|
||||||
|
|
||||||
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
||||||
next, err := f.child.Next(ctx)
|
next, err := f.child.Next(ctx)
|
||||||
for next != nil && err == nil {
|
for next != nil && err == nil {
|
||||||
|
|
|
@ -15,11 +15,21 @@
|
||||||
package searcher
|
package searcher
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
"github.com/blevesearch/bleve/search/scorer"
|
"github.com/blevesearch/bleve/search/scorer"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeMatchAllSearcher int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var mas MatchAllSearcher
|
||||||
|
reflectStaticSizeMatchAllSearcher = int(reflect.TypeOf(mas).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type MatchAllSearcher struct {
|
type MatchAllSearcher struct {
|
||||||
indexReader index.IndexReader
|
indexReader index.IndexReader
|
||||||
reader index.DocIDReader
|
reader index.DocIDReader
|
||||||
|
@ -46,6 +56,13 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options s
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *MatchAllSearcher) Size() int {
|
||||||
|
return reflectStaticSizeMatchAllSearcher + size.SizeOfPtr +
|
||||||
|
s.indexReader.Size() +
|
||||||
|
s.reader.Size() +
|
||||||
|
s.scorer.Size()
|
||||||
|
}
|
||||||
|
|
||||||
func (s *MatchAllSearcher) Count() uint64 {
|
func (s *MatchAllSearcher) Count() uint64 {
|
||||||
return s.count
|
return s.count
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,10 +15,20 @@
|
||||||
package searcher
|
package searcher
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeMatchNoneSearcher int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var mns MatchNoneSearcher
|
||||||
|
reflectStaticSizeMatchNoneSearcher = int(reflect.TypeOf(mns).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type MatchNoneSearcher struct {
|
type MatchNoneSearcher struct {
|
||||||
indexReader index.IndexReader
|
indexReader index.IndexReader
|
||||||
}
|
}
|
||||||
|
@ -29,6 +39,11 @@ func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, er
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *MatchNoneSearcher) Size() int {
|
||||||
|
return reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr +
|
||||||
|
s.indexReader.Size()
|
||||||
|
}
|
||||||
|
|
||||||
func (s *MatchNoneSearcher) Count() uint64 {
|
func (s *MatchNoneSearcher) Count() uint64 {
|
||||||
return uint64(0)
|
return uint64(0)
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,11 +17,20 @@ package searcher
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizePhraseSearcher int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var ps PhraseSearcher
|
||||||
|
reflectStaticSizePhraseSearcher = int(reflect.TypeOf(ps).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type PhraseSearcher struct {
|
type PhraseSearcher struct {
|
||||||
indexReader index.IndexReader
|
indexReader index.IndexReader
|
||||||
mustSearcher *ConjunctionSearcher
|
mustSearcher *ConjunctionSearcher
|
||||||
|
@ -32,6 +41,28 @@ type PhraseSearcher struct {
|
||||||
initialized bool
|
initialized bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *PhraseSearcher) Size() int {
|
||||||
|
sizeInBytes := reflectStaticSizePhraseSearcher + size.SizeOfPtr +
|
||||||
|
s.indexReader.Size()
|
||||||
|
|
||||||
|
if s.mustSearcher != nil {
|
||||||
|
sizeInBytes += s.mustSearcher.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.currMust != nil {
|
||||||
|
sizeInBytes += s.currMust.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, entry := range s.terms {
|
||||||
|
sizeInBytes += size.SizeOfSlice
|
||||||
|
for _, entry1 := range entry {
|
||||||
|
sizeInBytes += size.SizeOfString + len(entry1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeInBytes
|
||||||
|
}
|
||||||
|
|
||||||
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
|
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
|
||||||
// turn flat terms []string into [][]string
|
// turn flat terms []string into [][]string
|
||||||
mterms := make([][]string, len(terms))
|
mterms := make([][]string, len(terms))
|
||||||
|
|
|
@ -15,11 +15,21 @@
|
||||||
package searcher
|
package searcher
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/index"
|
"github.com/blevesearch/bleve/index"
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
"github.com/blevesearch/bleve/search/scorer"
|
"github.com/blevesearch/bleve/search/scorer"
|
||||||
|
"github.com/blevesearch/bleve/size"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var reflectStaticSizeTermSearcher int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
var ts TermSearcher
|
||||||
|
reflectStaticSizeTermSearcher = int(reflect.TypeOf(ts).Size())
|
||||||
|
}
|
||||||
|
|
||||||
type TermSearcher struct {
|
type TermSearcher struct {
|
||||||
indexReader index.IndexReader
|
indexReader index.IndexReader
|
||||||
reader index.TermFieldReader
|
reader index.TermFieldReader
|
||||||
|
@ -63,6 +73,14 @@ func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field stri
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *TermSearcher) Size() int {
|
||||||
|
return reflectStaticSizeTermSearcher + size.SizeOfPtr +
|
||||||
|
s.indexReader.Size() +
|
||||||
|
s.reader.Size() +
|
||||||
|
s.tfd.Size() +
|
||||||
|
s.scorer.Size()
|
||||||
|
}
|
||||||
|
|
||||||
func (s *TermSearcher) Count() uint64 {
|
func (s *TermSearcher) Count() uint64 {
|
||||||
return s.reader.Count()
|
return s.reader.Count()
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
// Copyright (c) 2018 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package size
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Shallow sizes, in bytes, of common built-in types as reported by
// reflect.Type.Size on the running platform. Every variable below is
// assigned by init; for maps, slices and strings the value is the size of
// the header only, not of the data it refers to.
var (
	SizeOfBool    int
	SizeOfFloat32 int
	SizeOfFloat64 int
	SizeOfInt     int
	SizeOfMap     int
	SizeOfPtr     int
	SizeOfSlice   int
	SizeOfString  int
	SizeOfUint8   int
	SizeOfUint16  int
	SizeOfUint32  int
	SizeOfUint64  int
)

// init populates every SizeOf* variable from a zero value of the
// corresponding type.
func init() {
	var b bool
	SizeOfBool = int(reflect.TypeOf(b).Size())
	var f32 float32
	SizeOfFloat32 = int(reflect.TypeOf(f32).Size())
	var f64 float64
	SizeOfFloat64 = int(reflect.TypeOf(f64).Size())
	// SizeOfInt was declared but previously never assigned, leaving it 0.
	var i int
	SizeOfInt = int(reflect.TypeOf(i).Size())
	var m map[int]int
	SizeOfMap = int(reflect.TypeOf(m).Size())
	var ptr *int
	SizeOfPtr = int(reflect.TypeOf(ptr).Size())
	var slice []int
	SizeOfSlice = int(reflect.TypeOf(slice).Size())
	var str string
	SizeOfString = int(reflect.TypeOf(str).Size())
	var u8 uint8
	SizeOfUint8 = int(reflect.TypeOf(u8).Size())
	var u16 uint16
	SizeOfUint16 = int(reflect.TypeOf(u16).Size())
	var u32 uint32
	SizeOfUint32 = int(reflect.TypeOf(u32).Size())
	var u64 uint64
	SizeOfUint64 = int(reflect.TypeOf(u64).Size())
}
|
Loading…
Reference in New Issue