Merge branch 'master' into compaction_bytes_stats
This commit is contained in:
commit
e0369a3553
|
@ -14,7 +14,19 @@
|
|||
|
||||
package document
|
||||
|
||||
import "fmt"
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDocument int
|
||||
|
||||
func init() {
|
||||
var d Document
|
||||
reflectStaticSizeDocument = int(reflect.TypeOf(d).Size())
|
||||
}
|
||||
|
||||
type Document struct {
|
||||
ID string `json:"id"`
|
||||
|
@ -30,6 +42,13 @@ func NewDocument(id string) *Document {
|
|||
}
|
||||
}
|
||||
|
||||
func (d *Document) Size() int {
|
||||
return reflectStaticSizeDocument + size.SizeOfPtr +
|
||||
len(d.ID) +
|
||||
len(d.Fields)*size.SizeOfPtr +
|
||||
len(d.CompositeFields)*(size.SizeOfPtr+reflectStaticSizeCompositeField)
|
||||
}
|
||||
|
||||
func (d *Document) AddField(f Field) *Document {
|
||||
switch f := f.(type) {
|
||||
case *CompositeField:
|
||||
|
|
|
@ -15,9 +15,18 @@
|
|||
package document
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
)
|
||||
|
||||
var reflectStaticSizeCompositeField int
|
||||
|
||||
func init() {
|
||||
var cf CompositeField
|
||||
reflectStaticSizeCompositeField = int(reflect.TypeOf(cf).Size())
|
||||
}
|
||||
|
||||
const DefaultCompositeIndexingOptions = IndexField
|
||||
|
||||
type CompositeField struct {
|
||||
|
|
|
@ -18,11 +18,23 @@ import (
|
|||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermFieldDoc int
|
||||
var reflectStaticSizeTermFieldVector int
|
||||
|
||||
func init() {
|
||||
var tfd TermFieldDoc
|
||||
reflectStaticSizeTermFieldDoc = int(reflect.TypeOf(tfd).Size())
|
||||
var tfv TermFieldVector
|
||||
reflectStaticSizeTermFieldVector = int(reflect.TypeOf(tfv).Size())
|
||||
}
|
||||
|
||||
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
|
||||
|
||||
type Index interface {
|
||||
|
@ -82,6 +94,8 @@ type IndexReader interface {
|
|||
DumpFields() chan interface{}
|
||||
|
||||
Close() error
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
// FieldTerms contains the terms used by a document, keyed by field
|
||||
|
@ -115,6 +129,11 @@ type TermFieldVector struct {
|
|||
End uint64
|
||||
}
|
||||
|
||||
func (tfv *TermFieldVector) Size() int {
|
||||
return reflectStaticSizeTermFieldVector + size.SizeOfPtr +
|
||||
len(tfv.Field) + len(tfv.ArrayPositions)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
// IndexInternalID is an opaque document identifier interal to the index impl
|
||||
type IndexInternalID []byte
|
||||
|
||||
|
@ -134,6 +153,17 @@ type TermFieldDoc struct {
|
|||
Vectors []*TermFieldVector
|
||||
}
|
||||
|
||||
func (tfd *TermFieldDoc) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTermFieldDoc + size.SizeOfPtr +
|
||||
len(tfd.Term) + len(tfd.ID)
|
||||
|
||||
for _, entry := range tfd.Vectors {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Reset allows an already allocated TermFieldDoc to be reused
|
||||
func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
|
||||
// remember the []byte used for the ID
|
||||
|
@ -161,6 +191,8 @@ type TermFieldReader interface {
|
|||
// Count returns the number of documents contains the term in this field.
|
||||
Count() uint64
|
||||
Close() error
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
type DictEntry struct {
|
||||
|
@ -185,6 +217,9 @@ type DocIDReader interface {
|
|||
// will start there instead. If ID is greater than or equal to the end of
|
||||
// the range, Next() call will return io.EOF.
|
||||
Advance(ID IndexInternalID) (IndexInternalID, error)
|
||||
|
||||
Size() int
|
||||
|
||||
Close() error
|
||||
}
|
||||
|
||||
|
|
|
@ -179,11 +179,21 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
filename := zapFileName(newSegmentID)
|
||||
s.markIneligibleForRemoval(filename)
|
||||
path := s.path + string(os.PathSeparator) + filename
|
||||
|
||||
fileMergeZapStartTime := time.Now()
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
|
||||
newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024)
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
|
||||
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes)
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes)
|
||||
|
||||
fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxFileMergeZapTime) < fileMergeZapTime {
|
||||
atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
||||
return fmt.Errorf("merging failed: %v", err)
|
||||
|
@ -259,11 +269,20 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
|
|||
|
||||
cr := zap.NewCountHashWriter(&br)
|
||||
|
||||
memMergeZapStartTime := time.Now()
|
||||
|
||||
atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1)
|
||||
newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset,
|
||||
docValueOffset, dictLocs, fieldsInv, fieldsMap, err :=
|
||||
zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr)
|
||||
atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
|
||||
|
||||
memMergeZapTime := uint64(time.Since(memMergeZapStartTime))
|
||||
atomic.AddUint64(&s.stats.TotMemMergeZapTime, memMergeZapTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxMemMergeZapTime) < memMergeZapTime {
|
||||
atomic.StoreUint64(&s.stats.MaxMemMergeZapTime, memMergeZapTime)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
|
||||
return 0, nil, 0, err
|
||||
|
|
|
@ -365,7 +365,7 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
|
|||
introTime := uint64(time.Since(introStartTime))
|
||||
atomic.AddUint64(&s.stats.TotBatchIntroTime, introTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxBatchIntroTime) < introTime {
|
||||
atomic.AddUint64(&s.stats.MaxBatchIntroTime, introTime)
|
||||
atomic.StoreUint64(&s.stats.MaxBatchIntroTime, introTime)
|
||||
}
|
||||
|
||||
return err
|
||||
|
@ -473,20 +473,20 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) {
|
|||
}
|
||||
|
||||
func (s *Scorch) MemoryUsed() uint64 {
|
||||
var memUsed uint64
|
||||
var memUsed int
|
||||
s.rootLock.RLock()
|
||||
if s.root != nil {
|
||||
for _, segmentSnapshot := range s.root.segment {
|
||||
memUsed += 8 /* size of id -> uint64 */ +
|
||||
segmentSnapshot.segment.SizeInBytes()
|
||||
segmentSnapshot.segment.Size()
|
||||
if segmentSnapshot.deleted != nil {
|
||||
memUsed += segmentSnapshot.deleted.GetSizeInBytes()
|
||||
memUsed += int(segmentSnapshot.deleted.GetSizeInBytes())
|
||||
}
|
||||
memUsed += segmentSnapshot.cachedDocs.sizeInBytes()
|
||||
memUsed += segmentSnapshot.cachedDocs.size()
|
||||
}
|
||||
}
|
||||
s.rootLock.RUnlock()
|
||||
return memUsed
|
||||
return uint64(memUsed)
|
||||
}
|
||||
|
||||
func (s *Scorch) markIneligibleForRemoval(filename string) {
|
||||
|
|
|
@ -46,6 +46,10 @@ func (e *EmptySegment) Close() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (e *EmptySegment) Size() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (e *EmptySegment) AddRef() {
|
||||
}
|
||||
|
||||
|
@ -84,6 +88,10 @@ func (e *EmptyPostingsList) Iterator() PostingsIterator {
|
|||
return &EmptyPostingsIterator{}
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsList) Size() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsList) Count() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
@ -93,3 +101,7 @@ type EmptyPostingsIterator struct{}
|
|||
func (e *EmptyPostingsIterator) Next() (Posting, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsIterator) Size() int {
|
||||
return 0
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment {
|
|||
}
|
||||
|
||||
// compute memory usage of segment
|
||||
s.updateSizeInBytes()
|
||||
s.updateSize()
|
||||
|
||||
// professional debugging
|
||||
//
|
||||
|
@ -222,12 +222,6 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
|
|||
}
|
||||
}
|
||||
|
||||
storeField := func(docNum uint64, field uint16, typ byte, val []byte, pos []uint64) {
|
||||
s.Stored[docNum][field] = append(s.Stored[docNum][field], val)
|
||||
s.StoredTypes[docNum][field] = append(s.StoredTypes[docNum][field], typ)
|
||||
s.StoredPos[docNum][field] = append(s.StoredPos[docNum][field], pos)
|
||||
}
|
||||
|
||||
// walk each composite field
|
||||
for _, field := range result.Document.CompositeFields {
|
||||
fieldID := uint16(s.getOrDefineField(field.Name()))
|
||||
|
@ -235,6 +229,10 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
|
|||
processField(fieldID, field.Name(), l, tf)
|
||||
}
|
||||
|
||||
docStored := s.Stored[docNum]
|
||||
docStoredTypes := s.StoredTypes[docNum]
|
||||
docStoredPos := s.StoredPos[docNum]
|
||||
|
||||
// walk each field
|
||||
for i, field := range result.Document.Fields {
|
||||
fieldID := uint16(s.getOrDefineField(field.Name()))
|
||||
|
@ -242,7 +240,9 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
|
|||
tf := result.Analyzed[i]
|
||||
processField(fieldID, field.Name(), l, tf)
|
||||
if field.Options().IsStored() {
|
||||
storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions())
|
||||
docStored[fieldID] = append(docStored[fieldID], field.Value())
|
||||
docStoredTypes[fieldID] = append(docStoredTypes[fieldID], encodeFieldType(field))
|
||||
docStoredPos[fieldID] = append(docStoredPos[fieldID], field.ArrayPositions())
|
||||
}
|
||||
|
||||
if field.Options().IncludeDocValues() {
|
||||
|
@ -252,12 +252,14 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
|
|||
|
||||
// now that its been rolled up into docMap, walk that
|
||||
for fieldID, tokenFrequencies := range docMap {
|
||||
dict := s.Dicts[fieldID]
|
||||
norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))
|
||||
for term, tokenFreq := range tokenFrequencies {
|
||||
pid := s.Dicts[fieldID][term] - 1
|
||||
pid := dict[term] - 1
|
||||
bs := s.Postings[pid]
|
||||
bs.AddInt(int(docNum))
|
||||
s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency()))
|
||||
s.Norms[pid] = append(s.Norms[pid], float32(1.0/math.Sqrt(float64(fieldLens[fieldID]))))
|
||||
s.Norms[pid] = append(s.Norms[pid], norm)
|
||||
locationBS := s.PostingsLocs[pid]
|
||||
if len(tokenFreq.Locations) > 0 {
|
||||
locationBS.AddInt(int(docNum))
|
||||
|
|
|
@ -15,14 +15,23 @@
|
|||
package mem
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDictionary int
|
||||
|
||||
func init() {
|
||||
var d Dictionary
|
||||
reflectStaticSizeDictionary = int(reflect.TypeOf(d).Size())
|
||||
}
|
||||
|
||||
// Dictionary is the in-memory representation of the term dictionary
|
||||
type Dictionary struct {
|
||||
segment *Segment
|
||||
|
@ -30,15 +39,34 @@ type Dictionary struct {
|
|||
fieldID uint16
|
||||
}
|
||||
|
||||
func (d *Dictionary) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDictionary + size.SizeOfPtr +
|
||||
len(d.field)
|
||||
|
||||
if d.segment != nil {
|
||||
sizeInBytes += int(d.segment.Size())
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// PostingsList returns the postings list for the specified term
|
||||
func (d *Dictionary) PostingsList(term string,
|
||||
except *roaring.Bitmap) (segment.PostingsList, error) {
|
||||
return &PostingsList{
|
||||
dictionary: d,
|
||||
term: term,
|
||||
postingsID: d.segment.Dicts[d.fieldID][term],
|
||||
except: except,
|
||||
}, nil
|
||||
return d.InitPostingsList(term, except, nil)
|
||||
}
|
||||
|
||||
func (d *Dictionary) InitPostingsList(term string, except *roaring.Bitmap,
|
||||
prealloc *PostingsList) (*PostingsList, error) {
|
||||
rv := prealloc
|
||||
if rv == nil {
|
||||
rv = &PostingsList{}
|
||||
}
|
||||
rv.dictionary = d
|
||||
rv.term = term
|
||||
rv.postingsID = d.segment.Dicts[d.fieldID][term]
|
||||
rv.except = except
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// Iterator returns an iterator for this dictionary
|
||||
|
|
|
@ -15,10 +15,29 @@
|
|||
package mem
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizePostingsList int
|
||||
var reflectStaticSizePostingsIterator int
|
||||
var reflectStaticSizePosting int
|
||||
var reflectStaticSizeLocation int
|
||||
|
||||
func init() {
|
||||
var pl PostingsList
|
||||
reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size())
|
||||
var pi PostingsIterator
|
||||
reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size())
|
||||
var p Posting
|
||||
reflectStaticSizePosting = int(reflect.TypeOf(p).Size())
|
||||
var l Location
|
||||
reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
|
||||
}
|
||||
|
||||
// PostingsList is an in-memory represenation of a postings list
|
||||
type PostingsList struct {
|
||||
dictionary *Dictionary
|
||||
|
@ -27,6 +46,20 @@ type PostingsList struct {
|
|||
except *roaring.Bitmap
|
||||
}
|
||||
|
||||
func (p *PostingsList) Size() int {
|
||||
sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr
|
||||
|
||||
if p.dictionary != nil {
|
||||
sizeInBytes += p.dictionary.Size()
|
||||
}
|
||||
|
||||
if p.except != nil {
|
||||
sizeInBytes += int(p.except.GetSizeInBytes())
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Count returns the number of items on this postings list
|
||||
func (p *PostingsList) Count() uint64 {
|
||||
var rv uint64
|
||||
|
@ -46,9 +79,16 @@ func (p *PostingsList) Count() uint64 {
|
|||
|
||||
// Iterator returns an iterator for this postings list
|
||||
func (p *PostingsList) Iterator() segment.PostingsIterator {
|
||||
rv := &PostingsIterator{
|
||||
postings: p,
|
||||
return p.InitIterator(nil)
|
||||
}
|
||||
func (p *PostingsList) InitIterator(prealloc *PostingsIterator) *PostingsIterator {
|
||||
rv := prealloc
|
||||
if rv == nil {
|
||||
rv = &PostingsIterator{postings: p}
|
||||
} else {
|
||||
*rv = PostingsIterator{postings: p}
|
||||
}
|
||||
|
||||
if p.postingsID > 0 {
|
||||
allbits := p.dictionary.segment.Postings[p.postingsID-1]
|
||||
rv.locations = p.dictionary.segment.PostingsLocs[p.postingsID-1]
|
||||
|
@ -73,6 +113,17 @@ type PostingsIterator struct {
|
|||
offset int
|
||||
locoffset int
|
||||
actual roaring.IntIterable
|
||||
reuse Posting
|
||||
}
|
||||
|
||||
func (i *PostingsIterator) Size() int {
|
||||
sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr
|
||||
|
||||
if i.locations != nil {
|
||||
sizeInBytes += int(i.locations.GetSizeInBytes())
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Next returns the next posting on the postings list, or nil at the end
|
||||
|
@ -92,17 +143,16 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
|
|||
i.offset++
|
||||
allN = i.all.Next()
|
||||
}
|
||||
rv := &Posting{
|
||||
i.reuse = Posting{
|
||||
iterator: i,
|
||||
docNum: uint64(n),
|
||||
offset: i.offset,
|
||||
locoffset: i.locoffset,
|
||||
hasLoc: i.locations.Contains(n),
|
||||
}
|
||||
|
||||
i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset])
|
||||
i.offset++
|
||||
return rv, nil
|
||||
return &i.reuse, nil
|
||||
}
|
||||
|
||||
// Posting is a single entry in a postings list
|
||||
|
@ -114,6 +164,16 @@ type Posting struct {
|
|||
hasLoc bool
|
||||
}
|
||||
|
||||
func (p *Posting) Size() int {
|
||||
sizeInBytes := reflectStaticSizePosting + size.SizeOfPtr
|
||||
|
||||
if p.iterator != nil {
|
||||
sizeInBytes += p.iterator.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Number returns the document number of this posting in this segment
|
||||
func (p *Posting) Number() uint64 {
|
||||
return p.docNum
|
||||
|
@ -151,6 +211,15 @@ type Location struct {
|
|||
offset int
|
||||
}
|
||||
|
||||
func (l *Location) Size() int {
|
||||
sizeInBytes := reflectStaticSizeLocation
|
||||
if l.p != nil {
|
||||
sizeInBytes += l.p.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Field returns the name of the field (useful in composite fields to know
|
||||
// which original field the value came from)
|
||||
func (l *Location) Field() string {
|
||||
|
|
|
@ -16,11 +16,20 @@ package mem
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeSegment int
|
||||
|
||||
func init() {
|
||||
var s Segment
|
||||
reflectStaticSizeSegment = int(reflect.TypeOf(s).Size())
|
||||
}
|
||||
|
||||
// _id field is always guaranteed to have fieldID of 0
|
||||
const idFieldID uint16 = 0
|
||||
|
||||
|
@ -96,7 +105,7 @@ type Segment struct {
|
|||
|
||||
// Footprint of the segment, updated when analyzed document mutations
|
||||
// are added into the segment
|
||||
sizeInBytes uint64
|
||||
sizeInBytes int
|
||||
}
|
||||
|
||||
// New builds a new empty Segment
|
||||
|
@ -107,99 +116,87 @@ func New() *Segment {
|
|||
}
|
||||
}
|
||||
|
||||
func (s *Segment) updateSizeInBytes() {
|
||||
var sizeInBytes uint64
|
||||
func (s *Segment) updateSize() {
|
||||
sizeInBytes := reflectStaticSizeSegment
|
||||
|
||||
// FieldsMap, FieldsInv
|
||||
for k, _ := range s.FieldsMap {
|
||||
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
|
||||
2 /* size of uint16 */)
|
||||
sizeInBytes += (len(k)+size.SizeOfString)*2 +
|
||||
size.SizeOfUint16
|
||||
}
|
||||
// overhead from the data structures
|
||||
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
|
||||
|
||||
// Dicts, DictKeys
|
||||
for _, entry := range s.Dicts {
|
||||
for k, _ := range entry {
|
||||
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
|
||||
8 /* size of uint64 */)
|
||||
sizeInBytes += (len(k)+size.SizeOfString)*2 +
|
||||
size.SizeOfUint64
|
||||
}
|
||||
// overhead from the data structures
|
||||
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
|
||||
sizeInBytes += (size.SizeOfMap + size.SizeOfSlice)
|
||||
}
|
||||
sizeInBytes += (segment.SizeOfSlice * 2)
|
||||
|
||||
// Postings, PostingsLocs
|
||||
for i := 0; i < len(s.Postings); i++ {
|
||||
sizeInBytes += (s.Postings[i].GetSizeInBytes() + segment.SizeOfPointer) +
|
||||
(s.PostingsLocs[i].GetSizeInBytes() + segment.SizeOfPointer)
|
||||
sizeInBytes += (int(s.Postings[i].GetSizeInBytes()) + size.SizeOfPtr) +
|
||||
(int(s.PostingsLocs[i].GetSizeInBytes()) + size.SizeOfPtr)
|
||||
}
|
||||
sizeInBytes += (segment.SizeOfSlice * 2)
|
||||
|
||||
// Freqs, Norms
|
||||
for i := 0; i < len(s.Freqs); i++ {
|
||||
sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ +
|
||||
len(s.Norms[i])*4 /* size of float32 */) +
|
||||
(segment.SizeOfSlice * 2)
|
||||
sizeInBytes += (len(s.Freqs[i])*size.SizeOfUint64 +
|
||||
len(s.Norms[i])*size.SizeOfFloat32) +
|
||||
(size.SizeOfSlice * 2)
|
||||
}
|
||||
sizeInBytes += (segment.SizeOfSlice * 2)
|
||||
|
||||
// Location data
|
||||
for i := 0; i < len(s.Locfields); i++ {
|
||||
sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ +
|
||||
len(s.Locstarts[i])*8 /* size of uint64 */ +
|
||||
len(s.Locends[i])*8 /* size of uint64 */ +
|
||||
len(s.Locpos[i])*8 /* size of uint64 */)
|
||||
sizeInBytes += len(s.Locfields[i])*size.SizeOfUint16 +
|
||||
len(s.Locstarts[i])*size.SizeOfUint64 +
|
||||
len(s.Locends[i])*size.SizeOfUint64 +
|
||||
len(s.Locpos[i])*size.SizeOfUint64
|
||||
|
||||
for j := 0; j < len(s.Locarraypos[i]); j++ {
|
||||
sizeInBytes += uint64(len(s.Locarraypos[i][j])*8 /* size of uint64 */) +
|
||||
segment.SizeOfSlice
|
||||
sizeInBytes += len(s.Locarraypos[i][j])*size.SizeOfUint64 +
|
||||
size.SizeOfSlice
|
||||
}
|
||||
|
||||
sizeInBytes += (segment.SizeOfSlice * 5)
|
||||
sizeInBytes += (size.SizeOfSlice * 5)
|
||||
}
|
||||
sizeInBytes += (segment.SizeOfSlice * 5)
|
||||
|
||||
// Stored data
|
||||
for i := 0; i < len(s.Stored); i++ {
|
||||
for _, v := range s.Stored[i] {
|
||||
sizeInBytes += uint64(2 /* size of uint16 */)
|
||||
sizeInBytes += size.SizeOfUint16
|
||||
for _, arr := range v {
|
||||
sizeInBytes += uint64(len(arr)) + segment.SizeOfSlice
|
||||
sizeInBytes += len(arr) + size.SizeOfSlice
|
||||
}
|
||||
sizeInBytes += segment.SizeOfSlice
|
||||
sizeInBytes += size.SizeOfSlice
|
||||
}
|
||||
|
||||
for _, v := range s.StoredTypes[i] {
|
||||
sizeInBytes += uint64(2 /* size of uint16 */ +len(v)) + segment.SizeOfSlice
|
||||
sizeInBytes += size.SizeOfUint16 + len(v) + size.SizeOfSlice
|
||||
}
|
||||
|
||||
for _, v := range s.StoredPos[i] {
|
||||
sizeInBytes += uint64(2 /* size of uint16 */)
|
||||
sizeInBytes += size.SizeOfUint16
|
||||
for _, arr := range v {
|
||||
sizeInBytes += uint64(len(arr)*8 /* size of uint64 */) +
|
||||
segment.SizeOfSlice
|
||||
sizeInBytes += len(arr)*size.SizeOfUint64 +
|
||||
size.SizeOfSlice
|
||||
}
|
||||
sizeInBytes += segment.SizeOfSlice
|
||||
sizeInBytes += size.SizeOfSlice
|
||||
}
|
||||
|
||||
// overhead from map(s) within Stored, StoredTypes, StoredPos
|
||||
sizeInBytes += (segment.SizeOfMap * 3)
|
||||
sizeInBytes += (size.SizeOfMap * 3)
|
||||
}
|
||||
// overhead from data structures: Stored, StoredTypes, StoredPos
|
||||
sizeInBytes += (segment.SizeOfSlice * 3)
|
||||
|
||||
// DocValueFields
|
||||
sizeInBytes += uint64(len(s.DocValueFields)*3 /* size of uint16 + bool */) +
|
||||
segment.SizeOfMap
|
||||
|
||||
// SizeInBytes
|
||||
sizeInBytes += uint64(8)
|
||||
sizeInBytes += len(s.DocValueFields) * (size.SizeOfUint16 + size.SizeOfBool)
|
||||
|
||||
s.sizeInBytes = sizeInBytes
|
||||
}
|
||||
|
||||
func (s *Segment) SizeInBytes() uint64 {
|
||||
func (s *Segment) Size() int {
|
||||
return s.sizeInBytes
|
||||
}
|
||||
|
||||
|
|
|
@ -169,7 +169,7 @@ func TestSingle(t *testing.T) {
|
|||
t.Fatalf("segment nil, not expected")
|
||||
}
|
||||
|
||||
if segment.SizeInBytes() <= 0 {
|
||||
if segment.Size() <= 0 {
|
||||
t.Fatalf("segment size not updated")
|
||||
}
|
||||
|
||||
|
|
|
@ -19,12 +19,6 @@ import (
|
|||
"github.com/blevesearch/bleve/index"
|
||||
)
|
||||
|
||||
// Overhead from go data structures when deployed on a 64-bit system.
|
||||
const SizeOfMap uint64 = 8
|
||||
const SizeOfPointer uint64 = 8
|
||||
const SizeOfSlice uint64 = 24
|
||||
const SizeOfString uint64 = 16
|
||||
|
||||
// DocumentFieldValueVisitor defines a callback to be visited for each
|
||||
// stored field value. The return value determines if the visitor
|
||||
// should keep going. Returning true continues visiting, false stops.
|
||||
|
@ -42,7 +36,7 @@ type Segment interface {
|
|||
|
||||
Close() error
|
||||
|
||||
SizeInBytes() uint64
|
||||
Size() int
|
||||
|
||||
AddRef()
|
||||
DecRef() error
|
||||
|
@ -63,6 +57,8 @@ type DictionaryIterator interface {
|
|||
type PostingsList interface {
|
||||
Iterator() PostingsIterator
|
||||
|
||||
Size() int
|
||||
|
||||
Count() uint64
|
||||
|
||||
// NOTE deferred for future work
|
||||
|
@ -77,6 +73,8 @@ type PostingsIterator interface {
|
|||
// implementations may return a shared instance to reduce memory
|
||||
// allocations.
|
||||
Next() (Posting, error)
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
type Posting interface {
|
||||
|
@ -86,6 +84,8 @@ type Posting interface {
|
|||
Norm() float64
|
||||
|
||||
Locations() []Location
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
type Location interface {
|
||||
|
@ -94,6 +94,7 @@ type Location interface {
|
|||
End() uint64
|
||||
Pos() uint64
|
||||
ArrayPositions() []uint64
|
||||
Size() int
|
||||
}
|
||||
|
||||
// DocumentFieldTermVisitable is implemented by various scorch segment
|
||||
|
|
|
@ -308,7 +308,7 @@ func persistStoredFieldValues(fieldID int,
|
|||
}
|
||||
|
||||
func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) {
|
||||
var freqOffsets, locOfffsets []uint64
|
||||
freqOffsets := make([]uint64, 0, len(memSegment.Postings))
|
||||
tfEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
|
||||
for postingID := range memSegment.Postings {
|
||||
if postingID != 0 {
|
||||
|
@ -319,19 +319,10 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
|
|||
postingsListItr := memSegment.Postings[postingID].Iterator()
|
||||
var offset int
|
||||
for postingsListItr.HasNext() {
|
||||
|
||||
docNum := uint64(postingsListItr.Next())
|
||||
|
||||
// put freq
|
||||
err := tfEncoder.Add(docNum, freqs[offset])
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// put norm
|
||||
norm := norms[offset]
|
||||
normBits := math.Float32bits(norm)
|
||||
err = tfEncoder.Add(docNum, uint64(normBits))
|
||||
// put freq & norm
|
||||
err := tfEncoder.Add(docNum, freqs[offset], uint64(math.Float32bits(norms[offset])))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
@ -347,10 +338,10 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
|
|||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// now do it again for the locations
|
||||
locOffsets := make([]uint64, 0, len(memSegment.Postings))
|
||||
locEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
|
||||
for postingID := range memSegment.Postings {
|
||||
if postingID != 0 {
|
||||
|
@ -367,45 +358,20 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
|
|||
var locOffset int
|
||||
for postingsListItr.HasNext() {
|
||||
docNum := uint64(postingsListItr.Next())
|
||||
for i := 0; i < int(freqs[offset]); i++ {
|
||||
n := int(freqs[offset])
|
||||
for i := 0; i < n; i++ {
|
||||
if len(locfields) > 0 {
|
||||
// put field
|
||||
err := locEncoder.Add(docNum, uint64(locfields[locOffset]))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// put pos
|
||||
err = locEncoder.Add(docNum, locpos[locOffset])
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// put start
|
||||
err = locEncoder.Add(docNum, locstarts[locOffset])
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// put end
|
||||
err = locEncoder.Add(docNum, locends[locOffset])
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// put the number of array positions to follow
|
||||
num := len(locarraypos[locOffset])
|
||||
err = locEncoder.Add(docNum, uint64(num))
|
||||
err := locEncoder.Add(docNum, uint64(locfields[locOffset]),
|
||||
locpos[locOffset], locstarts[locOffset], locends[locOffset],
|
||||
uint64(len(locarraypos[locOffset])))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// put each array position
|
||||
for _, pos := range locarraypos[locOffset] {
|
||||
err = locEncoder.Add(docNum, pos)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
err = locEncoder.Add(docNum, locarraypos[locOffset]...)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
locOffset++
|
||||
|
@ -414,14 +380,16 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
|
|||
}
|
||||
|
||||
// record where this postings loc info starts
|
||||
locOfffsets = append(locOfffsets, uint64(w.Count()))
|
||||
locOffsets = append(locOffsets, uint64(w.Count()))
|
||||
|
||||
locEncoder.Close()
|
||||
_, err := locEncoder.Write(w)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
return freqOffsets, locOfffsets, nil
|
||||
|
||||
return freqOffsets, locOffsets, nil
|
||||
}
|
||||
|
||||
func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) {
|
||||
|
@ -532,6 +500,9 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
|
|||
fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.FieldsInv))
|
||||
fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
|
||||
|
||||
var postings *mem.PostingsList
|
||||
var postingsItr *mem.PostingsIterator
|
||||
|
||||
for fieldID := range memSegment.DocValueFields {
|
||||
field := memSegment.FieldsInv[fieldID]
|
||||
docTermMap := make(map[uint64][]byte, 0)
|
||||
|
@ -543,17 +514,17 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
|
|||
dictItr := dict.Iterator()
|
||||
next, err := dictItr.Next()
|
||||
for err == nil && next != nil {
|
||||
postings, err1 := dict.PostingsList(next.Term, nil)
|
||||
var err1 error
|
||||
postings, err1 = dict.(*mem.Dictionary).InitPostingsList(next.Term, nil, postings)
|
||||
if err1 != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
postingsItr := postings.Iterator()
|
||||
postingsItr = postings.InitIterator(postingsItr)
|
||||
nextPosting, err2 := postingsItr.Next()
|
||||
for err2 == nil && nextPosting != nil {
|
||||
docNum := nextPosting.Number()
|
||||
docTermMap[docNum] = append(docTermMap[docNum], []byte(next.Term)...)
|
||||
docTermMap[docNum] = append(docTermMap[docNum], termSeparator)
|
||||
docTermMap[docNum] = append(append(docTermMap[docNum], []byte(next.Term)...), termSeparator)
|
||||
nextPosting, err2 = postingsItr.Next()
|
||||
}
|
||||
if err2 != nil {
|
||||
|
@ -562,12 +533,12 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
|
|||
|
||||
next, err = dictItr.Next()
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// sort wrt to docIDs
|
||||
var docNumbers docIDRange
|
||||
docNumbers := make(docIDRange, 0, len(docTermMap))
|
||||
for k := range docTermMap {
|
||||
docNumbers = append(docNumbers, k)
|
||||
}
|
||||
|
|
|
@ -18,10 +18,18 @@ import (
|
|||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"reflect"
|
||||
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
var reflectStaticSizeMetaData int
|
||||
|
||||
func init() {
|
||||
var md MetaData
|
||||
reflectStaticSizeMetaData = int(reflect.TypeOf(md).Size())
|
||||
}
|
||||
|
||||
var termSeparator byte = 0xff
|
||||
var termSeparatorSplitSlice = []byte{termSeparator}
|
||||
|
||||
|
|
|
@ -19,13 +19,21 @@ import (
|
|||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
var reflectStaticSizedocValueIterator int
|
||||
|
||||
func init() {
|
||||
var dvi docValueIterator
|
||||
reflectStaticSizedocValueIterator = int(reflect.TypeOf(dvi).Size())
|
||||
}
|
||||
|
||||
type docValueIterator struct {
|
||||
field string
|
||||
curChunkNum uint64
|
||||
|
@ -36,21 +44,12 @@ type docValueIterator struct {
|
|||
curChunkData []byte // compressed data cache
|
||||
}
|
||||
|
||||
func (di *docValueIterator) sizeInBytes() uint64 {
|
||||
// curChunkNum, numChunks, dvDataLoc --> uint64
|
||||
sizeInBytes := 24
|
||||
|
||||
// field
|
||||
sizeInBytes += (len(di.field) + int(segment.SizeOfString))
|
||||
|
||||
// chunkLens, curChunkHeader
|
||||
sizeInBytes += len(di.chunkLens)*8 +
|
||||
len(di.curChunkHeader)*24 +
|
||||
int(segment.SizeOfSlice*2) /* overhead from slices */
|
||||
|
||||
// curChunkData is mmap'ed, not included
|
||||
|
||||
return uint64(sizeInBytes)
|
||||
func (di *docValueIterator) size() int {
|
||||
return reflectStaticSizedocValueIterator + size.SizeOfPtr +
|
||||
len(di.field) +
|
||||
len(di.chunkLens)*size.SizeOfUint64 +
|
||||
len(di.curChunkHeader)*reflectStaticSizeMetaData +
|
||||
len(di.curChunkData)
|
||||
}
|
||||
|
||||
func (di *docValueIterator) fieldName() string {
|
||||
|
|
|
@ -82,6 +82,19 @@ func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *chunkedIntCoder) AddBytes(docNum uint64, buf []byte) error {
|
||||
chunk := docNum / c.chunkSize
|
||||
if chunk != c.currChunk {
|
||||
// starting a new chunk
|
||||
c.Close()
|
||||
c.chunkBuf.Reset()
|
||||
c.currChunk = chunk
|
||||
}
|
||||
|
||||
_, err := c.chunkBuf.Write(buf)
|
||||
return err
|
||||
}
|
||||
|
||||
// Close indicates you are done calling Add() this allows the final chunk
|
||||
// to be encoded.
|
||||
func (c *chunkedIntCoder) Close() {
|
||||
|
|
|
@ -162,7 +162,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
|||
|
||||
var bufReuse bytes.Buffer
|
||||
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
|
||||
var bufLoc []uint64
|
||||
|
||||
var postings *PostingsList
|
||||
var postItr *PostingsIterator
|
||||
|
@ -316,45 +315,32 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
|||
newDocNumsI := newDocNums[itrI]
|
||||
|
||||
postItr = postings.iterator(postItr)
|
||||
next, err2 := postItr.Next()
|
||||
for next != nil && err2 == nil {
|
||||
hitNewDocNum := newDocNumsI[next.Number()]
|
||||
|
||||
nextDocNum, nextFreqNormBytes, nextLocBytes, err2 := postItr.nextBytes()
|
||||
for err2 == nil && len(nextFreqNormBytes) > 0 {
|
||||
hitNewDocNum := newDocNumsI[nextDocNum]
|
||||
if hitNewDocNum == docDropped {
|
||||
return nil, 0, fmt.Errorf("see hit with dropped doc num")
|
||||
}
|
||||
|
||||
newRoaring.Add(uint32(hitNewDocNum))
|
||||
// encode norm bits
|
||||
norm := next.Norm()
|
||||
normBits := math.Float32bits(float32(norm))
|
||||
err = tfEncoder.Add(hitNewDocNum, next.Frequency(), uint64(normBits))
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
err2 = tfEncoder.AddBytes(hitNewDocNum, nextFreqNormBytes)
|
||||
if err2 != nil {
|
||||
return nil, 0, err2
|
||||
}
|
||||
locs := next.Locations()
|
||||
if len(locs) > 0 {
|
||||
|
||||
if len(nextLocBytes) > 0 {
|
||||
newRoaringLocs.Add(uint32(hitNewDocNum))
|
||||
for _, loc := range locs {
|
||||
if cap(bufLoc) < 5+len(loc.ArrayPositions()) {
|
||||
bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions()))
|
||||
}
|
||||
args := bufLoc[0:5]
|
||||
args[0] = uint64(fieldsMap[loc.Field()] - 1)
|
||||
args[1] = loc.Pos()
|
||||
args[2] = loc.Start()
|
||||
args[3] = loc.End()
|
||||
args[4] = uint64(len(loc.ArrayPositions()))
|
||||
args = append(args, loc.ArrayPositions()...)
|
||||
err = locEncoder.Add(hitNewDocNum, args...)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
err2 = locEncoder.AddBytes(hitNewDocNum, nextLocBytes)
|
||||
if err2 != nil {
|
||||
return nil, 0, err2
|
||||
}
|
||||
}
|
||||
|
||||
docTermMap[hitNewDocNum] =
|
||||
append(append(docTermMap[hitNewDocNum], term...), termSeparator)
|
||||
|
||||
next, err2 = postItr.Next()
|
||||
nextDocNum, nextFreqNormBytes, nextLocBytes, err2 = postItr.nextBytes()
|
||||
}
|
||||
if err2 != nil {
|
||||
return nil, 0, err2
|
||||
|
|
|
@ -19,12 +19,30 @@ import (
|
|||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/Smerity/govarint"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizePostingsList int
|
||||
var reflectStaticSizePostingsIterator int
|
||||
var reflectStaticSizePosting int
|
||||
var reflectStaticSizeLocation int
|
||||
|
||||
func init() {
|
||||
var pl PostingsList
|
||||
reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size())
|
||||
var pi PostingsIterator
|
||||
reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size())
|
||||
var p Posting
|
||||
reflectStaticSizePosting = int(reflect.TypeOf(p).Size())
|
||||
var l Location
|
||||
reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
|
||||
}
|
||||
|
||||
// PostingsList is an in-memory represenation of a postings list
|
||||
type PostingsList struct {
|
||||
sb *SegmentBase
|
||||
|
@ -36,6 +54,28 @@ type PostingsList struct {
|
|||
except *roaring.Bitmap
|
||||
}
|
||||
|
||||
func (p *PostingsList) Size() int {
|
||||
sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr
|
||||
|
||||
if p.sb != nil {
|
||||
sizeInBytes += (p.sb.Size() - len(p.sb.mem)) // do not include the mmap'ed part
|
||||
}
|
||||
|
||||
if p.locBitmap != nil {
|
||||
sizeInBytes += int(p.locBitmap.GetSizeInBytes())
|
||||
}
|
||||
|
||||
if p.postings != nil {
|
||||
sizeInBytes += int(p.postings.GetSizeInBytes())
|
||||
}
|
||||
|
||||
if p.except != nil {
|
||||
sizeInBytes += int(p.except.GetSizeInBytes())
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Iterator returns an iterator for this postings list
|
||||
func (p *PostingsList) Iterator() segment.PostingsIterator {
|
||||
return p.iterator(nil)
|
||||
|
@ -45,7 +85,25 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
|
|||
if rv == nil {
|
||||
rv = &PostingsIterator{}
|
||||
} else {
|
||||
freqNormReader := rv.freqNormReader
|
||||
if freqNormReader != nil {
|
||||
freqNormReader.Reset([]byte(nil))
|
||||
}
|
||||
freqNormDecoder := rv.freqNormDecoder
|
||||
|
||||
locReader := rv.locReader
|
||||
if locReader != nil {
|
||||
locReader.Reset([]byte(nil))
|
||||
}
|
||||
locDecoder := rv.locDecoder
|
||||
|
||||
*rv = PostingsIterator{} // clear the struct
|
||||
|
||||
rv.freqNormReader = freqNormReader
|
||||
rv.freqNormDecoder = freqNormDecoder
|
||||
|
||||
rv.locReader = locReader
|
||||
rv.locDecoder = locDecoder
|
||||
}
|
||||
rv.postings = p
|
||||
|
||||
|
@ -175,6 +233,25 @@ type PostingsIterator struct {
|
|||
nextLocs []Location // reused across Next() calls
|
||||
}
|
||||
|
||||
func (i *PostingsIterator) Size() int {
|
||||
sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr +
|
||||
len(i.currChunkFreqNorm) +
|
||||
len(i.currChunkLoc) +
|
||||
len(i.freqChunkLens)*size.SizeOfUint64 +
|
||||
len(i.locChunkLens)*size.SizeOfUint64 +
|
||||
i.next.Size()
|
||||
|
||||
if i.locBitmap != nil {
|
||||
sizeInBytes += int(i.locBitmap.GetSizeInBytes())
|
||||
}
|
||||
|
||||
for _, entry := range i.nextLocs {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (i *PostingsIterator) loadChunk(chunk int) error {
|
||||
if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) {
|
||||
return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens))
|
||||
|
@ -279,75 +356,23 @@ func (i *PostingsIterator) readLocation(l *Location) error {
|
|||
|
||||
// Next returns the next posting on the postings list, or nil at the end
|
||||
func (i *PostingsIterator) Next() (segment.Posting, error) {
|
||||
if i.actual == nil || !i.actual.HasNext() {
|
||||
return nil, nil
|
||||
}
|
||||
n := i.actual.Next()
|
||||
nChunk := n / i.postings.sb.chunkFactor
|
||||
allN := i.all.Next()
|
||||
allNChunk := allN / i.postings.sb.chunkFactor
|
||||
|
||||
// n is the next actual hit (excluding some postings)
|
||||
// allN is the next hit in the full postings
|
||||
// if they don't match, adjust offsets to factor in item we're skipping over
|
||||
// incr the all iterator, and check again
|
||||
for allN != n {
|
||||
|
||||
// in different chunks, reset offsets
|
||||
if allNChunk != nChunk {
|
||||
i.locoffset = 0
|
||||
i.offset = 0
|
||||
} else {
|
||||
|
||||
if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
|
||||
err := i.loadChunk(int(nChunk))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error loading chunk: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// read off freq/offsets even though we don't care about them
|
||||
freq, _, err := i.readFreqNorm()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if i.locBitmap.Contains(allN) {
|
||||
for j := 0; j < int(freq); j++ {
|
||||
err := i.readLocation(nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// in same chunk, need to account for offsets
|
||||
i.offset++
|
||||
}
|
||||
|
||||
allN = i.all.Next()
|
||||
}
|
||||
|
||||
if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
|
||||
err := i.loadChunk(int(nChunk))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error loading chunk: %v", err)
|
||||
}
|
||||
docNum, exists, err := i.nextDocNum()
|
||||
if err != nil || !exists {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
reuseLocs := i.next.locs // hold for reuse before struct clearing
|
||||
i.next = Posting{} // clear the struct
|
||||
rv := &i.next
|
||||
rv.iterator = i
|
||||
rv.docNum = uint64(n)
|
||||
rv.docNum = docNum
|
||||
|
||||
var err error
|
||||
var normBits uint64
|
||||
rv.freq, normBits, err = i.readFreqNorm()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv.norm = math.Float32frombits(uint32(normBits))
|
||||
if i.locBitmap.Contains(n) {
|
||||
if i.locBitmap.Contains(uint32(docNum)) {
|
||||
// read off 'freq' locations, into reused slices
|
||||
if cap(i.nextLocs) >= int(rv.freq) {
|
||||
i.nextLocs = i.nextLocs[0:rv.freq]
|
||||
|
@ -371,14 +396,121 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
|
|||
return rv, nil
|
||||
}
|
||||
|
||||
// nextBytes returns the docNum and the encoded freq & loc bytes for
|
||||
// the next posting
|
||||
func (i *PostingsIterator) nextBytes() (uint64, []byte, []byte, error) {
|
||||
docNum, exists, err := i.nextDocNum()
|
||||
if err != nil {
|
||||
return 0, nil, nil, err
|
||||
}
|
||||
if !exists {
|
||||
return 0, nil, nil, nil
|
||||
}
|
||||
|
||||
startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
||||
|
||||
freq, _, err := i.readFreqNorm()
|
||||
if err != nil {
|
||||
return 0, nil, nil, err
|
||||
}
|
||||
|
||||
endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
||||
bytesFreqNorm := i.currChunkFreqNorm[startFreqNorm:endFreqNorm]
|
||||
|
||||
var bytesLoc []byte
|
||||
if i.locBitmap.Contains(uint32(docNum)) {
|
||||
startLoc := len(i.currChunkLoc) - i.locReader.Len()
|
||||
|
||||
for j := uint64(0); j < freq; j++ {
|
||||
err := i.readLocation(nil)
|
||||
if err != nil {
|
||||
return 0, nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
endLoc := len(i.currChunkLoc) - i.locReader.Len()
|
||||
bytesLoc = i.currChunkLoc[startLoc:endLoc]
|
||||
}
|
||||
|
||||
return docNum, bytesFreqNorm, bytesLoc, nil
|
||||
}
|
||||
|
||||
// nextDocNum returns the next docNum on the postings list, and also
|
||||
// sets up the currChunk / loc related fields of the iterator.
|
||||
func (i *PostingsIterator) nextDocNum() (uint64, bool, error) {
|
||||
if i.actual == nil || !i.actual.HasNext() {
|
||||
return 0, false, nil
|
||||
}
|
||||
|
||||
n := i.actual.Next()
|
||||
nChunk := n / i.postings.sb.chunkFactor
|
||||
allN := i.all.Next()
|
||||
allNChunk := allN / i.postings.sb.chunkFactor
|
||||
|
||||
// n is the next actual hit (excluding some postings)
|
||||
// allN is the next hit in the full postings
|
||||
// if they don't match, adjust offsets to factor in item we're skipping over
|
||||
// incr the all iterator, and check again
|
||||
for allN != n {
|
||||
// in different chunks, reset offsets
|
||||
if allNChunk != nChunk {
|
||||
i.locoffset = 0
|
||||
i.offset = 0
|
||||
} else {
|
||||
if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
|
||||
err := i.loadChunk(int(nChunk))
|
||||
if err != nil {
|
||||
return 0, false, fmt.Errorf("error loading chunk: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// read off freq/offsets even though we don't care about them
|
||||
freq, _, err := i.readFreqNorm()
|
||||
if err != nil {
|
||||
return 0, false, err
|
||||
}
|
||||
if i.locBitmap.Contains(allN) {
|
||||
for j := 0; j < int(freq); j++ {
|
||||
err := i.readLocation(nil)
|
||||
if err != nil {
|
||||
return 0, false, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// in same chunk, need to account for offsets
|
||||
i.offset++
|
||||
}
|
||||
|
||||
allN = i.all.Next()
|
||||
}
|
||||
|
||||
if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
|
||||
err := i.loadChunk(int(nChunk))
|
||||
if err != nil {
|
||||
return 0, false, fmt.Errorf("error loading chunk: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return uint64(n), true, nil
|
||||
}
|
||||
|
||||
// Posting is a single entry in a postings list
|
||||
type Posting struct {
|
||||
iterator *PostingsIterator
|
||||
docNum uint64
|
||||
docNum uint64
|
||||
freq uint64
|
||||
norm float32
|
||||
locs []segment.Location
|
||||
}
|
||||
|
||||
freq uint64
|
||||
norm float32
|
||||
locs []segment.Location
|
||||
func (p *Posting) Size() int {
|
||||
sizeInBytes := reflectStaticSizePosting
|
||||
|
||||
for _, entry := range p.locs {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Number returns the document number of this posting in this segment
|
||||
|
@ -410,6 +542,12 @@ type Location struct {
|
|||
ap []uint64
|
||||
}
|
||||
|
||||
func (l *Location) Size() int {
|
||||
return reflectStaticSizeLocation +
|
||||
len(l.field) +
|
||||
len(l.ap)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
// Field returns the name of the field (useful in composite fields to know
|
||||
// which original field the value came from)
|
||||
func (l *Location) Field() string {
|
||||
|
|
|
@ -20,16 +20,25 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"reflect"
|
||||
"sync"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/Smerity/govarint"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
"github.com/couchbase/vellum"
|
||||
mmap "github.com/edsrzf/mmap-go"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
var reflectStaticSizeSegmentBase int
|
||||
|
||||
func init() {
|
||||
var sb SegmentBase
|
||||
reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size())
|
||||
}
|
||||
|
||||
// Open returns a zap impl of a segment
|
||||
func Open(path string) (segment.Segment, error) {
|
||||
f, err := os.Open(path)
|
||||
|
@ -92,6 +101,32 @@ type SegmentBase struct {
|
|||
fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field
|
||||
}
|
||||
|
||||
func (sb *SegmentBase) Size() int {
|
||||
sizeInBytes := reflectStaticSizeSegmentBase +
|
||||
len(sb.mem)
|
||||
|
||||
// fieldsMap
|
||||
for k, _ := range sb.fieldsMap {
|
||||
sizeInBytes += (len(k) + size.SizeOfString) + size.SizeOfUint16
|
||||
}
|
||||
|
||||
// fieldsInv, dictLocs
|
||||
for _, entry := range sb.fieldsInv {
|
||||
sizeInBytes += len(entry) + size.SizeOfString
|
||||
}
|
||||
sizeInBytes += len(sb.dictLocs) * size.SizeOfUint64
|
||||
|
||||
// fieldDvIterMap
|
||||
for _, v := range sb.fieldDvIterMap {
|
||||
sizeInBytes += size.SizeOfUint16 + size.SizeOfPtr
|
||||
if v != nil {
|
||||
sizeInBytes += v.size()
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (sb *SegmentBase) AddRef() {}
|
||||
func (sb *SegmentBase) DecRef() (err error) { return nil }
|
||||
func (sb *SegmentBase) Close() (err error) { return nil }
|
||||
|
@ -111,56 +146,19 @@ type Segment struct {
|
|||
refs int64
|
||||
}
|
||||
|
||||
func (s *Segment) SizeInBytes() uint64 {
|
||||
func (s *Segment) Size() int {
|
||||
// 8 /* size of file pointer */
|
||||
// 4 /* size of version -> uint32 */
|
||||
// 4 /* size of crc -> uint32 */
|
||||
sizeOfUints := 16
|
||||
|
||||
sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints
|
||||
sizeInBytes := (len(s.path) + size.SizeOfString) + sizeOfUints
|
||||
|
||||
// mutex, refs -> int64
|
||||
sizeInBytes += 16
|
||||
|
||||
// do not include the mmap'ed part
|
||||
return uint64(sizeInBytes) + s.SegmentBase.SizeInBytes() - uint64(len(s.mem))
|
||||
}
|
||||
|
||||
func (s *SegmentBase) SizeInBytes() uint64 {
|
||||
// 4 /* size of memCRC -> uint32 */
|
||||
// 4 /* size of chunkFactor -> uint32 */
|
||||
// 8 /* size of numDocs -> uint64 */
|
||||
// 8 /* size of storedIndexOffset -> uint64 */
|
||||
// 8 /* size of fieldsIndexOffset -> uint64 */
|
||||
// 8 /* size of docValueOffset -> uint64 */
|
||||
sizeInBytes := 40
|
||||
|
||||
sizeInBytes += len(s.mem) + int(segment.SizeOfSlice)
|
||||
|
||||
// fieldsMap
|
||||
for k, _ := range s.fieldsMap {
|
||||
sizeInBytes += (len(k) + int(segment.SizeOfString)) + 2 /* size of uint16 */
|
||||
}
|
||||
sizeInBytes += int(segment.SizeOfMap) /* overhead from map */
|
||||
|
||||
// fieldsInv, dictLocs
|
||||
for _, entry := range s.fieldsInv {
|
||||
sizeInBytes += (len(entry) + int(segment.SizeOfString))
|
||||
}
|
||||
sizeInBytes += len(s.dictLocs) * 8 /* size of uint64 */
|
||||
sizeInBytes += int(segment.SizeOfSlice) * 3 /* overhead from slices */
|
||||
|
||||
// fieldDvIterMap
|
||||
sizeInBytes += len(s.fieldDvIterMap) *
|
||||
int(segment.SizeOfPointer+2 /* size of uint16 */)
|
||||
for _, entry := range s.fieldDvIterMap {
|
||||
if entry != nil {
|
||||
sizeInBytes += int(entry.sizeInBytes())
|
||||
}
|
||||
}
|
||||
sizeInBytes += int(segment.SizeOfMap)
|
||||
|
||||
return uint64(sizeInBytes)
|
||||
return sizeInBytes + s.SegmentBase.Size() - len(s.mem)
|
||||
}
|
||||
|
||||
func (s *Segment) AddRef() {
|
||||
|
|
|
@ -27,6 +27,7 @@ import (
|
|||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
type asynchSegmentResult struct {
|
||||
|
@ -89,6 +90,12 @@ func (i *IndexSnapshot) Close() error {
|
|||
return i.DecRef()
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Size() int {
|
||||
// Just return the size of the pointer for estimating the overhead
|
||||
// during Search, a reference of the IndexSnapshot serves as the reader.
|
||||
return size.SizeOfPtr
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
|
||||
|
||||
results := make(chan *asynchSegmentResult)
|
||||
|
|
|
@ -16,17 +16,30 @@ package scorch
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexSnapshotDocIDReader int
|
||||
|
||||
func init() {
|
||||
var isdr IndexSnapshotDocIDReader
|
||||
reflectStaticSizeIndexSnapshotDocIDReader = int(reflect.TypeOf(isdr).Size())
|
||||
}
|
||||
|
||||
type IndexSnapshotDocIDReader struct {
|
||||
snapshot *IndexSnapshot
|
||||
iterators []roaring.IntIterable
|
||||
segmentOffset int
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotDocIDReader) Size() int {
|
||||
return reflectStaticSizeIndexSnapshotDocIDReader + size.SizeOfPtr
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) {
|
||||
for i.segmentOffset < len(i.iterators) {
|
||||
if !i.iterators[i.segmentOffset].HasNext() {
|
||||
|
|
|
@ -16,12 +16,21 @@ package scorch
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexSnapshotTermFieldReader int
|
||||
|
||||
func init() {
|
||||
var istfr IndexSnapshotTermFieldReader
|
||||
reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size())
|
||||
}
|
||||
|
||||
type IndexSnapshotTermFieldReader struct {
|
||||
term []byte
|
||||
field string
|
||||
|
@ -36,6 +45,27 @@ type IndexSnapshotTermFieldReader struct {
|
|||
currID index.IndexInternalID
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr +
|
||||
len(i.term) +
|
||||
len(i.field) +
|
||||
len(i.currID)
|
||||
|
||||
for _, entry := range i.postings {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
for _, entry := range i.iterators {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
if i.currPosting != nil {
|
||||
sizeInBytes += i.currPosting.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
||||
rv := preAlloced
|
||||
if rv == nil {
|
||||
|
|
|
@ -213,7 +213,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *cachedDocs) sizeInBytes() uint64 {
|
||||
func (c *cachedDocs) size() int {
|
||||
sizeInBytes := 0
|
||||
c.m.Lock()
|
||||
for k, v := range c.cache { // cachedFieldDocs
|
||||
|
@ -225,5 +225,5 @@ func (c *cachedDocs) sizeInBytes() uint64 {
|
|||
}
|
||||
}
|
||||
c.m.Unlock()
|
||||
return uint64(sizeInBytes)
|
||||
return sizeInBytes
|
||||
}
|
||||
|
|
|
@ -89,8 +89,10 @@ type Stats struct {
|
|||
TotFileMergeSegments uint64
|
||||
TotFileMergeWrittenBytes uint64
|
||||
|
||||
TotFileMergeZapBeg uint64
|
||||
TotFileMergeZapEnd uint64
|
||||
TotFileMergeZapBeg uint64
|
||||
TotFileMergeZapEnd uint64
|
||||
TotFileMergeZapTime uint64
|
||||
MaxFileMergeZapTime uint64
|
||||
|
||||
TotFileMergeIntroductions uint64
|
||||
TotFileMergeIntroductionsDone uint64
|
||||
|
@ -100,6 +102,8 @@ type Stats struct {
|
|||
TotMemMergeDone uint64
|
||||
TotMemMergeZapBeg uint64
|
||||
TotMemMergeZapEnd uint64
|
||||
TotMemMergeZapTime uint64
|
||||
MaxMemMergeZapTime uint64
|
||||
TotMemMergeSegments uint64
|
||||
}
|
||||
|
||||
|
|
|
@ -15,17 +15,31 @@
|
|||
package upsidedown
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexReader int
|
||||
|
||||
func init() {
|
||||
var ir IndexReader
|
||||
reflectStaticSizeIndexReader = int(reflect.TypeOf(ir).Size())
|
||||
}
|
||||
|
||||
type IndexReader struct {
|
||||
index *UpsideDownCouch
|
||||
kvreader store.KVReader
|
||||
docCount uint64
|
||||
}
|
||||
|
||||
func (i *IndexReader) Size() int {
|
||||
return reflectStaticSizeIndexReader + size.SizeOfPtr
|
||||
}
|
||||
|
||||
func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
||||
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
|
||||
if fieldExists {
|
||||
|
|
|
@ -16,13 +16,27 @@ package upsidedown
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
"sort"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeUpsideDownCouchTermFieldReader int
|
||||
var reflectStaticSizeUpsideDownCouchDocIDReader int
|
||||
|
||||
func init() {
|
||||
var tfr UpsideDownCouchTermFieldReader
|
||||
reflectStaticSizeUpsideDownCouchTermFieldReader =
|
||||
int(reflect.TypeOf(tfr).Size())
|
||||
var cdr UpsideDownCouchDocIDReader
|
||||
reflectStaticSizeUpsideDownCouchDocIDReader =
|
||||
int(reflect.TypeOf(cdr).Size())
|
||||
}
|
||||
|
||||
type UpsideDownCouchTermFieldReader struct {
|
||||
count uint64
|
||||
indexReader *IndexReader
|
||||
|
@ -35,6 +49,19 @@ type UpsideDownCouchTermFieldReader struct {
|
|||
includeTermVectors bool
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr +
|
||||
len(r.term) +
|
||||
r.tfrPrealloc.Size() +
|
||||
len(r.keyBuf)
|
||||
|
||||
if r.tfrNext != nil {
|
||||
sizeInBytes += r.tfrNext.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
|
||||
bufNeeded := termFrequencyRowKeySize(term, nil)
|
||||
if bufNeeded < dictionaryRowKeySize(term) {
|
||||
|
@ -174,8 +201,18 @@ type UpsideDownCouchDocIDReader struct {
|
|||
onlyMode bool
|
||||
}
|
||||
|
||||
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
|
||||
func (r *UpsideDownCouchDocIDReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader +
|
||||
r.indexReader.Size()
|
||||
|
||||
for _, entry := range r.only {
|
||||
sizeInBytes += size.SizeOfString + len(entry)
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
|
||||
startBytes := []byte{0x0}
|
||||
endBytes := []byte{0xff}
|
||||
|
||||
|
|
|
@ -20,10 +20,22 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/size"
|
||||
"github.com/golang/protobuf/proto"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermFrequencyRow int
|
||||
var reflectStaticSizeTermVector int
|
||||
|
||||
func init() {
|
||||
var tfr TermFrequencyRow
|
||||
reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(tfr).Size())
|
||||
var tv TermVector
|
||||
reflectStaticSizeTermVector = int(reflect.TypeOf(tv).Size())
|
||||
}
|
||||
|
||||
const ByteSeparator byte = 0xff
|
||||
|
||||
type UpsideDownCouchRowStream chan UpsideDownCouchRow
|
||||
|
@ -358,6 +370,11 @@ type TermVector struct {
|
|||
end uint64
|
||||
}
|
||||
|
||||
func (tv *TermVector) Size() int {
|
||||
return reflectStaticSizeTermVector + size.SizeOfPtr +
|
||||
len(tv.arrayPositions)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
func (tv *TermVector) String() string {
|
||||
return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
|
||||
}
|
||||
|
@ -371,6 +388,18 @@ type TermFrequencyRow struct {
|
|||
field uint16
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTermFrequencyRow +
|
||||
len(tfr.term) +
|
||||
len(tfr.doc)
|
||||
|
||||
for _, entry := range tfr.vectors {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) Term() []byte {
|
||||
return tfr.term
|
||||
}
|
||||
|
|
|
@ -50,6 +50,10 @@ const storePath = "store"
|
|||
|
||||
var mappingInternalKey = []byte("_mapping")
|
||||
|
||||
const SearchMemCheckCallbackKey = "_search_mem_callback_key"
|
||||
|
||||
type SearchMemCheckCallbackFn func(size uint64) error
|
||||
|
||||
func indexStorePath(path string) string {
|
||||
return path + string(os.PathSeparator) + storePath
|
||||
}
|
||||
|
@ -362,8 +366,59 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
|
|||
return i.SearchInContext(context.Background(), req)
|
||||
}
|
||||
|
||||
// memNeededForSearch is a helper function that returns an estimate of RAM
|
||||
// needed to execute a search request.
|
||||
func memNeededForSearch(req *SearchRequest,
|
||||
searcher search.Searcher,
|
||||
topnCollector *collector.TopNCollector) uint64 {
|
||||
|
||||
backingSize := req.Size + req.From + 1
|
||||
if req.Size+req.From > collector.PreAllocSizeSkipCap {
|
||||
backingSize = collector.PreAllocSizeSkipCap + 1
|
||||
}
|
||||
numDocMatches := backingSize + searcher.DocumentMatchPoolSize()
|
||||
|
||||
estimate := 0
|
||||
|
||||
// overhead, size in bytes from collector
|
||||
estimate += topnCollector.Size()
|
||||
|
||||
var dm search.DocumentMatch
|
||||
sizeOfDocumentMatch := dm.Size()
|
||||
|
||||
// pre-allocing DocumentMatchPool
|
||||
var sc search.SearchContext
|
||||
estimate += sc.Size() + numDocMatches*sizeOfDocumentMatch
|
||||
|
||||
// searcher overhead
|
||||
estimate += searcher.Size()
|
||||
|
||||
// overhead from results, lowestMatchOutsideResults
|
||||
estimate += (numDocMatches + 1) * sizeOfDocumentMatch
|
||||
|
||||
// additional overhead from SearchResult
|
||||
var sr SearchResult
|
||||
estimate += sr.Size()
|
||||
|
||||
// overhead from facet results
|
||||
if req.Facets != nil {
|
||||
var fr search.FacetResult
|
||||
estimate += len(req.Facets) * fr.Size()
|
||||
}
|
||||
|
||||
// highlighting, store
|
||||
var d document.Document
|
||||
if len(req.Fields) > 0 || req.Highlight != nil {
|
||||
for i := 0; i < (req.Size + req.From); i++ { // size + from => number of hits
|
||||
estimate += (req.Size + req.From) * d.Size()
|
||||
}
|
||||
}
|
||||
|
||||
return uint64(estimate)
|
||||
}
|
||||
|
||||
// SearchInContext executes a search request operation within the provided
|
||||
// Context. Returns a SearchResult object or an error.
|
||||
// Context. Returns a SearchResult object or an error.
|
||||
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
@ -428,6 +483,15 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
collector.SetFacetsBuilder(facetsBuilder)
|
||||
}
|
||||
|
||||
if memCb := ctx.Value(SearchMemCheckCallbackKey); memCb != nil {
|
||||
if memCbFn, ok := memCb.(SearchMemCheckCallbackFn); ok {
|
||||
err = memCbFn(memNeededForSearch(req, searcher, collector))
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = collector.Collect(ctx, searcher, indexReader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
102
index_test.go
102
index_test.go
|
@ -36,6 +36,9 @@ import (
|
|||
"github.com/blevesearch/bleve/mapping"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/query"
|
||||
|
||||
"github.com/blevesearch/bleve/index/scorch"
|
||||
"github.com/blevesearch/bleve/index/upsidedown"
|
||||
)
|
||||
|
||||
func TestCrud(t *testing.T) {
|
||||
|
@ -1815,3 +1818,102 @@ func TestIndexAdvancedCountMatchSearch(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkSearchOverhead(indexType string, b *testing.B) {
|
||||
defer func() {
|
||||
err := os.RemoveAll("testidx")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
index, err := NewUsing("testidx", NewIndexMapping(),
|
||||
indexType, Config.DefaultKVStore, nil)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := index.Close()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
elements := []string{"air", "water", "fire", "earth"}
|
||||
for j := 0; j < 10000; j++ {
|
||||
err = index.Index(fmt.Sprintf("%d", j),
|
||||
map[string]interface{}{"name": elements[j%len(elements)]})
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
query1 := NewTermQuery("water")
|
||||
query2 := NewTermQuery("fire")
|
||||
query := NewDisjunctionQuery(query1, query2)
|
||||
req := NewSearchRequest(query)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for n := 0; n < b.N; n++ {
|
||||
_, err = index.Search(req)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkUpsidedownSearchOverhead(b *testing.B) {
|
||||
benchmarkSearchOverhead(upsidedown.Name, b)
|
||||
}
|
||||
|
||||
func BenchmarkScorchSearchOverhead(b *testing.B) {
|
||||
benchmarkSearchOverhead(scorch.Name, b)
|
||||
}
|
||||
|
||||
func TestSearchMemCheckCallback(t *testing.T) {
|
||||
defer func() {
|
||||
err := os.RemoveAll("testidx")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
index, err := New("testidx", NewIndexMapping())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := index.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
elements := []string{"air", "water", "fire", "earth"}
|
||||
for j := 0; j < 10000; j++ {
|
||||
err = index.Index(fmt.Sprintf("%d", j),
|
||||
map[string]interface{}{"name": elements[j%len(elements)]})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
query := NewTermQuery("water")
|
||||
req := NewSearchRequest(query)
|
||||
|
||||
expErr := fmt.Errorf("MEM_LIMIT_EXCEEDED")
|
||||
f := func(size uint64) error {
|
||||
if size > 1000 {
|
||||
return expErr
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
ctx := context.WithValue(context.Background(), SearchMemCheckCallbackKey,
|
||||
SearchMemCheckCallbackFn(f))
|
||||
_, err = index.SearchInContext(ctx, req)
|
||||
if err != expErr {
|
||||
t.Fatalf("Expected: %v, Got: %v", expErr, err)
|
||||
}
|
||||
}
|
||||
|
|
30
search.go
30
search.go
|
@ -17,6 +17,7 @@ package bleve
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
|
@ -24,8 +25,19 @@ import (
|
|||
"github.com/blevesearch/bleve/registry"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/query"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeSearchResult int
|
||||
var reflectStaticSizeSearchStatus int
|
||||
|
||||
func init() {
|
||||
var sr SearchResult
|
||||
reflectStaticSizeSearchResult = int(reflect.TypeOf(sr).Size())
|
||||
var ss SearchStatus
|
||||
reflectStaticSizeSearchStatus = int(reflect.TypeOf(ss).Size())
|
||||
}
|
||||
|
||||
var cache = registry.NewCache()
|
||||
|
||||
const defaultDateTimeParser = optional.Name
|
||||
|
@ -432,6 +444,24 @@ type SearchResult struct {
|
|||
Facets search.FacetResults `json:"facets"`
|
||||
}
|
||||
|
||||
func (sr *SearchResult) Size() int {
|
||||
sizeInBytes := reflectStaticSizeSearchResult + size.SizeOfPtr +
|
||||
reflectStaticSizeSearchStatus
|
||||
|
||||
for _, entry := range sr.Hits {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
for k, v := range sr.Facets {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
v.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (sr *SearchResult) String() string {
|
||||
rv := ""
|
||||
if sr.Total > 0 {
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
|
@ -25,6 +27,18 @@ type stubSearcher struct {
|
|||
matches []*search.DocumentMatch
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) Size() int {
|
||||
sizeInBytes := int(reflect.TypeOf(*ss).Size())
|
||||
|
||||
for _, entry := range ss.matches {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
||||
if ss.index < len(ss.matches) {
|
||||
rv := ctx.DocumentMatchPool.Get()
|
||||
|
@ -76,6 +90,10 @@ func (ss *stubSearcher) DocumentMatchPoolSize() int {
|
|||
|
||||
type stubReader struct{}
|
||||
|
||||
func (sr *stubReader) Size() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (sr *stubReader) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
|
|
@ -16,12 +16,21 @@ package collector
|
|||
|
||||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTopNCollector int
|
||||
|
||||
func init() {
|
||||
var coll TopNCollector
|
||||
reflectStaticSizeTopNCollector = int(reflect.TypeOf(coll).Size())
|
||||
}
|
||||
|
||||
type collectorStore interface {
|
||||
// Add the document, and if the new store size exceeds the provided size
|
||||
// the last element is removed and returned. If the size has not been
|
||||
|
@ -98,6 +107,22 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
|
|||
return hc
|
||||
}
|
||||
|
||||
func (hc *TopNCollector) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTopNCollector + size.SizeOfPtr
|
||||
|
||||
if hc.facetsBuilder != nil {
|
||||
sizeInBytes += hc.facetsBuilder.Size()
|
||||
}
|
||||
|
||||
for _, entry := range hc.neededFields {
|
||||
sizeInBytes += len(entry) + size.SizeOfString
|
||||
}
|
||||
|
||||
sizeInBytes += len(hc.cachedScoring) + len(hc.cachedDesc)
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Collect goes to the index to find the matching documents
|
||||
func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
|
||||
startTime := time.Now()
|
||||
|
|
|
@ -17,8 +17,18 @@ package search
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeExplanation int
|
||||
|
||||
func init() {
|
||||
var e Explanation
|
||||
reflectStaticSizeExplanation = int(reflect.TypeOf(e).Size())
|
||||
}
|
||||
|
||||
type Explanation struct {
|
||||
Value float64 `json:"value"`
|
||||
Message string `json:"message"`
|
||||
|
@ -32,3 +42,14 @@ func (expl *Explanation) String() string {
|
|||
}
|
||||
return string(js)
|
||||
}
|
||||
|
||||
func (expl *Explanation) Size() int {
|
||||
sizeInBytes := reflectStaticSizeExplanation + size.SizeOfPtr +
|
||||
len(expl.Message)
|
||||
|
||||
for _, entry := range expl.Children {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
|
|
@ -15,13 +15,25 @@
|
|||
package facet
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDateTimeFacetBuilder int
|
||||
var reflectStaticSizedateTimeRange int
|
||||
|
||||
func init() {
|
||||
var dtfb DateTimeFacetBuilder
|
||||
reflectStaticSizeDateTimeFacetBuilder = int(reflect.TypeOf(dtfb).Size())
|
||||
var dtr dateTimeRange
|
||||
reflectStaticSizedateTimeRange = int(reflect.TypeOf(dtr).Size())
|
||||
}
|
||||
|
||||
type dateTimeRange struct {
|
||||
start time.Time
|
||||
end time.Time
|
||||
|
@ -46,6 +58,23 @@ func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDateTimeFacetBuilder + size.SizeOfPtr +
|
||||
len(fb.field)
|
||||
|
||||
for k, _ := range fb.termsCount {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfInt
|
||||
}
|
||||
|
||||
for k, _ := range fb.ranges {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfPtr + reflectStaticSizedateTimeRange
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
|
||||
r := dateTimeRange{
|
||||
start: start,
|
||||
|
|
|
@ -15,12 +15,24 @@
|
|||
package facet
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeNumericFacetBuilder int
|
||||
var reflectStaticSizenumericRange int
|
||||
|
||||
func init() {
|
||||
var nfb NumericFacetBuilder
|
||||
reflectStaticSizeNumericFacetBuilder = int(reflect.TypeOf(nfb).Size())
|
||||
var nr numericRange
|
||||
reflectStaticSizenumericRange = int(reflect.TypeOf(nr).Size())
|
||||
}
|
||||
|
||||
type numericRange struct {
|
||||
min *float64
|
||||
max *float64
|
||||
|
@ -45,6 +57,23 @@ func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) Size() int {
|
||||
sizeInBytes := reflectStaticSizeNumericFacetBuilder + size.SizeOfPtr +
|
||||
len(fb.field)
|
||||
|
||||
for k, _ := range fb.termsCount {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfInt
|
||||
}
|
||||
|
||||
for k, _ := range fb.ranges {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfPtr + reflectStaticSizenumericRange
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
|
||||
r := numericRange{
|
||||
min: min,
|
||||
|
|
|
@ -15,11 +15,20 @@
|
|||
package facet
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermsFacetBuilder int
|
||||
|
||||
func init() {
|
||||
var tfb TermsFacetBuilder
|
||||
reflectStaticSizeTermsFacetBuilder = int(reflect.TypeOf(tfb).Size())
|
||||
}
|
||||
|
||||
type TermsFacetBuilder struct {
|
||||
size int
|
||||
field string
|
||||
|
@ -37,6 +46,18 @@ func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
func (fb *TermsFacetBuilder) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr +
|
||||
len(fb.field)
|
||||
|
||||
for k, _ := range fb.termsCount {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfInt
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (fb *TermsFacetBuilder) Field() string {
|
||||
return fb.field
|
||||
}
|
||||
|
|
|
@ -15,11 +15,32 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeFacetsBuilder int
|
||||
var reflectStaticSizeFacetResult int
|
||||
var reflectStaticSizeTermFacet int
|
||||
var reflectStaticSizeNumericRangeFacet int
|
||||
var reflectStaticSizeDateRangeFacet int
|
||||
|
||||
func init() {
|
||||
var fb FacetsBuilder
|
||||
reflectStaticSizeFacetsBuilder = int(reflect.TypeOf(fb).Size())
|
||||
var fr FacetResult
|
||||
reflectStaticSizeFacetResult = int(reflect.TypeOf(fr).Size())
|
||||
var tf TermFacet
|
||||
reflectStaticSizeTermFacet = int(reflect.TypeOf(tf).Size())
|
||||
var nrf NumericRangeFacet
|
||||
reflectStaticSizeNumericRangeFacet = int(reflect.TypeOf(nrf).Size())
|
||||
var drf DateRangeFacet
|
||||
reflectStaticSizeDateRangeFacet = int(reflect.TypeOf(drf).Size())
|
||||
}
|
||||
|
||||
type FacetBuilder interface {
|
||||
StartDoc()
|
||||
UpdateVisitor(field string, term []byte)
|
||||
|
@ -27,6 +48,8 @@ type FacetBuilder interface {
|
|||
|
||||
Result() *FacetResult
|
||||
Field() string
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
type FacetsBuilder struct {
|
||||
|
@ -42,6 +65,22 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
func (fb *FacetsBuilder) Size() int {
|
||||
sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr +
|
||||
fb.indexReader.Size()
|
||||
|
||||
for k, v := range fb.facets {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
v.Size()
|
||||
}
|
||||
|
||||
for _, entry := range fb.fields {
|
||||
sizeInBytes += size.SizeOfString + len(entry)
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
|
||||
fb.facets[name] = facetBuilder
|
||||
fb.fields = append(fb.fields, facetBuilder.Field())
|
||||
|
@ -213,6 +252,14 @@ type FacetResult struct {
|
|||
DateRanges DateRangeFacets `json:"date_ranges,omitempty"`
|
||||
}
|
||||
|
||||
func (fr *FacetResult) Size() int {
|
||||
return reflectStaticSizeFacetResult + size.SizeOfPtr +
|
||||
len(fr.Field) +
|
||||
len(fr.Terms)*(reflectStaticSizeTermFacet+size.SizeOfPtr) +
|
||||
len(fr.NumericRanges)*(reflectStaticSizeNumericRangeFacet+size.SizeOfPtr) +
|
||||
len(fr.DateRanges)*(reflectStaticSizeDateRangeFacet+size.SizeOfPtr)
|
||||
}
|
||||
|
||||
func (fr *FacetResult) Merge(other *FacetResult) {
|
||||
fr.Total += other.Total
|
||||
fr.Missing += other.Missing
|
||||
|
|
|
@ -14,6 +14,17 @@
|
|||
|
||||
package search
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDocumentMatchPool int
|
||||
|
||||
func init() {
|
||||
var dmp DocumentMatchPool
|
||||
reflectStaticSizeDocumentMatchPool = int(reflect.TypeOf(dmp).Size())
|
||||
}
|
||||
|
||||
// DocumentMatchPoolTooSmall is a callback function that can be executed
|
||||
// when the DocumentMatchPool does not have sufficient capacity
|
||||
// By default we just perform just-in-time allocation, but you could log
|
||||
|
|
|
@ -15,13 +15,27 @@
|
|||
package scorer
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeConjunctionQueryScorer int
|
||||
|
||||
func init() {
|
||||
var cqs ConjunctionQueryScorer
|
||||
reflectStaticSizeConjunctionQueryScorer = int(reflect.TypeOf(cqs).Size())
|
||||
}
|
||||
|
||||
type ConjunctionQueryScorer struct {
|
||||
options search.SearcherOptions
|
||||
}
|
||||
|
||||
func (s *ConjunctionQueryScorer) Size() int {
|
||||
return reflectStaticSizeConjunctionQueryScorer + size.SizeOfPtr
|
||||
}
|
||||
|
||||
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
|
||||
return &ConjunctionQueryScorer{
|
||||
options: options,
|
||||
|
|
|
@ -16,11 +16,20 @@ package scorer
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeConstantScorer int
|
||||
|
||||
func init() {
|
||||
var cs ConstantScorer
|
||||
reflectStaticSizeConstantScorer = int(reflect.TypeOf(cs).Size())
|
||||
}
|
||||
|
||||
type ConstantScorer struct {
|
||||
constant float64
|
||||
boost float64
|
||||
|
@ -30,6 +39,16 @@ type ConstantScorer struct {
|
|||
queryWeightExplanation *search.Explanation
|
||||
}
|
||||
|
||||
func (s *ConstantScorer) Size() int {
|
||||
sizeInBytes := reflectStaticSizeConstantScorer + size.SizeOfPtr
|
||||
|
||||
if s.queryWeightExplanation != nil {
|
||||
sizeInBytes += s.queryWeightExplanation.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
|
||||
rv := ConstantScorer{
|
||||
options: options,
|
||||
|
|
|
@ -16,14 +16,27 @@ package scorer
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDisjunctionQueryScorer int
|
||||
|
||||
func init() {
|
||||
var dqs DisjunctionQueryScorer
|
||||
reflectStaticSizeDisjunctionQueryScorer = int(reflect.TypeOf(dqs).Size())
|
||||
}
|
||||
|
||||
type DisjunctionQueryScorer struct {
|
||||
options search.SearcherOptions
|
||||
}
|
||||
|
||||
func (s *DisjunctionQueryScorer) Size() int {
|
||||
return reflectStaticSizeDisjunctionQueryScorer + size.SizeOfPtr
|
||||
}
|
||||
|
||||
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
|
||||
return &DisjunctionQueryScorer{
|
||||
options: options,
|
||||
|
|
|
@ -17,11 +17,20 @@ package scorer
|
|||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermQueryScorer int
|
||||
|
||||
func init() {
|
||||
var tqs TermQueryScorer
|
||||
reflectStaticSizeTermQueryScorer = int(reflect.TypeOf(tqs).Size())
|
||||
}
|
||||
|
||||
type TermQueryScorer struct {
|
||||
queryTerm []byte
|
||||
queryField string
|
||||
|
@ -36,6 +45,21 @@ type TermQueryScorer struct {
|
|||
queryWeightExplanation *search.Explanation
|
||||
}
|
||||
|
||||
func (s *TermQueryScorer) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTermQueryScorer + size.SizeOfPtr +
|
||||
len(s.queryTerm) + len(s.queryField)
|
||||
|
||||
if s.idfExplanation != nil {
|
||||
sizeInBytes += s.idfExplanation.Size()
|
||||
}
|
||||
|
||||
if s.queryWeightExplanation != nil {
|
||||
sizeInBytes += s.queryWeightExplanation.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
|
||||
rv := TermQueryScorer{
|
||||
queryTerm: queryTerm,
|
||||
|
|
|
@ -16,11 +16,26 @@ package search
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDocumentMatch int
|
||||
var reflectStaticSizeSearchContext int
|
||||
var reflectStaticSizeLocation int
|
||||
|
||||
func init() {
|
||||
var dm DocumentMatch
|
||||
reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size())
|
||||
var sc SearchContext
|
||||
reflectStaticSizeSearchContext = int(reflect.TypeOf(sc).Size())
|
||||
var l Location
|
||||
reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
|
||||
}
|
||||
|
||||
type ArrayPositions []uint64
|
||||
|
||||
func (ap ArrayPositions) Equals(other ArrayPositions) bool {
|
||||
|
@ -47,6 +62,11 @@ type Location struct {
|
|||
ArrayPositions ArrayPositions `json:"array_positions"`
|
||||
}
|
||||
|
||||
func (l *Location) Size() int {
|
||||
return reflectStaticSizeLocation + size.SizeOfPtr +
|
||||
len(l.ArrayPositions)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
type Locations []*Location
|
||||
|
||||
type TermLocationMap map[string]Locations
|
||||
|
@ -117,6 +137,52 @@ func (dm *DocumentMatch) Reset() *DocumentMatch {
|
|||
return dm
|
||||
}
|
||||
|
||||
func (dm *DocumentMatch) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDocumentMatch + size.SizeOfPtr +
|
||||
len(dm.Index) +
|
||||
len(dm.ID) +
|
||||
len(dm.IndexInternalID)
|
||||
|
||||
if dm.Expl != nil {
|
||||
sizeInBytes += dm.Expl.Size()
|
||||
}
|
||||
|
||||
for k, v := range dm.Locations {
|
||||
sizeInBytes += size.SizeOfString + len(k)
|
||||
for k1, v1 := range v {
|
||||
sizeInBytes += size.SizeOfString + len(k1) +
|
||||
size.SizeOfSlice
|
||||
for _, entry := range v1 {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for k, v := range dm.Fragments {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfSlice
|
||||
|
||||
for _, entry := range v {
|
||||
sizeInBytes += size.SizeOfString + len(entry)
|
||||
}
|
||||
}
|
||||
|
||||
for _, entry := range dm.Sort {
|
||||
sizeInBytes += size.SizeOfString + len(entry)
|
||||
}
|
||||
|
||||
for k, _ := range dm.Fields {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfPtr
|
||||
}
|
||||
|
||||
if dm.Document != nil {
|
||||
sizeInBytes += dm.Document.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (dm *DocumentMatch) String() string {
|
||||
return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
|
||||
}
|
||||
|
@ -135,6 +201,7 @@ type Searcher interface {
|
|||
SetQueryNorm(float64)
|
||||
Count() uint64
|
||||
Min() int
|
||||
Size() int
|
||||
|
||||
DocumentMatchPoolSize() int
|
||||
}
|
||||
|
@ -148,3 +215,18 @@ type SearcherOptions struct {
|
|||
type SearchContext struct {
|
||||
DocumentMatchPool *DocumentMatchPool
|
||||
}
|
||||
|
||||
func (sc *SearchContext) Size() int {
|
||||
sizeInBytes := reflectStaticSizeSearchContext + size.SizeOfPtr +
|
||||
reflectStaticSizeDocumentMatchPool + size.SizeOfPtr
|
||||
|
||||
if sc.DocumentMatchPool != nil {
|
||||
for _, entry := range sc.DocumentMatchPool.avail {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
|
|
@ -16,12 +16,21 @@ package searcher
|
|||
|
||||
import (
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/scorer"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeBooleanSearcher int
|
||||
|
||||
func init() {
|
||||
var bs BooleanSearcher
|
||||
reflectStaticSizeBooleanSearcher = int(reflect.TypeOf(bs).Size())
|
||||
}
|
||||
|
||||
type BooleanSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
mustSearcher search.Searcher
|
||||
|
@ -52,6 +61,33 @@ func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searc
|
|||
return &rv, nil
|
||||
}
|
||||
|
||||
func (s *BooleanSearcher) Size() int {
|
||||
sizeInBytes := reflectStaticSizeBooleanSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size()
|
||||
|
||||
if s.mustSearcher != nil {
|
||||
sizeInBytes += s.mustSearcher.Size()
|
||||
}
|
||||
|
||||
if s.shouldSearcher != nil {
|
||||
sizeInBytes += s.shouldSearcher.Size()
|
||||
}
|
||||
|
||||
if s.mustNotSearcher != nil {
|
||||
sizeInBytes += s.mustNotSearcher.Size()
|
||||
}
|
||||
|
||||
sizeInBytes += s.scorer.Size()
|
||||
|
||||
for _, entry := range s.matches {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (s *BooleanSearcher) computeQueryNorm() {
|
||||
// first calculate sum of squared weights
|
||||
sumOfSquaredWeights := 0.0
|
||||
|
|
|
@ -16,13 +16,22 @@ package searcher
|
|||
|
||||
import (
|
||||
"math"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/scorer"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeConjunctionSearcher int
|
||||
|
||||
func init() {
|
||||
var cs ConjunctionSearcher
|
||||
reflectStaticSizeConjunctionSearcher = int(reflect.TypeOf(cs).Size())
|
||||
}
|
||||
|
||||
type ConjunctionSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
searchers OrderedSearcherList
|
||||
|
@ -54,6 +63,23 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
|
|||
return &rv, nil
|
||||
}
|
||||
|
||||
func (s *ConjunctionSearcher) Size() int {
|
||||
sizeInBytes := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr +
|
||||
s.scorer.Size()
|
||||
|
||||
for _, entry := range s.searchers {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
for _, entry := range s.currs {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (s *ConjunctionSearcher) computeQueryNorm() {
|
||||
// first calculate sum of squared weights
|
||||
sumOfSquaredWeights := 0.0
|
||||
|
|
|
@ -17,13 +17,22 @@ package searcher
|
|||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/scorer"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDisjunctionSearcher int
|
||||
|
||||
func init() {
|
||||
var ds DisjunctionSearcher
|
||||
reflectStaticSizeDisjunctionSearcher = int(reflect.TypeOf(ds).Size())
|
||||
}
|
||||
|
||||
// DisjunctionMaxClauseCount is a compile time setting that applications can
|
||||
// adjust to non-zero value to cause the DisjunctionSearcher to return an
|
||||
// error instead of exeucting searches when the size exceeds this value.
|
||||
|
@ -90,6 +99,32 @@ func newDisjunctionSearcher(indexReader index.IndexReader,
|
|||
return &rv, nil
|
||||
}
|
||||
|
||||
func (s *DisjunctionSearcher) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDisjunctionSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size() +
|
||||
s.scorer.Size()
|
||||
|
||||
for _, entry := range s.searchers {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
for _, entry := range s.currs {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
for _, entry := range s.matching {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
sizeInBytes += len(s.matchingIdxs) * size.SizeOfInt
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (s *DisjunctionSearcher) computeQueryNorm() {
|
||||
// first calculate sum of squared weights
|
||||
sumOfSquaredWeights := 0.0
|
||||
|
|
|
@ -15,11 +15,21 @@
|
|||
package searcher
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/scorer"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDocIDSearcher int
|
||||
|
||||
func init() {
|
||||
var ds DocIDSearcher
|
||||
reflectStaticSizeDocIDSearcher = int(reflect.TypeOf(ds).Size())
|
||||
}
|
||||
|
||||
// DocIDSearcher returns documents matching a predefined set of identifiers.
|
||||
type DocIDSearcher struct {
|
||||
reader index.DocIDReader
|
||||
|
@ -42,6 +52,12 @@ func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (s *DocIDSearcher) Size() int {
|
||||
return reflectStaticSizeDocIDSearcher + size.SizeOfPtr +
|
||||
s.reader.Size() +
|
||||
s.scorer.Size()
|
||||
}
|
||||
|
||||
func (s *DocIDSearcher) Count() uint64 {
|
||||
return uint64(s.count)
|
||||
}
|
||||
|
|
|
@ -15,10 +15,20 @@
|
|||
package searcher
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeFilteringSearcher int
|
||||
|
||||
func init() {
|
||||
var fs FilteringSearcher
|
||||
reflectStaticSizeFilteringSearcher = int(reflect.TypeOf(fs).Size())
|
||||
}
|
||||
|
||||
// FilterFunc defines a function which can filter documents
|
||||
// returning true means keep the document
|
||||
// returning false means do not keep the document
|
||||
|
@ -38,6 +48,11 @@ func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearch
|
|||
}
|
||||
}
|
||||
|
||||
func (f *FilteringSearcher) Size() int {
|
||||
return reflectStaticSizeFilteringSearcher + size.SizeOfPtr +
|
||||
f.child.Size()
|
||||
}
|
||||
|
||||
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
||||
next, err := f.child.Next(ctx)
|
||||
for next != nil && err == nil {
|
||||
|
|
|
@ -15,11 +15,21 @@
|
|||
package searcher
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/scorer"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeMatchAllSearcher int
|
||||
|
||||
func init() {
|
||||
var mas MatchAllSearcher
|
||||
reflectStaticSizeMatchAllSearcher = int(reflect.TypeOf(mas).Size())
|
||||
}
|
||||
|
||||
type MatchAllSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
reader index.DocIDReader
|
||||
|
@ -46,6 +56,13 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options s
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (s *MatchAllSearcher) Size() int {
|
||||
return reflectStaticSizeMatchAllSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size() +
|
||||
s.reader.Size() +
|
||||
s.scorer.Size()
|
||||
}
|
||||
|
||||
func (s *MatchAllSearcher) Count() uint64 {
|
||||
return s.count
|
||||
}
|
||||
|
|
|
@ -15,10 +15,20 @@
|
|||
package searcher
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeMatchNoneSearcher int
|
||||
|
||||
func init() {
|
||||
var mns MatchNoneSearcher
|
||||
reflectStaticSizeMatchNoneSearcher = int(reflect.TypeOf(mns).Size())
|
||||
}
|
||||
|
||||
type MatchNoneSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
}
|
||||
|
@ -29,6 +39,11 @@ func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, er
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (s *MatchNoneSearcher) Size() int {
|
||||
return reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size()
|
||||
}
|
||||
|
||||
func (s *MatchNoneSearcher) Count() uint64 {
|
||||
return uint64(0)
|
||||
}
|
||||
|
|
|
@ -17,11 +17,20 @@ package searcher
|
|||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizePhraseSearcher int
|
||||
|
||||
func init() {
|
||||
var ps PhraseSearcher
|
||||
reflectStaticSizePhraseSearcher = int(reflect.TypeOf(ps).Size())
|
||||
}
|
||||
|
||||
type PhraseSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
mustSearcher *ConjunctionSearcher
|
||||
|
@ -32,6 +41,28 @@ type PhraseSearcher struct {
|
|||
initialized bool
|
||||
}
|
||||
|
||||
func (s *PhraseSearcher) Size() int {
|
||||
sizeInBytes := reflectStaticSizePhraseSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size()
|
||||
|
||||
if s.mustSearcher != nil {
|
||||
sizeInBytes += s.mustSearcher.Size()
|
||||
}
|
||||
|
||||
if s.currMust != nil {
|
||||
sizeInBytes += s.currMust.Size()
|
||||
}
|
||||
|
||||
for _, entry := range s.terms {
|
||||
sizeInBytes += size.SizeOfSlice
|
||||
for _, entry1 := range entry {
|
||||
sizeInBytes += size.SizeOfString + len(entry1)
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
|
||||
// turn flat terms []string into [][]string
|
||||
mterms := make([][]string, len(terms))
|
||||
|
|
|
@ -15,11 +15,21 @@
|
|||
package searcher
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/scorer"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermSearcher int
|
||||
|
||||
func init() {
|
||||
var ts TermSearcher
|
||||
reflectStaticSizeTermSearcher = int(reflect.TypeOf(ts).Size())
|
||||
}
|
||||
|
||||
type TermSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
reader index.TermFieldReader
|
||||
|
@ -63,6 +73,14 @@ func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field stri
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (s *TermSearcher) Size() int {
|
||||
return reflectStaticSizeTermSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size() +
|
||||
s.reader.Size() +
|
||||
s.tfd.Size() +
|
||||
s.scorer.Size()
|
||||
}
|
||||
|
||||
func (s *TermSearcher) Count() uint64 {
|
||||
return s.reader.Count()
|
||||
}
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package size
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
)
|
||||
|
||||
func init() {
|
||||
var a bool
|
||||
SizeOfBool = int(reflect.TypeOf(a).Size())
|
||||
var b float32
|
||||
SizeOfFloat32 = int(reflect.TypeOf(b).Size())
|
||||
var c float64
|
||||
SizeOfFloat64 = int(reflect.TypeOf(c).Size())
|
||||
var d map[int]int
|
||||
SizeOfMap = int(reflect.TypeOf(d).Size())
|
||||
var e *int
|
||||
SizeOfPtr = int(reflect.TypeOf(e).Size())
|
||||
var f []int
|
||||
SizeOfSlice = int(reflect.TypeOf(f).Size())
|
||||
var g string
|
||||
SizeOfString = int(reflect.TypeOf(g).Size())
|
||||
var h uint8
|
||||
SizeOfUint8 = int(reflect.TypeOf(h).Size())
|
||||
var i uint16
|
||||
SizeOfUint16 = int(reflect.TypeOf(i).Size())
|
||||
var j uint32
|
||||
SizeOfUint32 = int(reflect.TypeOf(j).Size())
|
||||
var k uint64
|
||||
SizeOfUint64 = int(reflect.TypeOf(k).Size())
|
||||
}
|
||||
|
||||
var SizeOfBool int
|
||||
var SizeOfFloat32 int
|
||||
var SizeOfFloat64 int
|
||||
var SizeOfInt int
|
||||
var SizeOfMap int
|
||||
var SizeOfPtr int
|
||||
var SizeOfSlice int
|
||||
var SizeOfString int
|
||||
var SizeOfUint8 int
|
||||
var SizeOfUint16 int
|
||||
var SizeOfUint32 int
|
||||
var SizeOfUint64 int
|
Loading…
Reference in New Issue