From 7e36109b3c83f2e454805e709bd850e859574244 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 1 Mar 2018 17:12:16 -0800 Subject: [PATCH] MB-28162: Provide API to estimate memory needed to run a search query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This API (unexported) will estimate the amount of memory needed to execute a search query over an index before the collector begins data collection. Sample estimates for certain queries: {Size: 10, BenchmarkUpsidedownSearchOverhead} ESTIMATE BENCHMEM TermQuery 4616 4796 MatchQuery 5210 5405 DisjunctionQuery (Match queries) 7700 8447 DisjunctionQuery (Term queries) 6514 6591 ConjunctionQuery (Match queries) 7524 8175 Nested disjunction query (disjunction of disjunctions) 10306 10708 … --- document/document.go | 21 +++++- document/field_composite.go | 9 +++ index/index.go | 35 ++++++++++ index/scorch/scorch.go | 10 +-- index/scorch/segment/empty.go | 12 ++++ index/scorch/segment/mem/build.go | 2 +- index/scorch/segment/mem/dict.go | 20 ++++++ index/scorch/segment/mem/posting.go | 62 ++++++++++++++++++ index/scorch/segment/mem/segment.go | 83 ++++++++++++------------ index/scorch/segment/mem/segment_test.go | 2 +- index/scorch/segment/segment.go | 15 +++-- index/scorch/segment/zap/contentcoder.go | 8 +++ index/scorch/segment/zap/docvalues.go | 31 +++++---- index/scorch/segment/zap/posting.go | 79 ++++++++++++++++++++++ index/scorch/segment/zap/segment.go | 78 +++++++++++----------- index/scorch/snapshot_index.go | 7 ++ index/scorch/snapshot_index_doc.go | 13 ++++ index/scorch/snapshot_index_tfr.go | 30 +++++++++ index/scorch/snapshot_segment.go | 4 +- index/upsidedown/index_reader.go | 14 ++++ index/upsidedown/reader.go | 39 ++++++++++- index/upsidedown/row.go | 29 +++++++++ index_impl.go | 53 ++++++++++++++- index_test.go | 55 ++++++++++++++++ search.go | 30 +++++++++ search/collector/search_test.go | 18 +++++ search/collector/topn.go | 25 +++++++ search/explanation.go | 21 ++++++ search/facet/facet_builder_datetime.go | 29 +++++++++ search/facet/facet_builder_numeric.go | 29 +++++++++ search/facet/facet_builder_terms.go | 21 ++++++ search/facets_builder.go | 47 ++++++++++++++ search/pool.go | 11 ++++ search/scorer/scorer_conjunction.go | 14 ++++ search/scorer/scorer_constant.go | 19 ++++++ search/scorer/scorer_disjunction.go | 13 ++++ search/scorer/scorer_term.go | 24 +++++++ search/search.go | 82 +++++++++++++++++++++++ search/searcher/search_boolean.go | 36 ++++++++++ search/searcher/search_conjunction.go | 26 ++++++++ search/searcher/search_disjunction.go | 35 ++++++++++ search/searcher/search_docid.go | 16 +++++ search/searcher/search_filter.go | 15 +++++ search/searcher/search_match_all.go | 17 +++++ search/searcher/search_match_none.go | 15 +++++ search/searcher/search_phrase.go | 31 +++++++++ search/searcher/search_term.go | 18 +++++ size/sizes.go | 57 ++++++++++++++++ 48 files changed, 1242 insertions(+), 118 deletions(-) create mode 100644 size/sizes.go diff --git a/document/document.go b/document/document.go index c37585c6..921098b0 100644 --- a/document/document.go +++ b/document/document.go @@ -14,7 +14,19 @@ package document -import "fmt" +import ( + "fmt" + "reflect" + + "github.com/blevesearch/bleve/size" +) + +var reflectStaticSizeDocument int + +func init() { + var d Document + reflectStaticSizeDocument = int(reflect.TypeOf(d).Size()) +} type Document struct { ID string `json:"id"` @@ -30,6 +42,13 @@ func NewDocument(id string) *Document { } } +func (d *Document) Size() int { + return reflectStaticSizeDocument + size.SizeOfPtr + + len(d.ID) + + len(d.Fields)*size.SizeOfPtr + + len(d.CompositeFields)*(size.SizeOfPtr+reflectStaticSizeCompositeField) +} + func (d *Document) AddField(f Field) *Document { switch f := f.(type) { case *CompositeField: diff --git a/document/field_composite.go b/document/field_composite.go index b41b1b8e..e53cd456 100644 --- a/document/field_composite.go +++ b/document/field_composite.go @@ -15,9 +15,18 @@ package document import ( + "reflect" + "github.com/blevesearch/bleve/analysis" ) +var reflectStaticSizeCompositeField int + +func init() { + var cf CompositeField + reflectStaticSizeCompositeField = int(reflect.TypeOf(cf).Size()) +} + const DefaultCompositeIndexingOptions = IndexField type CompositeField struct { diff --git a/index/index.go b/index/index.go index 9870b417..c25d7fa4 100644 --- a/index/index.go +++ b/index/index.go @@ -18,11 +18,23 @@ import ( "bytes" "encoding/json" "fmt" + "reflect" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeTermFieldDoc int +var reflectStaticSizeTermFieldVector int + +func init() { + var tfd TermFieldDoc + reflectStaticSizeTermFieldDoc = int(reflect.TypeOf(tfd).Size()) + var tfv TermFieldVector + reflectStaticSizeTermFieldVector = int(reflect.TypeOf(tfv).Size()) +} + var ErrorUnknownStorageType = fmt.Errorf("unknown storage type") type Index interface { @@ -82,6 +94,8 @@ type IndexReader interface { DumpFields() chan interface{} Close() error + + Size() int } // FieldTerms contains the terms used by a document, keyed by field @@ -115,6 +129,11 @@ type TermFieldVector struct { End uint64 } +func (tfv *TermFieldVector) Size() int { + return reflectStaticSizeTermFieldVector + size.SizeOfPtr + + len(tfv.Field) + len(tfv.ArrayPositions)*size.SizeOfUint64 +} + // IndexInternalID is an opaque document identifier interal to the index impl type IndexInternalID []byte @@ -134,6 +153,17 @@ type TermFieldDoc struct { Vectors []*TermFieldVector } +func (tfd *TermFieldDoc) Size() int { + sizeInBytes := reflectStaticSizeTermFieldDoc + size.SizeOfPtr + + len(tfd.Term) + len(tfd.ID) + + for _, entry := range tfd.Vectors { + sizeInBytes += entry.Size() + } + + return sizeInBytes +} + // Reset allows an already allocated TermFieldDoc to be reused func (tfd *TermFieldDoc) Reset() *TermFieldDoc { // remember the []byte used for the ID @@ -161,6 +191,8 @@ type TermFieldReader interface { // Count returns the number of documents contains the term in this field. Count() uint64 Close() error + + Size() int } type DictEntry struct { @@ -185,6 +217,9 @@ type DocIDReader interface { // will start there instead. If ID is greater than or equal to the end of // the range, Next() call will return io.EOF. Advance(ID IndexInternalID) (IndexInternalID, error) + + Size() int + Close() error } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index a40f374a..2a9eb634 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -472,20 +472,20 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) { } func (s *Scorch) MemoryUsed() uint64 { - var memUsed uint64 + var memUsed int s.rootLock.RLock() if s.root != nil { for _, segmentSnapshot := range s.root.segment { memUsed += 8 /* size of id -> uint64 */ + - segmentSnapshot.segment.SizeInBytes() + segmentSnapshot.segment.Size() if segmentSnapshot.deleted != nil { - memUsed += segmentSnapshot.deleted.GetSizeInBytes() + memUsed += int(segmentSnapshot.deleted.GetSizeInBytes()) } - memUsed += segmentSnapshot.cachedDocs.sizeInBytes() + memUsed += segmentSnapshot.cachedDocs.size() } } s.rootLock.RUnlock() - return memUsed + return uint64(memUsed) } func (s *Scorch) markIneligibleForRemoval(filename string) { diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 83454644..6c19f60f 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -46,6 +46,10 @@ func (e *EmptySegment) Close() error { return nil } +func (e *EmptySegment) Size() uint64 { + return 0 +} + func (e *EmptySegment) AddRef() { } @@ -84,6 +88,10 @@ func (e *EmptyPostingsList) Iterator() PostingsIterator { return &EmptyPostingsIterator{} } +func (e *EmptyPostingsList) Size() int { + return 0 +} + func (e *EmptyPostingsList) Count() uint64 { return 0 } @@ -93,3 +101,7 @@ type EmptyPostingsIterator struct{} func (e *EmptyPostingsIterator) Next() (Posting, error) { return nil, nil } + +func (e *EmptyPostingsIterator) Size() int { + return 0 +} diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 57971aae..264c94d1 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -45,7 +45,7 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { } // compute memory usage of segment - s.updateSizeInBytes() + s.updateSize() // professional debugging // diff --git a/index/scorch/segment/mem/dict.go b/index/scorch/segment/mem/dict.go index b564ed1f..9f5a873a 100644 --- a/index/scorch/segment/mem/dict.go +++ b/index/scorch/segment/mem/dict.go @@ -15,14 +15,23 @@ package mem import ( + "reflect" "sort" "strings" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeDictionary int + +func init() { + var d Dictionary + reflectStaticSizeDictionary = int(reflect.TypeOf(d).Size()) +} + // Dictionary is the in-memory representation of the term dictionary type Dictionary struct { segment *Segment @@ -30,6 +39,17 @@ type Dictionary struct { fieldID uint16 } +func (d *Dictionary) Size() int { + sizeInBytes := reflectStaticSizeDictionary + size.SizeOfPtr + + len(d.field) + + if d.segment != nil { + sizeInBytes += int(d.segment.Size()) + } + + return sizeInBytes +} + // PostingsList returns the postings list for the specified term func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index 2554333a..4203acbe 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -15,10 +15,29 @@ package mem import ( + "reflect" + "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizePostingsList int +var reflectStaticSizePostingsIterator int +var reflectStaticSizePosting int +var reflectStaticSizeLocation int + +func init() { + var pl PostingsList + reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size()) + var pi PostingsIterator + reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size()) + var p Posting + reflectStaticSizePosting = int(reflect.TypeOf(p).Size()) + var l Location + reflectStaticSizeLocation = int(reflect.TypeOf(l).Size()) +} + // PostingsList is an in-memory represenation of a postings list type PostingsList struct { dictionary *Dictionary @@ -27,6 +46,20 @@ type PostingsList struct { except *roaring.Bitmap } +func (p *PostingsList) Size() int { + sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr + + if p.dictionary != nil { + sizeInBytes += p.dictionary.Size() + } + + if p.except != nil { + sizeInBytes += int(p.except.GetSizeInBytes()) + } + + return sizeInBytes +} + // Count returns the number of items on this postings list func (p *PostingsList) Count() uint64 { var rv uint64 @@ -83,6 +116,16 @@ type PostingsIterator struct { reuse Posting } +func (i *PostingsIterator) Size() int { + sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr + + if i.locations != nil { + sizeInBytes += int(i.locations.GetSizeInBytes()) + } + + return sizeInBytes +} + // Next returns the next posting on the postings list, or nil at the end func (i *PostingsIterator) Next() (segment.Posting, error) { if i.actual == nil || !i.actual.HasNext() { @@ -121,6 +164,16 @@ type Posting struct { hasLoc bool } +func (p *Posting) Size() int { + sizeInBytes := reflectStaticSizePosting + size.SizeOfPtr + + if p.iterator != nil { + sizeInBytes += p.iterator.Size() + } + + return sizeInBytes +} + // Number returns the document number of this posting in this segment func (p *Posting) Number() uint64 { return p.docNum @@ -158,6 +211,15 @@ type Location struct { offset int } +func (l *Location) Size() int { + sizeInBytes := reflectStaticSizeLocation + if l.p != nil { + sizeInBytes += l.p.Size() + } + + return sizeInBytes +} + // Field returns the name of the field (useful in composite fields to know // which original field the value came from) func (l *Location) Field() string { diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index 04bdb368..e9c4a273 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -16,11 +16,20 @@ package mem import ( "fmt" + "reflect" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeSegment int + +func init() { + var s Segment + reflectStaticSizeSegment = int(reflect.TypeOf(s).Size()) +} + // _id field is always guaranteed to have fieldID of 0 const idFieldID uint16 = 0 @@ -96,7 +105,7 @@ type Segment struct { // Footprint of the segment, updated when analyzed document mutations // are added into the segment - sizeInBytes uint64 + sizeInBytes int } // New builds a new empty Segment @@ -107,99 +116,87 @@ func New() *Segment { } } -func (s *Segment) updateSizeInBytes() { - var sizeInBytes uint64 +func (s *Segment) updateSize() { + sizeInBytes := reflectStaticSizeSegment // FieldsMap, FieldsInv for k, _ := range s.FieldsMap { - sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 + - 2 /* size of uint16 */) + sizeInBytes += (len(k)+size.SizeOfString)*2 + + size.SizeOfUint16 } - // overhead from the data structures - sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice) // Dicts, DictKeys for _, entry := range s.Dicts { for k, _ := range entry { - sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 + - 8 /* size of uint64 */) + sizeInBytes += (len(k)+size.SizeOfString)*2 + + size.SizeOfUint64 } // overhead from the data structures - sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice) + sizeInBytes += (size.SizeOfMap + size.SizeOfSlice) } - sizeInBytes += (segment.SizeOfSlice * 2) // Postings, PostingsLocs for i := 0; i < len(s.Postings); i++ { - sizeInBytes += (s.Postings[i].GetSizeInBytes() + segment.SizeOfPointer) + - (s.PostingsLocs[i].GetSizeInBytes() + segment.SizeOfPointer) + sizeInBytes += (int(s.Postings[i].GetSizeInBytes()) + size.SizeOfPtr) + + (int(s.PostingsLocs[i].GetSizeInBytes()) + size.SizeOfPtr) } - sizeInBytes += (segment.SizeOfSlice * 2) // Freqs, Norms for i := 0; i < len(s.Freqs); i++ { - sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ + - len(s.Norms[i])*4 /* size of float32 */) + - (segment.SizeOfSlice * 2) + sizeInBytes += (len(s.Freqs[i])*size.SizeOfUint64 + + len(s.Norms[i])*size.SizeOfFloat32) + + (size.SizeOfSlice * 2) } - sizeInBytes += (segment.SizeOfSlice * 2) // Location data for i := 0; i < len(s.Locfields); i++ { - sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ + - len(s.Locstarts[i])*8 /* size of uint64 */ + - len(s.Locends[i])*8 /* size of uint64 */ + - len(s.Locpos[i])*8 /* size of uint64 */) + sizeInBytes += len(s.Locfields[i])*size.SizeOfUint16 + + len(s.Locstarts[i])*size.SizeOfUint64 + + len(s.Locends[i])*size.SizeOfUint64 + + len(s.Locpos[i])*size.SizeOfUint64 for j := 0; j < len(s.Locarraypos[i]); j++ { - sizeInBytes += uint64(len(s.Locarraypos[i][j])*8 /* size of uint64 */) + - segment.SizeOfSlice + sizeInBytes += len(s.Locarraypos[i][j])*size.SizeOfUint64 + + size.SizeOfSlice } - sizeInBytes += (segment.SizeOfSlice * 5) + sizeInBytes += (size.SizeOfSlice * 5) } - sizeInBytes += (segment.SizeOfSlice * 5) // Stored data for i := 0; i < len(s.Stored); i++ { for _, v := range s.Stored[i] { - sizeInBytes += uint64(2 /* size of uint16 */) + sizeInBytes += size.SizeOfUint16 for _, arr := range v { - sizeInBytes += uint64(len(arr)) + segment.SizeOfSlice + sizeInBytes += len(arr) + size.SizeOfSlice } - sizeInBytes += segment.SizeOfSlice + sizeInBytes += size.SizeOfSlice } for _, v := range s.StoredTypes[i] { - sizeInBytes += uint64(2 /* size of uint16 */ +len(v)) + segment.SizeOfSlice + sizeInBytes += size.SizeOfUint16 + len(v) + size.SizeOfSlice } for _, v := range s.StoredPos[i] { - sizeInBytes += uint64(2 /* size of uint16 */) + sizeInBytes += size.SizeOfUint16 for _, arr := range v { - sizeInBytes += uint64(len(arr)*8 /* size of uint64 */) + - segment.SizeOfSlice + sizeInBytes += len(arr)*size.SizeOfUint64 + + size.SizeOfSlice } - sizeInBytes += segment.SizeOfSlice + sizeInBytes += size.SizeOfSlice } // overhead from map(s) within Stored, StoredTypes, StoredPos - sizeInBytes += (segment.SizeOfMap * 3) + sizeInBytes += (size.SizeOfMap * 3) } - // overhead from data structures: Stored, StoredTypes, StoredPos - sizeInBytes += (segment.SizeOfSlice * 3) // DocValueFields - sizeInBytes += uint64(len(s.DocValueFields)*3 /* size of uint16 + bool */) + - segment.SizeOfMap - - // SizeInBytes - sizeInBytes += uint64(8) + sizeInBytes += len(s.DocValueFields) * (size.SizeOfUint16 + size.SizeOfBool) s.sizeInBytes = sizeInBytes } -func (s *Segment) SizeInBytes() uint64 { +func (s *Segment) Size() int { return s.sizeInBytes } diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go index 5e3818c2..6c5625d8 100644 --- a/index/scorch/segment/mem/segment_test.go +++ b/index/scorch/segment/mem/segment_test.go @@ -169,7 +169,7 @@ func TestSingle(t *testing.T) { t.Fatalf("segment nil, not expected") } - if segment.SizeInBytes() <= 0 { + if segment.Size() <= 0 { t.Fatalf("segment size not updated") } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index d5435ab9..8eee5f75 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -19,12 +19,6 @@ import ( "github.com/blevesearch/bleve/index" ) -// Overhead from go data structures when deployed on a 64-bit system. -const SizeOfMap uint64 = 8 -const SizeOfPointer uint64 = 8 -const SizeOfSlice uint64 = 24 -const SizeOfString uint64 = 16 - // DocumentFieldValueVisitor defines a callback to be visited for each // stored field value. The return value determines if the visitor // should keep going. Returning true continues visiting, false stops. @@ -42,7 +36,7 @@ type Segment interface { Close() error - SizeInBytes() uint64 + Size() int AddRef() DecRef() error @@ -63,6 +57,8 @@ type DictionaryIterator interface { type PostingsList interface { Iterator() PostingsIterator + Size() int + Count() uint64 // NOTE deferred for future work @@ -77,6 +73,8 @@ type PostingsIterator interface { // implementations may return a shared instance to reduce memory // allocations. Next() (Posting, error) + + Size() int } type Posting interface { @@ -86,6 +84,8 @@ type Posting interface { Norm() float64 Locations() []Location + + Size() int } type Location interface { @@ -94,6 +94,7 @@ type Location interface { End() uint64 Pos() uint64 ArrayPositions() []uint64 + Size() int } // DocumentFieldTermVisitable is implemented by various scorch segment diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index 83457146..933f10a1 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ -18,10 +18,18 @@ import ( "bytes" "encoding/binary" "io" + "reflect" "github.com/golang/snappy" ) +var reflectStaticSizeMetaData int + +func init() { + var md MetaData + reflectStaticSizeMetaData = int(reflect.TypeOf(md).Size()) +} + var termSeparator byte = 0xff var termSeparatorSplitSlice = []byte{termSeparator} diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 0514bd30..13635c57 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -19,13 +19,21 @@ import ( "encoding/binary" "fmt" "math" + "reflect" "sort" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" "github.com/golang/snappy" ) +var reflectStaticSizedocValueIterator int + +func init() { + var dvi docValueIterator + reflectStaticSizedocValueIterator = int(reflect.TypeOf(dvi).Size()) +} + type docValueIterator struct { field string curChunkNum uint64 @@ -36,21 +44,12 @@ type docValueIterator struct { curChunkData []byte // compressed data cache } -func (di *docValueIterator) sizeInBytes() uint64 { - // curChunkNum, numChunks, dvDataLoc --> uint64 - sizeInBytes := 24 - - // field - sizeInBytes += (len(di.field) + int(segment.SizeOfString)) - - // chunkLens, curChunkHeader - sizeInBytes += len(di.chunkLens)*8 + - len(di.curChunkHeader)*24 + - int(segment.SizeOfSlice*2) /* overhead from slices */ - - // curChunkData is mmap'ed, not included - - return uint64(sizeInBytes) +func (di *docValueIterator) size() int { + return reflectStaticSizedocValueIterator + size.SizeOfPtr + + len(di.field) + + len(di.chunkLens)*size.SizeOfUint64 + + len(di.curChunkHeader)*reflectStaticSizeMetaData + + len(di.curChunkData) } func (di *docValueIterator) fieldName() string { diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 589c7cb8..e9c68cba 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -19,12 +19,30 @@ import ( "encoding/binary" "fmt" "math" + "reflect" "github.com/RoaringBitmap/roaring" "github.com/Smerity/govarint" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizePostingsList int +var reflectStaticSizePostingsIterator int +var reflectStaticSizePosting int +var reflectStaticSizeLocation int + +func init() { + var pl PostingsList + reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size()) + var pi PostingsIterator + reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size()) + var p Posting + reflectStaticSizePosting = int(reflect.TypeOf(p).Size()) + var l Location + reflectStaticSizeLocation = int(reflect.TypeOf(l).Size()) +} + // PostingsList is an in-memory represenation of a postings list type PostingsList struct { sb *SegmentBase @@ -36,6 +54,28 @@ type PostingsList struct { except *roaring.Bitmap } +func (p *PostingsList) Size() int { + sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr + + if p.sb != nil { + sizeInBytes += (p.sb.Size() - len(p.sb.mem)) // do not include the mmap'ed part + } + + if p.locBitmap != nil { + sizeInBytes += int(p.locBitmap.GetSizeInBytes()) + } + + if p.postings != nil { + sizeInBytes += int(p.postings.GetSizeInBytes()) + } + + if p.except != nil { + sizeInBytes += int(p.except.GetSizeInBytes()) + } + + return sizeInBytes +} + // Iterator returns an iterator for this postings list func (p *PostingsList) Iterator() segment.PostingsIterator { return p.iterator(nil) @@ -193,6 +233,25 @@ type PostingsIterator struct { nextLocs []Location // reused across Next() calls } +func (i *PostingsIterator) Size() int { + sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr + + len(i.currChunkFreqNorm) + + len(i.currChunkLoc) + + len(i.freqChunkLens)*size.SizeOfUint64 + + len(i.locChunkLens)*size.SizeOfUint64 + + i.next.Size() + + if i.locBitmap != nil { + sizeInBytes += int(i.locBitmap.GetSizeInBytes()) + } + + for _, entry := range i.nextLocs { + sizeInBytes += entry.Size() + } + + return sizeInBytes +} + func (i *PostingsIterator) loadChunk(chunk int) error { if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) { return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens)) @@ -444,6 +503,20 @@ type Posting struct { locs []segment.Location } +func (p *Posting) Size() int { + sizeInBytes := reflectStaticSizePosting + + if p.iterator != nil { + sizeInBytes += p.iterator.Size() + } + + for _, entry := range p.locs { + sizeInBytes += entry.Size() + } + + return sizeInBytes +} + // Number returns the document number of this posting in this segment func (p *Posting) Number() uint64 { return p.docNum @@ -473,6 +546,12 @@ type Location struct { ap []uint64 } +func (l *Location) Size() int { + return reflectStaticSizeLocation + + len(l.field) + + len(l.ap)*size.SizeOfUint64 +} + // Field returns the name of the field (useful in composite fields to know // which original field the value came from) func (l *Location) Field() string { diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 40c0af27..972b7578 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -20,16 +20,25 @@ import ( "fmt" "io" "os" + "reflect" "sync" "github.com/RoaringBitmap/roaring" "github.com/Smerity/govarint" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" "github.com/couchbase/vellum" mmap "github.com/edsrzf/mmap-go" "github.com/golang/snappy" ) +var reflectStaticSizeSegmentBase int + +func init() { + var sb SegmentBase + reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size()) +} + // Open returns a zap impl of a segment func Open(path string) (segment.Segment, error) { f, err := os.Open(path) @@ -92,6 +101,32 @@ type SegmentBase struct { fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field } +func (sb *SegmentBase) Size() int { + sizeInBytes := reflectStaticSizeSegmentBase + + len(sb.mem) + + // fieldsMap + for k, _ := range sb.fieldsMap { + sizeInBytes += (len(k) + size.SizeOfString) + size.SizeOfUint16 + } + + // fieldsInv, dictLocs + for _, entry := range sb.fieldsInv { + sizeInBytes += len(entry) + size.SizeOfString + } + sizeInBytes += len(sb.dictLocs) * size.SizeOfUint64 + + // fieldDvIterMap + for _, v := range sb.fieldDvIterMap { + sizeInBytes += size.SizeOfUint16 + size.SizeOfPtr + if v != nil { + sizeInBytes += v.size() + } + } + + return sizeInBytes +} + func (sb *SegmentBase) AddRef() {} func (sb *SegmentBase) DecRef() (err error) { return nil } func (sb *SegmentBase) Close() (err error) { return nil } @@ -111,56 +146,19 @@ type Segment struct { refs int64 } -func (s *Segment) SizeInBytes() uint64 { +func (s *Segment) Size() int { // 8 /* size of file pointer */ // 4 /* size of version -> uint32 */ // 4 /* size of crc -> uint32 */ sizeOfUints := 16 - sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints + sizeInBytes := (len(s.path) + size.SizeOfString) + sizeOfUints // mutex, refs -> int64 sizeInBytes += 16 // do not include the mmap'ed part - return uint64(sizeInBytes) + s.SegmentBase.SizeInBytes() - uint64(len(s.mem)) -} - -func (s *SegmentBase) SizeInBytes() uint64 { - // 4 /* size of memCRC -> uint32 */ - // 4 /* size of chunkFactor -> uint32 */ - // 8 /* size of numDocs -> uint64 */ - // 8 /* size of storedIndexOffset -> uint64 */ - // 8 /* size of fieldsIndexOffset -> uint64 */ - // 8 /* size of docValueOffset -> uint64 */ - sizeInBytes := 40 - - sizeInBytes += len(s.mem) + int(segment.SizeOfSlice) - - // fieldsMap - for k, _ := range s.fieldsMap { - sizeInBytes += (len(k) + int(segment.SizeOfString)) + 2 /* size of uint16 */ - } - sizeInBytes += int(segment.SizeOfMap) /* overhead from map */ - - // fieldsInv, dictLocs - for _, entry := range s.fieldsInv { - sizeInBytes += (len(entry) + int(segment.SizeOfString)) - } - sizeInBytes += len(s.dictLocs) * 8 /* size of uint64 */ - sizeInBytes += int(segment.SizeOfSlice) * 3 /* overhead from slices */ - - // fieldDvIterMap - sizeInBytes += len(s.fieldDvIterMap) * - int(segment.SizeOfPointer+2 /* size of uint16 */) - for _, entry := range s.fieldDvIterMap { - if entry != nil { - sizeInBytes += int(entry.sizeInBytes()) - } - } - sizeInBytes += int(segment.SizeOfMap) - - return uint64(sizeInBytes) + return sizeInBytes + s.SegmentBase.Size() - len(s.mem) } func (s *Segment) AddRef() { diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 5289b143..9394f391 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -27,6 +27,7 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" ) type asynchSegmentResult struct { @@ -89,6 +90,12 @@ func (i *IndexSnapshot) Close() error { return i.DecRef() } +func (i *IndexSnapshot) Size() int { + // Just return the size of the pointer for estimating the overhead + // during Search, a reference of the IndexSnapshot serves as the reader. + return size.SizeOfPtr +} + func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { results := make(chan *asynchSegmentResult) diff --git a/index/scorch/snapshot_index_doc.go b/index/scorch/snapshot_index_doc.go index d1205ff8..27da2086 100644 --- a/index/scorch/snapshot_index_doc.go +++ b/index/scorch/snapshot_index_doc.go @@ -16,17 +16,30 @@ package scorch import ( "bytes" + "reflect" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeIndexSnapshotDocIDReader int + +func init() { + var isdr IndexSnapshotDocIDReader + reflectStaticSizeIndexSnapshotDocIDReader = int(reflect.TypeOf(isdr).Size()) +} + type IndexSnapshotDocIDReader struct { snapshot *IndexSnapshot iterators []roaring.IntIterable segmentOffset int } +func (i *IndexSnapshotDocIDReader) Size() int { + return reflectStaticSizeIndexSnapshotDocIDReader + size.SizeOfPtr +} + func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) { for i.segmentOffset < len(i.iterators) { if !i.iterators[i.segmentOffset].HasNext() { diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index d1f23b27..e1a0e9a5 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -16,12 +16,21 @@ package scorch import ( "bytes" + "reflect" "sync/atomic" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeIndexSnapshotTermFieldReader int + +func init() { + var istfr IndexSnapshotTermFieldReader + reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size()) +} + type IndexSnapshotTermFieldReader struct { term []byte field string @@ -36,6 +45,27 @@ type IndexSnapshotTermFieldReader struct { currID index.IndexInternalID } +func (i *IndexSnapshotTermFieldReader) Size() int { + sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr + + len(i.term) + + len(i.field) + + len(i.currID) + + for _, entry := range i.postings { + sizeInBytes += entry.Size() + } + + for _, entry := range i.iterators { + sizeInBytes += entry.Size() + } + + if i.currPosting != nil { + sizeInBytes += i.currPosting.Size() + } + + return sizeInBytes +} + func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { rv := preAlloced if rv == nil { diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 5e64cb1f..cdfe317f 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -213,7 +213,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e return nil } -func (c *cachedDocs) sizeInBytes() uint64 { +func (c *cachedDocs) size() int { sizeInBytes := 0 c.m.Lock() for k, v := range c.cache { // cachedFieldDocs @@ -225,5 +225,5 @@ func (c *cachedDocs) sizeInBytes() uint64 { } } c.m.Unlock() - return uint64(sizeInBytes) + return sizeInBytes } diff --git a/index/upsidedown/index_reader.go b/index/upsidedown/index_reader.go index 77d523c3..4e575521 100644 --- a/index/upsidedown/index_reader.go +++ b/index/upsidedown/index_reader.go @@ -15,17 +15,31 @@ package upsidedown import ( + "reflect" + "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeIndexReader int + +func init() { + var ir IndexReader + reflectStaticSizeIndexReader = int(reflect.TypeOf(ir).Size()) +} + type IndexReader struct { index *UpsideDownCouch kvreader store.KVReader docCount uint64 } +func (i *IndexReader) Size() int { + return reflectStaticSizeIndexReader + size.SizeOfPtr +} + func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false) if fieldExists { diff --git a/index/upsidedown/reader.go b/index/upsidedown/reader.go index 1f40c02d..646d4d8a 100644 --- a/index/upsidedown/reader.go +++ b/index/upsidedown/reader.go @@ -16,13 +16,27 @@ package upsidedown import ( "bytes" + "reflect" "sort" "sync/atomic" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeUpsideDownCouchTermFieldReader int +var reflectStaticSizeUpsideDownCouchDocIDReader int + +func init() { + var tfr UpsideDownCouchTermFieldReader + reflectStaticSizeUpsideDownCouchTermFieldReader = + int(reflect.TypeOf(tfr).Size()) + var cdr UpsideDownCouchDocIDReader + reflectStaticSizeUpsideDownCouchDocIDReader = + int(reflect.TypeOf(cdr).Size()) +} + type UpsideDownCouchTermFieldReader struct { count uint64 indexReader *IndexReader @@ -35,6 +49,19 @@ type UpsideDownCouchTermFieldReader struct { includeTermVectors bool } +func (r *UpsideDownCouchTermFieldReader) Size() int { + sizeInBytes := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr + + len(r.term) + + r.tfrPrealloc.Size() + + len(r.keyBuf) + + if r.tfrNext != nil { + sizeInBytes += r.tfrNext.Size() + } + + return sizeInBytes +} + func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) { bufNeeded := termFrequencyRowKeySize(term, nil) if bufNeeded < dictionaryRowKeySize(term) { @@ -174,8 +201,18 @@ type UpsideDownCouchDocIDReader struct { onlyMode bool } -func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) { +func (r *UpsideDownCouchDocIDReader) Size() int { + sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader + + r.indexReader.Size() + for _, entry := range r.only { + sizeInBytes += size.SizeOfString + len(entry) + } + + return sizeInBytes +} + +func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) { startBytes := []byte{0x0} endBytes := []byte{0xff} diff --git a/index/upsidedown/row.go b/index/upsidedown/row.go index 7e503ae0..ba50314c 100644 --- a/index/upsidedown/row.go +++ b/index/upsidedown/row.go @@ -20,10 +20,22 @@ import ( "fmt" "io" "math" + "reflect" + "github.com/blevesearch/bleve/size" "github.com/golang/protobuf/proto" ) +var reflectStaticSizeTermFrequencyRow int +var reflectStaticSizeTermVector int + +func init() { + var tfr TermFrequencyRow + reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(tfr).Size()) + var tv TermVector + reflectStaticSizeTermVector = int(reflect.TypeOf(tv).Size()) +} + const ByteSeparator byte = 0xff type UpsideDownCouchRowStream chan UpsideDownCouchRow @@ -358,6 +370,11 @@ type TermVector struct { end uint64 } +func (tv *TermVector) Size() int { + return reflectStaticSizeTermVector + size.SizeOfPtr + + len(tv.arrayPositions)*size.SizeOfUint64 +} + func (tv *TermVector) String() string { return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions) } @@ -371,6 +388,18 @@ type TermFrequencyRow struct { field uint16 } +func (tfr *TermFrequencyRow) Size() int { + sizeInBytes := reflectStaticSizeTermFrequencyRow + + len(tfr.term) + + len(tfr.doc) + + for _, entry := range tfr.vectors { + sizeInBytes += entry.Size() + } + + return sizeInBytes +} + func (tfr *TermFrequencyRow) Term() []byte { return tfr.term } diff --git a/index_impl.go b/index_impl.go index caea1b8e..df6e748d 100644 --- a/index_impl.go +++ b/index_impl.go @@ -362,8 +362,59 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) { return i.SearchInContext(context.Background(), req) } +// memNeededForSearch is a helper function that returns an estimate of RAM +// needed to execute a search request. +func memNeededForSearch(req *SearchRequest, + searcher search.Searcher, + topnCollector *collector.TopNCollector) uint64 { + + backingSize := req.Size + req.From + 1 + if req.Size+req.From > collector.PreAllocSizeSkipCap { + backingSize = collector.PreAllocSizeSkipCap + 1 + } + numDocMatches := backingSize + searcher.DocumentMatchPoolSize() + + estimate := 0 + + // overhead, size in bytes from collector + estimate += topnCollector.Size() + + var dm search.DocumentMatch + sizeOfDocumentMatch := dm.Size() + + // pre-allocing DocumentMatchPool + var sc search.SearchContext + estimate += sc.Size() + numDocMatches*sizeOfDocumentMatch + + // searcher overhead + estimate += searcher.Size() + + // overhead from results, lowestMatchOutsideResults + estimate += (numDocMatches + 1) * sizeOfDocumentMatch + + // additional overhead from SearchResult + var sr SearchResult + estimate += sr.Size() + + // overhead from facet results + if req.Facets != nil { + var fr search.FacetResult + estimate += len(req.Facets) * fr.Size() + } + + // highlighting, store + var d document.Document + if len(req.Fields) > 0 || req.Highlight != nil { + for i := 0; i < (req.Size + req.From); i++ { // size + from => number of hits + estimate += (req.Size + req.From) * d.Size() + } + } + + return uint64(estimate) +} + // SearchInContext executes a search request operation within the provided -// Context. Returns a SearchResult object or an error. +// Context. Returns a SearchResult object or an error. func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) { i.mutex.RLock() defer i.mutex.RUnlock() diff --git a/index_test.go b/index_test.go index a69357bf..f1e53647 100644 --- a/index_test.go +++ b/index_test.go @@ -36,6 +36,9 @@ import ( "github.com/blevesearch/bleve/mapping" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/query" + + "github.com/blevesearch/bleve/index/scorch" + "github.com/blevesearch/bleve/index/upsidedown" ) func TestCrud(t *testing.T) { @@ -1815,3 +1818,55 @@ func TestIndexAdvancedCountMatchSearch(t *testing.T) { t.Fatal(err) } } + +func benchmarkSearchOverhead(indexType string, b *testing.B) { + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + b.Fatal(err) + } + }() + + index, err := NewUsing("testidx", NewIndexMapping(), + indexType, Config.DefaultKVStore, nil) + if err != nil { + b.Fatal(err) + } + defer func() { + err := index.Close() + if err != nil { + b.Fatal(err) + } + }() + + elements := []string{"air", "water", "fire", "earth"} + for j := 0; j < 10000; j++ { + err = index.Index(fmt.Sprintf("%d", j), + map[string]interface{}{"name": elements[j%len(elements)]}) + if err != nil { + b.Fatal(err) + } + } + + query1 := NewTermQuery("water") + query2 := NewTermQuery("fire") + query := NewDisjunctionQuery(query1, query2) + req := NewSearchRequest(query) + + b.ResetTimer() + + for n := 0; n < b.N; n++ { + _, err = index.Search(req) + if err != nil { + b.Fatal(err) + } + } +} + +func BenchmarkUpsidedownSearchOverhead(b *testing.B) { + benchmarkSearchOverhead(upsidedown.Name, b) +} + +func BenchmarkScorchSearchOverhead(b *testing.B) { + benchmarkSearchOverhead(scorch.Name, b) +} diff --git a/search.go b/search.go index 46d849c1..e324262e 100644 --- a/search.go +++ b/search.go @@ -17,6 +17,7 @@ package bleve import ( "encoding/json" "fmt" + "reflect" "time" "github.com/blevesearch/bleve/analysis" @@ -24,8 +25,19 @@ import ( "github.com/blevesearch/bleve/registry" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/query" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeSearchResult int +var reflectStaticSizeSearchStatus int + +func init() { + var sr SearchResult + reflectStaticSizeSearchResult = int(reflect.TypeOf(sr).Size()) + var ss SearchStatus + reflectStaticSizeSearchStatus = int(reflect.TypeOf(ss).Size()) +} + var cache = registry.NewCache() const defaultDateTimeParser = optional.Name @@ -432,6 +444,24 @@ type SearchResult struct { Facets search.FacetResults `json:"facets"` } +func (sr *SearchResult) Size() int { + sizeInBytes := reflectStaticSizeSearchResult + size.SizeOfPtr + + reflectStaticSizeSearchStatus + + for _, entry := range sr.Hits { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + for k, v := range sr.Facets { + sizeInBytes += size.SizeOfString + len(k) + + v.Size() + } + + return sizeInBytes +} + func (sr *SearchResult) String() string { rv := "" if sr.Total > 0 { diff --git a/search/collector/search_test.go b/search/collector/search_test.go index 8457fb98..3ba71c1d 100644 --- a/search/collector/search_test.go +++ b/search/collector/search_test.go @@ -15,6 +15,8 @@ package collector import ( + "reflect" + "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" @@ -25,6 +27,18 @@ type stubSearcher struct { matches []*search.DocumentMatch } +func (ss *stubSearcher) Size() int { + sizeInBytes := int(reflect.TypeOf(*ss).Size()) + + for _, entry := range ss.matches { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + return sizeInBytes +} + func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { if ss.index < len(ss.matches) { rv := ctx.DocumentMatchPool.Get() @@ -76,6 +90,10 @@ func (ss *stubSearcher) DocumentMatchPoolSize() int { type stubReader struct{} +func (sr *stubReader) Size() int { + return 0 +} + func (sr *stubReader) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { return nil, nil } diff --git a/search/collector/topn.go b/search/collector/topn.go index 388370e7..d684868c 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -16,12 +16,21 @@ package collector import ( "context" + "reflect" "time" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeTopNCollector int + +func init() { + var coll TopNCollector + reflectStaticSizeTopNCollector = int(reflect.TypeOf(coll).Size()) +} + type collectorStore interface { // Add the document, and if the new store size exceeds the provided size // the last element is removed and returned. If the size has not been @@ -98,6 +107,22 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector return hc } +func (hc *TopNCollector) Size() int { + sizeInBytes := reflectStaticSizeTopNCollector + size.SizeOfPtr + + if hc.facetsBuilder != nil { + sizeInBytes += hc.facetsBuilder.Size() + } + + for _, entry := range hc.neededFields { + sizeInBytes += len(entry) + size.SizeOfString + } + + sizeInBytes += len(hc.cachedScoring) + len(hc.cachedDesc) + + return sizeInBytes +} + // Collect goes to the index to find the matching documents func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error { startTime := time.Now() diff --git a/search/explanation.go b/search/explanation.go index 766367d7..3b81737b 100644 --- a/search/explanation.go +++ b/search/explanation.go @@ -17,8 +17,18 @@ package search import ( "encoding/json" "fmt" + "reflect" + + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeExplanation int + +func init() { + var e Explanation + reflectStaticSizeExplanation = int(reflect.TypeOf(e).Size()) +} + type Explanation struct { Value float64 `json:"value"` Message string `json:"message"` @@ -32,3 +42,14 @@ func (expl *Explanation) String() string { } return string(js) } + +func (expl *Explanation) Size() int { + sizeInBytes := reflectStaticSizeExplanation + size.SizeOfPtr + + len(expl.Message) + + for _, entry := range expl.Children { + sizeInBytes += entry.Size() + } + + return sizeInBytes +} diff --git a/search/facet/facet_builder_datetime.go b/search/facet/facet_builder_datetime.go index 8657a553..c45442e4 100644 --- a/search/facet/facet_builder_datetime.go +++ b/search/facet/facet_builder_datetime.go @@ -15,13 +15,25 @@ package facet import ( + "reflect" "sort" "time" "github.com/blevesearch/bleve/numeric" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeDateTimeFacetBuilder int +var reflectStaticSizedateTimeRange int + +func init() { + var dtfb DateTimeFacetBuilder + reflectStaticSizeDateTimeFacetBuilder = int(reflect.TypeOf(dtfb).Size()) + var dtr dateTimeRange + reflectStaticSizedateTimeRange = int(reflect.TypeOf(dtr).Size()) +} + type dateTimeRange struct { start time.Time end time.Time @@ -46,6 +58,23 @@ func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder { } } +func (fb *DateTimeFacetBuilder) Size() int { + sizeInBytes := reflectStaticSizeDateTimeFacetBuilder + size.SizeOfPtr + + len(fb.field) + + for k, _ := range fb.termsCount { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfInt + } + + for k, _ := range fb.ranges { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfPtr + reflectStaticSizedateTimeRange + } + + return sizeInBytes +} + func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) { r := dateTimeRange{ start: start, diff --git a/search/facet/facet_builder_numeric.go b/search/facet/facet_builder_numeric.go index 2ab5f278..c1692b54 100644 --- a/search/facet/facet_builder_numeric.go +++ b/search/facet/facet_builder_numeric.go @@ -15,12 +15,24 @@ package facet import ( + "reflect" "sort" "github.com/blevesearch/bleve/numeric" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeNumericFacetBuilder int +var reflectStaticSizenumericRange int + +func init() { + var nfb NumericFacetBuilder + reflectStaticSizeNumericFacetBuilder = int(reflect.TypeOf(nfb).Size()) + var nr numericRange + reflectStaticSizenumericRange = int(reflect.TypeOf(nr).Size()) +} + type numericRange struct { min *float64 max *float64 @@ -45,6 +57,23 @@ func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder { } } +func (fb *NumericFacetBuilder) Size() int { + sizeInBytes := reflectStaticSizeNumericFacetBuilder + size.SizeOfPtr + + len(fb.field) + + for k, _ := range fb.termsCount { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfInt + } + + for k, _ := range fb.ranges { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfPtr + reflectStaticSizenumericRange + } + + return sizeInBytes +} + func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) { r := numericRange{ min: min, diff --git a/search/facet/facet_builder_terms.go b/search/facet/facet_builder_terms.go index a41e475a..5b5901e0 100644 --- a/search/facet/facet_builder_terms.go +++ b/search/facet/facet_builder_terms.go @@ -15,11 +15,20 @@ package facet import ( + "reflect" "sort" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeTermsFacetBuilder int + +func init() { + var tfb TermsFacetBuilder + reflectStaticSizeTermsFacetBuilder = int(reflect.TypeOf(tfb).Size()) +} + type TermsFacetBuilder struct { size int field string @@ -37,6 +46,18 @@ func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder { } } +func (fb *TermsFacetBuilder) Size() int { + sizeInBytes := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr + + len(fb.field) + + for k, _ := range fb.termsCount { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfInt + } + + return sizeInBytes +} + func (fb *TermsFacetBuilder) Field() string { return fb.field } diff --git a/search/facets_builder.go b/search/facets_builder.go index 05e27041..34e45af8 100644 --- a/search/facets_builder.go +++ b/search/facets_builder.go @@ -15,11 +15,32 @@ package search import ( + "reflect" "sort" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeFacetsBuilder int +var reflectStaticSizeFacetResult int +var reflectStaticSizeTermFacet int +var reflectStaticSizeNumericRangeFacet int +var reflectStaticSizeDateRangeFacet int + +func init() { + var fb FacetsBuilder + reflectStaticSizeFacetsBuilder = int(reflect.TypeOf(fb).Size()) + var fr FacetResult + reflectStaticSizeFacetResult = int(reflect.TypeOf(fr).Size()) + var tf TermFacet + reflectStaticSizeTermFacet = int(reflect.TypeOf(tf).Size()) + var nrf NumericRangeFacet + reflectStaticSizeNumericRangeFacet = int(reflect.TypeOf(nrf).Size()) + var drf DateRangeFacet + reflectStaticSizeDateRangeFacet = int(reflect.TypeOf(drf).Size()) +} + type FacetBuilder interface { StartDoc() UpdateVisitor(field string, term []byte) @@ -27,6 +48,8 @@ type FacetBuilder interface { Result() *FacetResult Field() string + + Size() int } type FacetsBuilder struct { @@ -42,6 +65,22 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder { } } +func (fb *FacetsBuilder) Size() int { + sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr + + fb.indexReader.Size() + + for k, v := range fb.facets { + sizeInBytes += size.SizeOfString + len(k) + + v.Size() + } + + for _, entry := range fb.fields { + sizeInBytes += size.SizeOfString + len(entry) + } + + return sizeInBytes +} + func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) { fb.facets[name] = facetBuilder fb.fields = append(fb.fields, facetBuilder.Field()) @@ -213,6 +252,14 @@ type FacetResult struct { DateRanges DateRangeFacets `json:"date_ranges,omitempty"` } +func (fr *FacetResult) Size() int { + return reflectStaticSizeFacetResult + size.SizeOfPtr + + len(fr.Field) + + len(fr.Terms)*(reflectStaticSizeTermFacet+size.SizeOfPtr) + + len(fr.NumericRanges)*(reflectStaticSizeNumericRangeFacet+size.SizeOfPtr) + + len(fr.DateRanges)*(reflectStaticSizeDateRangeFacet+size.SizeOfPtr) +} + func (fr *FacetResult) Merge(other *FacetResult) { fr.Total += other.Total fr.Missing += other.Missing diff --git a/search/pool.go b/search/pool.go index b9b52a61..ba8be8fc 100644 --- a/search/pool.go +++ b/search/pool.go @@ -14,6 +14,17 @@ package search +import ( + "reflect" +) + +var reflectStaticSizeDocumentMatchPool int + +func init() { + var dmp DocumentMatchPool + reflectStaticSizeDocumentMatchPool = int(reflect.TypeOf(dmp).Size()) +} + // DocumentMatchPoolTooSmall is a callback function that can be executed // when the DocumentMatchPool does not have sufficient capacity // By default we just perform just-in-time allocation, but you could log diff --git a/search/scorer/scorer_conjunction.go b/search/scorer/scorer_conjunction.go index aad6f9c1..b866293e 100644 --- a/search/scorer/scorer_conjunction.go +++ b/search/scorer/scorer_conjunction.go @@ -15,13 +15,27 @@ package scorer import ( + "reflect" + "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeConjunctionQueryScorer int + +func init() { + var cqs ConjunctionQueryScorer + reflectStaticSizeConjunctionQueryScorer = int(reflect.TypeOf(cqs).Size()) +} + type ConjunctionQueryScorer struct { options search.SearcherOptions } +func (s *ConjunctionQueryScorer) Size() int { + return reflectStaticSizeConjunctionQueryScorer + size.SizeOfPtr +} + func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer { return &ConjunctionQueryScorer{ options: options, diff --git a/search/scorer/scorer_constant.go b/search/scorer/scorer_constant.go index a65a826f..dc10fdaa 100644 --- a/search/scorer/scorer_constant.go +++ b/search/scorer/scorer_constant.go @@ -16,11 +16,20 @@ package scorer import ( "fmt" + "reflect" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeConstantScorer int + +func init() { + var cs ConstantScorer + reflectStaticSizeConstantScorer = int(reflect.TypeOf(cs).Size()) +} + type ConstantScorer struct { constant float64 boost float64 @@ -30,6 +39,16 @@ type ConstantScorer struct { queryWeightExplanation *search.Explanation } +func (s *ConstantScorer) Size() int { + sizeInBytes := reflectStaticSizeConstantScorer + size.SizeOfPtr + + if s.queryWeightExplanation != nil { + sizeInBytes += s.queryWeightExplanation.Size() + } + + return sizeInBytes +} + func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer { rv := ConstantScorer{ options: options, diff --git a/search/scorer/scorer_disjunction.go b/search/scorer/scorer_disjunction.go index 184a15d2..36a601c7 100644 --- a/search/scorer/scorer_disjunction.go +++ b/search/scorer/scorer_disjunction.go @@ -16,14 +16,27 @@ package scorer import ( "fmt" + "reflect" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeDisjunctionQueryScorer int + +func init() { + var dqs DisjunctionQueryScorer + reflectStaticSizeDisjunctionQueryScorer = int(reflect.TypeOf(dqs).Size()) +} + type DisjunctionQueryScorer struct { options search.SearcherOptions } +func (s *DisjunctionQueryScorer) Size() int { + return reflectStaticSizeDisjunctionQueryScorer + size.SizeOfPtr +} + func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer { return &DisjunctionQueryScorer{ options: options, diff --git a/search/scorer/scorer_term.go b/search/scorer/scorer_term.go index b5f46322..077e38e0 100644 --- a/search/scorer/scorer_term.go +++ b/search/scorer/scorer_term.go @@ -17,11 +17,20 @@ package scorer import ( "fmt" "math" + "reflect" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeTermQueryScorer int + +func init() { + var tqs TermQueryScorer + reflectStaticSizeTermQueryScorer = int(reflect.TypeOf(tqs).Size()) +} + type TermQueryScorer struct { queryTerm []byte queryField string @@ -36,6 +45,21 @@ type TermQueryScorer struct { queryWeightExplanation *search.Explanation } +func (s *TermQueryScorer) Size() int { + sizeInBytes := reflectStaticSizeTermQueryScorer + size.SizeOfPtr + + len(s.queryTerm) + len(s.queryField) + + if s.idfExplanation != nil { + sizeInBytes += s.idfExplanation.Size() + } + + if s.queryWeightExplanation != nil { + sizeInBytes += s.queryWeightExplanation.Size() + } + + return sizeInBytes +} + func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer { rv := TermQueryScorer{ queryTerm: queryTerm, diff --git a/search/search.go b/search/search.go index f9a92783..ca030df4 100644 --- a/search/search.go +++ b/search/search.go @@ -16,11 +16,26 @@ package search import ( "fmt" + "reflect" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeDocumentMatch int +var reflectStaticSizeSearchContext int +var reflectStaticSizeLocation int + +func init() { + var dm DocumentMatch + reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size()) + var sc SearchContext + reflectStaticSizeSearchContext = int(reflect.TypeOf(sc).Size()) + var l Location + reflectStaticSizeLocation = int(reflect.TypeOf(l).Size()) +} + type ArrayPositions []uint64 func (ap ArrayPositions) Equals(other ArrayPositions) bool { @@ -47,6 +62,11 @@ type Location struct { ArrayPositions ArrayPositions `json:"array_positions"` } +func (l *Location) Size() int { + return reflectStaticSizeLocation + size.SizeOfPtr + + len(l.ArrayPositions)*size.SizeOfUint64 +} + type Locations []*Location type TermLocationMap map[string]Locations @@ -117,6 +137,52 @@ func (dm *DocumentMatch) Reset() *DocumentMatch { return dm } +func (dm *DocumentMatch) Size() int { + sizeInBytes := reflectStaticSizeDocumentMatch + size.SizeOfPtr + + len(dm.Index) + + len(dm.ID) + + len(dm.IndexInternalID) + + if dm.Expl != nil { + sizeInBytes += dm.Expl.Size() + } + + for k, v := range dm.Locations { + sizeInBytes += size.SizeOfString + len(k) + for k1, v1 := range v { + sizeInBytes += size.SizeOfString + len(k1) + + size.SizeOfSlice + for _, entry := range v1 { + sizeInBytes += entry.Size() + } + } + } + + for k, v := range dm.Fragments { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfSlice + + for _, entry := range v { + sizeInBytes += size.SizeOfString + len(entry) + } + } + + for _, entry := range dm.Sort { + sizeInBytes += size.SizeOfString + len(entry) + } + + for k, _ := range dm.Fields { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfPtr + } + + if dm.Document != nil { + sizeInBytes += dm.Document.Size() + } + + return sizeInBytes +} + func (dm *DocumentMatch) String() string { return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score) } @@ -135,6 +201,7 @@ type Searcher interface { SetQueryNorm(float64) Count() uint64 Min() int + Size() int DocumentMatchPoolSize() int } @@ -148,3 +215,18 @@ type SearcherOptions struct { type SearchContext struct { DocumentMatchPool *DocumentMatchPool } + +func (sc *SearchContext) Size() int { + sizeInBytes := reflectStaticSizeSearchContext + size.SizeOfPtr + + reflectStaticSizeDocumentMatchPool + size.SizeOfPtr + + if sc.DocumentMatchPool != nil { + for _, entry := range sc.DocumentMatchPool.avail { + if entry != nil { + sizeInBytes += entry.Size() + } + } + } + + return sizeInBytes +} diff --git a/search/searcher/search_boolean.go b/search/searcher/search_boolean.go index a905c29e..b87337e1 100644 --- a/search/searcher/search_boolean.go +++ b/search/searcher/search_boolean.go @@ -16,12 +16,21 @@ package searcher import ( "math" + "reflect" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeBooleanSearcher int + +func init() { + var bs BooleanSearcher + reflectStaticSizeBooleanSearcher = int(reflect.TypeOf(bs).Size()) +} + type BooleanSearcher struct { indexReader index.IndexReader mustSearcher search.Searcher @@ -52,6 +61,33 @@ func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searc return &rv, nil } +func (s *BooleanSearcher) Size() int { + sizeInBytes := reflectStaticSizeBooleanSearcher + size.SizeOfPtr + + s.indexReader.Size() + + if s.mustSearcher != nil { + sizeInBytes += s.mustSearcher.Size() + } + + if s.shouldSearcher != nil { + sizeInBytes += s.shouldSearcher.Size() + } + + if s.mustNotSearcher != nil { + sizeInBytes += s.mustNotSearcher.Size() + } + + sizeInBytes += s.scorer.Size() + + for _, entry := range s.matches { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + return sizeInBytes +} + func (s *BooleanSearcher) computeQueryNorm() { // first calculate sum of squared weights sumOfSquaredWeights := 0.0 diff --git a/search/searcher/search_conjunction.go b/search/searcher/search_conjunction.go index 73fba19c..da65f398 100644 --- a/search/searcher/search_conjunction.go +++ b/search/searcher/search_conjunction.go @@ -16,13 +16,22 @@ package searcher import ( "math" + "reflect" "sort" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeConjunctionSearcher int + +func init() { + var cs ConjunctionSearcher + reflectStaticSizeConjunctionSearcher = int(reflect.TypeOf(cs).Size()) +} + type ConjunctionSearcher struct { indexReader index.IndexReader searchers OrderedSearcherList @@ -54,6 +63,23 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S return &rv, nil } +func (s *ConjunctionSearcher) Size() int { + sizeInBytes := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr + + s.scorer.Size() + + for _, entry := range s.searchers { + sizeInBytes += entry.Size() + } + + for _, entry := range s.currs { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + return sizeInBytes +} + func (s *ConjunctionSearcher) computeQueryNorm() { // first calculate sum of squared weights sumOfSquaredWeights := 0.0 diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index b6910ddb..119bac97 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -17,13 +17,22 @@ package searcher import ( "fmt" "math" + "reflect" "sort" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeDisjunctionSearcher int + +func init() { + var ds DisjunctionSearcher + reflectStaticSizeDisjunctionSearcher = int(reflect.TypeOf(ds).Size()) +} + // DisjunctionMaxClauseCount is a compile time setting that applications can // adjust to non-zero value to cause the DisjunctionSearcher to return an // error instead of exeucting searches when the size exceeds this value. @@ -90,6 +99,32 @@ func newDisjunctionSearcher(indexReader index.IndexReader, return &rv, nil } +func (s *DisjunctionSearcher) Size() int { + sizeInBytes := reflectStaticSizeDisjunctionSearcher + size.SizeOfPtr + + s.indexReader.Size() + + s.scorer.Size() + + for _, entry := range s.searchers { + sizeInBytes += entry.Size() + } + + for _, entry := range s.currs { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + for _, entry := range s.matching { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + sizeInBytes += len(s.matchingIdxs) * size.SizeOfInt + + return sizeInBytes +} + func (s *DisjunctionSearcher) computeQueryNorm() { // first calculate sum of squared weights sumOfSquaredWeights := 0.0 diff --git a/search/searcher/search_docid.go b/search/searcher/search_docid.go index 06351b4a..3b258a58 100644 --- a/search/searcher/search_docid.go +++ b/search/searcher/search_docid.go @@ -15,11 +15,21 @@ package searcher import ( + "reflect" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeDocIDSearcher int + +func init() { + var ds DocIDSearcher + reflectStaticSizeDocIDSearcher = int(reflect.TypeOf(ds).Size()) +} + // DocIDSearcher returns documents matching a predefined set of identifiers. type DocIDSearcher struct { reader index.DocIDReader @@ -42,6 +52,12 @@ func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64 }, nil } +func (s *DocIDSearcher) Size() int { + return reflectStaticSizeDocIDSearcher + size.SizeOfPtr + + s.reader.Size() + + s.scorer.Size() +} + func (s *DocIDSearcher) Count() uint64 { return uint64(s.count) } diff --git a/search/searcher/search_filter.go b/search/searcher/search_filter.go index 219f2ee7..7c95fb41 100644 --- a/search/searcher/search_filter.go +++ b/search/searcher/search_filter.go @@ -15,10 +15,20 @@ package searcher import ( + "reflect" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeFilteringSearcher int + +func init() { + var fs FilteringSearcher + reflectStaticSizeFilteringSearcher = int(reflect.TypeOf(fs).Size()) +} + // FilterFunc defines a function which can filter documents // returning true means keep the document // returning false means do not keep the document @@ -38,6 +48,11 @@ func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearch } } +func (f *FilteringSearcher) Size() int { + return reflectStaticSizeFilteringSearcher + size.SizeOfPtr + + f.child.Size() +} + func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { next, err := f.child.Next(ctx) for next != nil && err == nil { diff --git a/search/searcher/search_match_all.go b/search/searcher/search_match_all.go index 822db2ea..3f34e591 100644 --- a/search/searcher/search_match_all.go +++ b/search/searcher/search_match_all.go @@ -15,11 +15,21 @@ package searcher import ( + "reflect" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeMatchAllSearcher int + +func init() { + var mas MatchAllSearcher + reflectStaticSizeMatchAllSearcher = int(reflect.TypeOf(mas).Size()) +} + type MatchAllSearcher struct { indexReader index.IndexReader reader index.DocIDReader @@ -46,6 +56,13 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options s }, nil } +func (s *MatchAllSearcher) Size() int { + return reflectStaticSizeMatchAllSearcher + size.SizeOfPtr + + s.indexReader.Size() + + s.reader.Size() + + s.scorer.Size() +} + func (s *MatchAllSearcher) Count() uint64 { return s.count } diff --git a/search/searcher/search_match_none.go b/search/searcher/search_match_none.go index 94759671..6b50b322 100644 --- a/search/searcher/search_match_none.go +++ b/search/searcher/search_match_none.go @@ -15,10 +15,20 @@ package searcher import ( + "reflect" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeMatchNoneSearcher int + +func init() { + var mns MatchNoneSearcher + reflectStaticSizeMatchNoneSearcher = int(reflect.TypeOf(mns).Size()) +} + type MatchNoneSearcher struct { indexReader index.IndexReader } @@ -29,6 +39,11 @@ func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, er }, nil } +func (s *MatchNoneSearcher) Size() int { + return reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr + + s.indexReader.Size() +} + func (s *MatchNoneSearcher) Count() uint64 { return uint64(0) } diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 6237cecf..23a359bd 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -17,11 +17,20 @@ package searcher import ( "fmt" "math" + "reflect" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizePhraseSearcher int + +func init() { + var ps PhraseSearcher + reflectStaticSizePhraseSearcher = int(reflect.TypeOf(ps).Size()) +} + type PhraseSearcher struct { indexReader index.IndexReader mustSearcher *ConjunctionSearcher @@ -32,6 +41,28 @@ type PhraseSearcher struct { initialized bool } +func (s *PhraseSearcher) Size() int { + sizeInBytes := reflectStaticSizePhraseSearcher + size.SizeOfPtr + + s.indexReader.Size() + + if s.mustSearcher != nil { + sizeInBytes += s.mustSearcher.Size() + } + + if s.currMust != nil { + sizeInBytes += s.currMust.Size() + } + + for _, entry := range s.terms { + sizeInBytes += size.SizeOfSlice + for _, entry1 := range entry { + sizeInBytes += size.SizeOfString + len(entry1) + } + } + + return sizeInBytes +} + func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) { // turn flat terms []string into [][]string mterms := make([][]string, len(terms)) diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index 6fae6ae5..576d6643 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -15,11 +15,21 @@ package searcher import ( + "reflect" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeTermSearcher int + +func init() { + var ts TermSearcher + reflectStaticSizeTermSearcher = int(reflect.TypeOf(ts).Size()) +} + type TermSearcher struct { indexReader index.IndexReader reader index.TermFieldReader @@ -63,6 +73,14 @@ func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field stri }, nil } +func (s *TermSearcher) Size() int { + return reflectStaticSizeTermSearcher + size.SizeOfPtr + + s.indexReader.Size() + + s.reader.Size() + + s.tfd.Size() + + s.scorer.Size() +} + func (s *TermSearcher) Count() uint64 { return s.reader.Count() } diff --git a/size/sizes.go b/size/sizes.go new file mode 100644 index 00000000..4ba544a7 --- /dev/null +++ b/size/sizes.go @@ -0,0 +1,57 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package size + +import ( + "reflect" +) + +func init() { + var a bool + SizeOfBool = int(reflect.TypeOf(a).Size()) + var b float32 + SizeOfFloat32 = int(reflect.TypeOf(b).Size()) + var c float64 + SizeOfFloat64 = int(reflect.TypeOf(c).Size()) + var d map[int]int + SizeOfMap = int(reflect.TypeOf(d).Size()) + var e *int + SizeOfPtr = int(reflect.TypeOf(e).Size()) + var f []int + SizeOfSlice = int(reflect.TypeOf(f).Size()) + var g string + SizeOfString = int(reflect.TypeOf(g).Size()) + var h uint8 + SizeOfUint8 = int(reflect.TypeOf(h).Size()) + var i uint16 + SizeOfUint16 = int(reflect.TypeOf(i).Size()) + var j uint32 + SizeOfUint32 = int(reflect.TypeOf(j).Size()) + var k uint64 + SizeOfUint64 = int(reflect.TypeOf(k).Size()) +} + +var SizeOfBool int +var SizeOfFloat32 int +var SizeOfFloat64 int +var SizeOfInt int +var SizeOfMap int +var SizeOfPtr int +var SizeOfSlice int +var SizeOfString int +var SizeOfUint8 int +var SizeOfUint16 int +var SizeOfUint32 int +var SizeOfUint64 int