0
0
Fork 0

MB-28162: Provide API to estimate memory needed to run a search query

This API (unexported) will estimate the amount of memory needed to execute
a search query over an index before the collector begins data collection.

Sample estimates for certain queries:
{Size: 10, BenchmarkUpsidedownSearchOverhead}
                                                           ESTIMATE    BENCHMEM
TermQuery                                                  4616        4796
MatchQuery                                                 5210        5405
DisjunctionQuery (Match queries)                           7700        8447
DisjunctionQuery (Term queries)                            6514        6591
ConjunctionQuery (Match queries)                           7524        8175
Nested disjunction query (disjunction of disjunctions)     10306       10708
…
This commit is contained in:
abhinavdangeti 2018-03-01 17:12:16 -08:00 committed by Abhinav Dangeti
parent 2b005f1e23
commit 7e36109b3c
48 changed files with 1242 additions and 118 deletions

View File

@ -14,7 +14,19 @@
package document package document
import "fmt" import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeDocument caches the size in bytes of a Document value
// itself, as reported by reflect; it is filled in once at package init.
var reflectStaticSizeDocument int

func init() {
	reflectStaticSizeDocument = int(reflect.TypeOf(Document{}).Size())
}
type Document struct { type Document struct {
ID string `json:"id"` ID string `json:"id"`
@ -30,6 +42,13 @@ func NewDocument(id string) *Document {
} }
} }
// Size returns an estimate, in bytes, of the memory used by the document:
// the static struct size plus one pointer, the bytes of the ID, one pointer
// per entry in Fields and, per composite field, one pointer plus the static
// CompositeField size.
func (d *Document) Size() int {
	total := reflectStaticSizeDocument + size.SizeOfPtr
	total += len(d.ID)
	total += len(d.Fields) * size.SizeOfPtr
	total += len(d.CompositeFields) * (size.SizeOfPtr + reflectStaticSizeCompositeField)
	return total
}
func (d *Document) AddField(f Field) *Document { func (d *Document) AddField(f Field) *Document {
switch f := f.(type) { switch f := f.(type) {
case *CompositeField: case *CompositeField:

View File

@ -15,9 +15,18 @@
package document package document
import ( import (
"reflect"
"github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/analysis"
) )
// reflectStaticSizeCompositeField caches the size in bytes of a
// CompositeField value itself, as reported by reflect; set at package init.
var reflectStaticSizeCompositeField int

func init() {
	reflectStaticSizeCompositeField = int(reflect.TypeOf(CompositeField{}).Size())
}
const DefaultCompositeIndexingOptions = IndexField const DefaultCompositeIndexingOptions = IndexField
type CompositeField struct { type CompositeField struct {

View File

@ -18,11 +18,23 @@ import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"fmt" "fmt"
"reflect"
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/size"
) )
// Static struct sizes in bytes, as reported by reflect, for the types whose
// Size methods are defined in this package; filled in once at package init.
var (
	reflectStaticSizeTermFieldDoc    int
	reflectStaticSizeTermFieldVector int
)

func init() {
	reflectStaticSizeTermFieldDoc = int(reflect.TypeOf(TermFieldDoc{}).Size())
	reflectStaticSizeTermFieldVector = int(reflect.TypeOf(TermFieldVector{}).Size())
}
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type") var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
type Index interface { type Index interface {
@ -82,6 +94,8 @@ type IndexReader interface {
DumpFields() chan interface{} DumpFields() chan interface{}
Close() error Close() error
Size() int
} }
// FieldTerms contains the terms used by a document, keyed by field // FieldTerms contains the terms used by a document, keyed by field
@ -115,6 +129,11 @@ type TermFieldVector struct {
End uint64 End uint64
} }
// Size returns an estimate, in bytes, of the memory used by the vector: the
// static struct size plus one pointer, the bytes of the field name and one
// uint64 per array position.
func (tfv *TermFieldVector) Size() int {
	arrayPositionBytes := len(tfv.ArrayPositions) * size.SizeOfUint64
	return reflectStaticSizeTermFieldVector + size.SizeOfPtr +
		len(tfv.Field) + arrayPositionBytes
}
// IndexInternalID is an opaque document identifier internal to the index impl // IndexInternalID is an opaque document identifier internal to the index impl
type IndexInternalID []byte type IndexInternalID []byte
@ -134,6 +153,17 @@ type TermFieldDoc struct {
Vectors []*TermFieldVector Vectors []*TermFieldVector
} }
// Size returns an estimate, in bytes, of the memory used by the term field
// doc: the static struct size plus one pointer, the bytes of Term and ID,
// and the Size of every vector it references.
func (tfd *TermFieldDoc) Size() int {
	total := reflectStaticSizeTermFieldDoc + size.SizeOfPtr
	total += len(tfd.Term) + len(tfd.ID)

	for idx := range tfd.Vectors {
		total += tfd.Vectors[idx].Size()
	}

	return total
}
// Reset allows an already allocated TermFieldDoc to be reused // Reset allows an already allocated TermFieldDoc to be reused
func (tfd *TermFieldDoc) Reset() *TermFieldDoc { func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
// remember the []byte used for the ID // remember the []byte used for the ID
@ -161,6 +191,8 @@ type TermFieldReader interface {
// Count returns the number of documents contains the term in this field. // Count returns the number of documents contains the term in this field.
Count() uint64 Count() uint64
Close() error Close() error
Size() int
} }
type DictEntry struct { type DictEntry struct {
@ -185,6 +217,9 @@ type DocIDReader interface {
// will start there instead. If ID is greater than or equal to the end of // will start there instead. If ID is greater than or equal to the end of
// the range, Next() call will return io.EOF. // the range, Next() call will return io.EOF.
Advance(ID IndexInternalID) (IndexInternalID, error) Advance(ID IndexInternalID) (IndexInternalID, error)
Size() int
Close() error Close() error
} }

View File

@ -472,20 +472,20 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) {
} }
func (s *Scorch) MemoryUsed() uint64 { func (s *Scorch) MemoryUsed() uint64 {
var memUsed uint64 var memUsed int
s.rootLock.RLock() s.rootLock.RLock()
if s.root != nil { if s.root != nil {
for _, segmentSnapshot := range s.root.segment { for _, segmentSnapshot := range s.root.segment {
memUsed += 8 /* size of id -> uint64 */ + memUsed += 8 /* size of id -> uint64 */ +
segmentSnapshot.segment.SizeInBytes() segmentSnapshot.segment.Size()
if segmentSnapshot.deleted != nil { if segmentSnapshot.deleted != nil {
memUsed += segmentSnapshot.deleted.GetSizeInBytes() memUsed += int(segmentSnapshot.deleted.GetSizeInBytes())
} }
memUsed += segmentSnapshot.cachedDocs.sizeInBytes() memUsed += segmentSnapshot.cachedDocs.size()
} }
} }
s.rootLock.RUnlock() s.rootLock.RUnlock()
return memUsed return uint64(memUsed)
} }
func (s *Scorch) markIneligibleForRemoval(filename string) { func (s *Scorch) markIneligibleForRemoval(filename string) {

View File

@ -46,6 +46,10 @@ func (e *EmptySegment) Close() error {
return nil return nil
} }
// Size reports the memory used by the empty segment, which is always zero.
//
// The result type is int so that the method matches the Size() int method
// required by the Segment interface and the other Size methods on the empty
// types in this file; with a uint64 result *EmptySegment would not satisfy
// that interface.
func (e *EmptySegment) Size() int {
	return 0
}
func (e *EmptySegment) AddRef() { func (e *EmptySegment) AddRef() {
} }
@ -84,6 +88,10 @@ func (e *EmptyPostingsList) Iterator() PostingsIterator {
return &EmptyPostingsIterator{} return &EmptyPostingsIterator{}
} }
// Size reports the memory used by the empty postings list, which is always
// zero.
func (e *EmptyPostingsList) Size() int {
	return 0
}
func (e *EmptyPostingsList) Count() uint64 { func (e *EmptyPostingsList) Count() uint64 {
return 0 return 0
} }
@ -93,3 +101,7 @@ type EmptyPostingsIterator struct{}
func (e *EmptyPostingsIterator) Next() (Posting, error) { func (e *EmptyPostingsIterator) Next() (Posting, error) {
return nil, nil return nil, nil
} }
// Size reports the memory used by the empty postings iterator, which is
// always zero.
func (e *EmptyPostingsIterator) Size() int {
	return 0
}

View File

@ -45,7 +45,7 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment {
} }
// compute memory usage of segment // compute memory usage of segment
s.updateSizeInBytes() s.updateSize()
// professional debugging // professional debugging
// //

View File

@ -15,14 +15,23 @@
package mem package mem
import ( import (
"reflect"
"sort" "sort"
"strings" "strings"
"github.com/RoaringBitmap/roaring" "github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeDictionary caches the size in bytes of a Dictionary value
// itself, as reported by reflect; it is filled in once at package init.
var reflectStaticSizeDictionary int

func init() {
	reflectStaticSizeDictionary = int(reflect.TypeOf(Dictionary{}).Size())
}
// Dictionary is the in-memory representation of the term dictionary // Dictionary is the in-memory representation of the term dictionary
type Dictionary struct { type Dictionary struct {
segment *Segment segment *Segment
@ -30,6 +39,17 @@ type Dictionary struct {
fieldID uint16 fieldID uint16
} }
// Size returns an estimate, in bytes, of the memory used by the dictionary:
// the static struct size plus one pointer and the bytes of the field name,
// plus the size reported by the segment when one is attached.
func (d *Dictionary) Size() int {
	base := reflectStaticSizeDictionary + size.SizeOfPtr + len(d.field)
	if d.segment == nil {
		return base
	}
	return base + int(d.segment.Size())
}
// PostingsList returns the postings list for the specified term // PostingsList returns the postings list for the specified term
func (d *Dictionary) PostingsList(term string, func (d *Dictionary) PostingsList(term string,
except *roaring.Bitmap) (segment.PostingsList, error) { except *roaring.Bitmap) (segment.PostingsList, error) {

View File

@ -15,10 +15,29 @@
package mem package mem
import ( import (
"reflect"
"github.com/RoaringBitmap/roaring" "github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
) )
// Static struct sizes in bytes, as reported by reflect, for the postings
// types defined in this file; filled in once at package init.
var (
	reflectStaticSizePostingsList     int
	reflectStaticSizePostingsIterator int
	reflectStaticSizePosting          int
	reflectStaticSizeLocation         int
)

func init() {
	reflectStaticSizePostingsList = int(reflect.TypeOf(PostingsList{}).Size())
	reflectStaticSizePostingsIterator = int(reflect.TypeOf(PostingsIterator{}).Size())
	reflectStaticSizePosting = int(reflect.TypeOf(Posting{}).Size())
	reflectStaticSizeLocation = int(reflect.TypeOf(Location{}).Size())
}
// PostingsList is an in-memory representation of a postings list // PostingsList is an in-memory representation of a postings list
type PostingsList struct { type PostingsList struct {
dictionary *Dictionary dictionary *Dictionary
@ -27,6 +46,20 @@ type PostingsList struct {
except *roaring.Bitmap except *roaring.Bitmap
} }
// Size returns an estimate, in bytes, of the memory used by the postings
// list: the static struct size plus one pointer, the Size of the dictionary
// it belongs to (when set) and the size of the except bitmap (when set).
func (p *PostingsList) Size() int {
	total := reflectStaticSizePostingsList + size.SizeOfPtr

	if dict := p.dictionary; dict != nil {
		total += dict.Size()
	}

	if excluded := p.except; excluded != nil {
		total += int(excluded.GetSizeInBytes())
	}

	return total
}
// Count returns the number of items on this postings list // Count returns the number of items on this postings list
func (p *PostingsList) Count() uint64 { func (p *PostingsList) Count() uint64 {
var rv uint64 var rv uint64
@ -83,6 +116,16 @@ type PostingsIterator struct {
reuse Posting reuse Posting
} }
// Size returns an estimate, in bytes, of the memory used by the iterator:
// the static struct size plus one pointer, plus the size of the locations
// bitmap when one is set.
func (i *PostingsIterator) Size() int {
	base := reflectStaticSizePostingsIterator + size.SizeOfPtr
	if i.locations == nil {
		return base
	}
	return base + int(i.locations.GetSizeInBytes())
}
// Next returns the next posting on the postings list, or nil at the end // Next returns the next posting on the postings list, or nil at the end
func (i *PostingsIterator) Next() (segment.Posting, error) { func (i *PostingsIterator) Next() (segment.Posting, error) {
if i.actual == nil || !i.actual.HasNext() { if i.actual == nil || !i.actual.HasNext() {
@ -121,6 +164,16 @@ type Posting struct {
hasLoc bool hasLoc bool
} }
// Size returns an estimate, in bytes, of the memory used by the posting: the
// static struct size plus one pointer, plus the Size of the iterator it
// refers to when one is set.
func (p *Posting) Size() int {
	base := reflectStaticSizePosting + size.SizeOfPtr
	if p.iterator == nil {
		return base
	}
	return base + p.iterator.Size()
}
// Number returns the document number of this posting in this segment // Number returns the document number of this posting in this segment
func (p *Posting) Number() uint64 { func (p *Posting) Number() uint64 {
return p.docNum return p.docNum
@ -158,6 +211,15 @@ type Location struct {
offset int offset int
} }
// Size returns an estimate, in bytes, of the memory used by the location:
// the static struct size, plus the Size of the posting it refers to when one
// is set.
func (l *Location) Size() int {
	if l.p == nil {
		return reflectStaticSizeLocation
	}
	return reflectStaticSizeLocation + l.p.Size()
}
// Field returns the name of the field (useful in composite fields to know // Field returns the name of the field (useful in composite fields to know
// which original field the value came from) // which original field the value came from)
func (l *Location) Field() string { func (l *Location) Field() string {

View File

@ -16,11 +16,20 @@ package mem
import ( import (
"fmt" "fmt"
"reflect"
"github.com/RoaringBitmap/roaring" "github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeSegment caches the size in bytes of a Segment value
// itself, as reported by reflect; it is filled in once at package init.
var reflectStaticSizeSegment int

func init() {
	reflectStaticSizeSegment = int(reflect.TypeOf((*Segment)(nil)).Elem().Size())
}
// _id field is always guaranteed to have fieldID of 0 // _id field is always guaranteed to have fieldID of 0
const idFieldID uint16 = 0 const idFieldID uint16 = 0
@ -96,7 +105,7 @@ type Segment struct {
// Footprint of the segment, updated when analyzed document mutations // Footprint of the segment, updated when analyzed document mutations
// are added into the segment // are added into the segment
sizeInBytes uint64 sizeInBytes int
} }
// New builds a new empty Segment // New builds a new empty Segment
@ -107,99 +116,87 @@ func New() *Segment {
} }
} }
func (s *Segment) updateSizeInBytes() { func (s *Segment) updateSize() {
var sizeInBytes uint64 sizeInBytes := reflectStaticSizeSegment
// FieldsMap, FieldsInv // FieldsMap, FieldsInv
for k, _ := range s.FieldsMap { for k, _ := range s.FieldsMap {
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 + sizeInBytes += (len(k)+size.SizeOfString)*2 +
2 /* size of uint16 */) size.SizeOfUint16
} }
// overhead from the data structures
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
// Dicts, DictKeys // Dicts, DictKeys
for _, entry := range s.Dicts { for _, entry := range s.Dicts {
for k, _ := range entry { for k, _ := range entry {
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 + sizeInBytes += (len(k)+size.SizeOfString)*2 +
8 /* size of uint64 */) size.SizeOfUint64
} }
// overhead from the data structures // overhead from the data structures
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice) sizeInBytes += (size.SizeOfMap + size.SizeOfSlice)
} }
sizeInBytes += (segment.SizeOfSlice * 2)
// Postings, PostingsLocs // Postings, PostingsLocs
for i := 0; i < len(s.Postings); i++ { for i := 0; i < len(s.Postings); i++ {
sizeInBytes += (s.Postings[i].GetSizeInBytes() + segment.SizeOfPointer) + sizeInBytes += (int(s.Postings[i].GetSizeInBytes()) + size.SizeOfPtr) +
(s.PostingsLocs[i].GetSizeInBytes() + segment.SizeOfPointer) (int(s.PostingsLocs[i].GetSizeInBytes()) + size.SizeOfPtr)
} }
sizeInBytes += (segment.SizeOfSlice * 2)
// Freqs, Norms // Freqs, Norms
for i := 0; i < len(s.Freqs); i++ { for i := 0; i < len(s.Freqs); i++ {
sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ + sizeInBytes += (len(s.Freqs[i])*size.SizeOfUint64 +
len(s.Norms[i])*4 /* size of float32 */) + len(s.Norms[i])*size.SizeOfFloat32) +
(segment.SizeOfSlice * 2) (size.SizeOfSlice * 2)
} }
sizeInBytes += (segment.SizeOfSlice * 2)
// Location data // Location data
for i := 0; i < len(s.Locfields); i++ { for i := 0; i < len(s.Locfields); i++ {
sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ + sizeInBytes += len(s.Locfields[i])*size.SizeOfUint16 +
len(s.Locstarts[i])*8 /* size of uint64 */ + len(s.Locstarts[i])*size.SizeOfUint64 +
len(s.Locends[i])*8 /* size of uint64 */ + len(s.Locends[i])*size.SizeOfUint64 +
len(s.Locpos[i])*8 /* size of uint64 */) len(s.Locpos[i])*size.SizeOfUint64
for j := 0; j < len(s.Locarraypos[i]); j++ { for j := 0; j < len(s.Locarraypos[i]); j++ {
sizeInBytes += uint64(len(s.Locarraypos[i][j])*8 /* size of uint64 */) + sizeInBytes += len(s.Locarraypos[i][j])*size.SizeOfUint64 +
segment.SizeOfSlice size.SizeOfSlice
} }
sizeInBytes += (segment.SizeOfSlice * 5) sizeInBytes += (size.SizeOfSlice * 5)
} }
sizeInBytes += (segment.SizeOfSlice * 5)
// Stored data // Stored data
for i := 0; i < len(s.Stored); i++ { for i := 0; i < len(s.Stored); i++ {
for _, v := range s.Stored[i] { for _, v := range s.Stored[i] {
sizeInBytes += uint64(2 /* size of uint16 */) sizeInBytes += size.SizeOfUint16
for _, arr := range v { for _, arr := range v {
sizeInBytes += uint64(len(arr)) + segment.SizeOfSlice sizeInBytes += len(arr) + size.SizeOfSlice
} }
sizeInBytes += segment.SizeOfSlice sizeInBytes += size.SizeOfSlice
} }
for _, v := range s.StoredTypes[i] { for _, v := range s.StoredTypes[i] {
sizeInBytes += uint64(2 /* size of uint16 */ +len(v)) + segment.SizeOfSlice sizeInBytes += size.SizeOfUint16 + len(v) + size.SizeOfSlice
} }
for _, v := range s.StoredPos[i] { for _, v := range s.StoredPos[i] {
sizeInBytes += uint64(2 /* size of uint16 */) sizeInBytes += size.SizeOfUint16
for _, arr := range v { for _, arr := range v {
sizeInBytes += uint64(len(arr)*8 /* size of uint64 */) + sizeInBytes += len(arr)*size.SizeOfUint64 +
segment.SizeOfSlice size.SizeOfSlice
} }
sizeInBytes += segment.SizeOfSlice sizeInBytes += size.SizeOfSlice
} }
// overhead from map(s) within Stored, StoredTypes, StoredPos // overhead from map(s) within Stored, StoredTypes, StoredPos
sizeInBytes += (segment.SizeOfMap * 3) sizeInBytes += (size.SizeOfMap * 3)
} }
// overhead from data structures: Stored, StoredTypes, StoredPos
sizeInBytes += (segment.SizeOfSlice * 3)
// DocValueFields // DocValueFields
sizeInBytes += uint64(len(s.DocValueFields)*3 /* size of uint16 + bool */) + sizeInBytes += len(s.DocValueFields) * (size.SizeOfUint16 + size.SizeOfBool)
segment.SizeOfMap
// SizeInBytes
sizeInBytes += uint64(8)
s.sizeInBytes = sizeInBytes s.sizeInBytes = sizeInBytes
} }
func (s *Segment) SizeInBytes() uint64 { func (s *Segment) Size() int {
return s.sizeInBytes return s.sizeInBytes
} }

View File

@ -169,7 +169,7 @@ func TestSingle(t *testing.T) {
t.Fatalf("segment nil, not expected") t.Fatalf("segment nil, not expected")
} }
if segment.SizeInBytes() <= 0 { if segment.Size() <= 0 {
t.Fatalf("segment size not updated") t.Fatalf("segment size not updated")
} }

View File

@ -19,12 +19,6 @@ import (
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
) )
// Overhead from go data structures when deployed on a 64-bit system.
const SizeOfMap uint64 = 8
const SizeOfPointer uint64 = 8
const SizeOfSlice uint64 = 24
const SizeOfString uint64 = 16
// DocumentFieldValueVisitor defines a callback to be visited for each // DocumentFieldValueVisitor defines a callback to be visited for each
// stored field value. The return value determines if the visitor // stored field value. The return value determines if the visitor
// should keep going. Returning true continues visiting, false stops. // should keep going. Returning true continues visiting, false stops.
@ -42,7 +36,7 @@ type Segment interface {
Close() error Close() error
SizeInBytes() uint64 Size() int
AddRef() AddRef()
DecRef() error DecRef() error
@ -63,6 +57,8 @@ type DictionaryIterator interface {
type PostingsList interface { type PostingsList interface {
Iterator() PostingsIterator Iterator() PostingsIterator
Size() int
Count() uint64 Count() uint64
// NOTE deferred for future work // NOTE deferred for future work
@ -77,6 +73,8 @@ type PostingsIterator interface {
// implementations may return a shared instance to reduce memory // implementations may return a shared instance to reduce memory
// allocations. // allocations.
Next() (Posting, error) Next() (Posting, error)
Size() int
} }
type Posting interface { type Posting interface {
@ -86,6 +84,8 @@ type Posting interface {
Norm() float64 Norm() float64
Locations() []Location Locations() []Location
Size() int
} }
type Location interface { type Location interface {
@ -94,6 +94,7 @@ type Location interface {
End() uint64 End() uint64
Pos() uint64 Pos() uint64
ArrayPositions() []uint64 ArrayPositions() []uint64
Size() int
} }
// DocumentFieldTermVisitable is implemented by various scorch segment // DocumentFieldTermVisitable is implemented by various scorch segment

View File

@ -18,10 +18,18 @@ import (
"bytes" "bytes"
"encoding/binary" "encoding/binary"
"io" "io"
"reflect"
"github.com/golang/snappy" "github.com/golang/snappy"
) )
// reflectStaticSizeMetaData caches the size in bytes of a MetaData value
// itself, as reported by reflect; it is filled in once at package init.
var reflectStaticSizeMetaData int

func init() {
	reflectStaticSizeMetaData = int(reflect.TypeOf((*MetaData)(nil)).Elem().Size())
}
var termSeparator byte = 0xff var termSeparator byte = 0xff
var termSeparatorSplitSlice = []byte{termSeparator} var termSeparatorSplitSlice = []byte{termSeparator}

View File

@ -19,13 +19,21 @@ import (
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"math" "math"
"reflect"
"sort" "sort"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/size"
"github.com/golang/snappy" "github.com/golang/snappy"
) )
// reflectStaticSizedocValueIterator caches the size in bytes of a
// docValueIterator value itself, as reported by reflect; set at package init.
var reflectStaticSizedocValueIterator int

func init() {
	reflectStaticSizedocValueIterator = int(reflect.TypeOf(docValueIterator{}).Size())
}
type docValueIterator struct { type docValueIterator struct {
field string field string
curChunkNum uint64 curChunkNum uint64
@ -36,21 +44,12 @@ type docValueIterator struct {
curChunkData []byte // compressed data cache curChunkData []byte // compressed data cache
} }
func (di *docValueIterator) sizeInBytes() uint64 { func (di *docValueIterator) size() int {
// curChunkNum, numChunks, dvDataLoc --> uint64 return reflectStaticSizedocValueIterator + size.SizeOfPtr +
sizeInBytes := 24 len(di.field) +
len(di.chunkLens)*size.SizeOfUint64 +
// field len(di.curChunkHeader)*reflectStaticSizeMetaData +
sizeInBytes += (len(di.field) + int(segment.SizeOfString)) len(di.curChunkData)
// chunkLens, curChunkHeader
sizeInBytes += len(di.chunkLens)*8 +
len(di.curChunkHeader)*24 +
int(segment.SizeOfSlice*2) /* overhead from slices */
// curChunkData is mmap'ed, not included
return uint64(sizeInBytes)
} }
func (di *docValueIterator) fieldName() string { func (di *docValueIterator) fieldName() string {

View File

@ -19,12 +19,30 @@ import (
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"math" "math"
"reflect"
"github.com/RoaringBitmap/roaring" "github.com/RoaringBitmap/roaring"
"github.com/Smerity/govarint" "github.com/Smerity/govarint"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
) )
// Static struct sizes in bytes, as reported by reflect, for the postings
// types defined in this file; filled in once at package init.
var (
	reflectStaticSizePostingsList     int
	reflectStaticSizePostingsIterator int
	reflectStaticSizePosting          int
	reflectStaticSizeLocation         int
)

func init() {
	reflectStaticSizePostingsList = int(reflect.TypeOf(PostingsList{}).Size())
	reflectStaticSizePostingsIterator = int(reflect.TypeOf(PostingsIterator{}).Size())
	reflectStaticSizePosting = int(reflect.TypeOf(Posting{}).Size())
	reflectStaticSizeLocation = int(reflect.TypeOf(Location{}).Size())
}
// PostingsList is an in-memory representation of a postings list // PostingsList is an in-memory representation of a postings list
type PostingsList struct { type PostingsList struct {
sb *SegmentBase sb *SegmentBase
@ -36,6 +54,28 @@ type PostingsList struct {
except *roaring.Bitmap except *roaring.Bitmap
} }
// Size returns an estimate, in bytes, of the memory used by the postings
// list: the static struct size plus one pointer, the size of the segment
// base it refers to (less the length of its mem slice) and the sizes of the
// locBitmap, postings and except bitmaps. Each of those four is only counted
// when it is non-nil.
func (p *PostingsList) Size() int {
	total := reflectStaticSizePostingsList + size.SizeOfPtr

	if base := p.sb; base != nil {
		// do not include the mmap'ed part
		total += base.Size() - len(base.mem)
	}

	if p.locBitmap != nil {
		total += int(p.locBitmap.GetSizeInBytes())
	}
	if p.postings != nil {
		total += int(p.postings.GetSizeInBytes())
	}
	if p.except != nil {
		total += int(p.except.GetSizeInBytes())
	}

	return total
}
// Iterator returns an iterator for this postings list // Iterator returns an iterator for this postings list
func (p *PostingsList) Iterator() segment.PostingsIterator { func (p *PostingsList) Iterator() segment.PostingsIterator {
return p.iterator(nil) return p.iterator(nil)
@ -193,6 +233,25 @@ type PostingsIterator struct {
nextLocs []Location // reused across Next() calls nextLocs []Location // reused across Next() calls
} }
// Size returns an estimate, in bytes, of the memory used by the iterator:
// the static struct size plus one pointer, the lengths of the current
// freq/norm and location chunks, one uint64 per chunk length entry, the Size
// of the posting held in next, the size of locBitmap when set, and the Size
// of every entry in nextLocs.
func (i *PostingsIterator) Size() int {
	total := reflectStaticSizePostingsIterator + size.SizeOfPtr
	total += len(i.currChunkFreqNorm) + len(i.currChunkLoc)
	total += (len(i.freqChunkLens) + len(i.locChunkLens)) * size.SizeOfUint64
	total += i.next.Size()

	if i.locBitmap != nil {
		total += int(i.locBitmap.GetSizeInBytes())
	}

	for idx := range i.nextLocs {
		total += i.nextLocs[idx].Size()
	}

	return total
}
func (i *PostingsIterator) loadChunk(chunk int) error { func (i *PostingsIterator) loadChunk(chunk int) error {
if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) { if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) {
return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens)) return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens))
@ -444,6 +503,20 @@ type Posting struct {
locs []segment.Location locs []segment.Location
} }
// Size returns an estimate, in bytes, of the memory used by the posting: the
// static struct size plus the Size of every location attached to it.
//
// The iterator the posting refers to is deliberately not followed.
// PostingsIterator.Size already adds the Size of the posting it holds in its
// next field, so adding p.iterator.Size() here would count the iterator
// twice and, whenever that posting points back at the iterator holding it,
// make the two Size methods call each other without end.
func (p *Posting) Size() int {
	sizeInBytes := reflectStaticSizePosting

	for _, entry := range p.locs {
		sizeInBytes += entry.Size()
	}

	return sizeInBytes
}
// Number returns the document number of this posting in this segment // Number returns the document number of this posting in this segment
func (p *Posting) Number() uint64 { func (p *Posting) Number() uint64 {
return p.docNum return p.docNum
@ -473,6 +546,12 @@ type Location struct {
ap []uint64 ap []uint64
} }
// Size returns an estimate, in bytes, of the memory used by the location:
// the static struct size, the bytes of the field name and one uint64 per
// array position.
func (l *Location) Size() int {
	arrayPositionBytes := len(l.ap) * size.SizeOfUint64
	return reflectStaticSizeLocation + len(l.field) + arrayPositionBytes
}
// Field returns the name of the field (useful in composite fields to know // Field returns the name of the field (useful in composite fields to know
// which original field the value came from) // which original field the value came from)
func (l *Location) Field() string { func (l *Location) Field() string {

View File

@ -20,16 +20,25 @@ import (
"fmt" "fmt"
"io" "io"
"os" "os"
"reflect"
"sync" "sync"
"github.com/RoaringBitmap/roaring" "github.com/RoaringBitmap/roaring"
"github.com/Smerity/govarint" "github.com/Smerity/govarint"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
"github.com/couchbase/vellum" "github.com/couchbase/vellum"
mmap "github.com/edsrzf/mmap-go" mmap "github.com/edsrzf/mmap-go"
"github.com/golang/snappy" "github.com/golang/snappy"
) )
// reflectStaticSizeSegmentBase caches the size in bytes of a SegmentBase
// value itself, as reported by reflect; it is filled in once at package init.
var reflectStaticSizeSegmentBase int

func init() {
	reflectStaticSizeSegmentBase = int(reflect.TypeOf((*SegmentBase)(nil)).Elem().Size())
}
// Open returns a zap impl of a segment // Open returns a zap impl of a segment
func Open(path string) (segment.Segment, error) { func Open(path string) (segment.Segment, error) {
f, err := os.Open(path) f, err := os.Open(path)
@ -92,6 +101,32 @@ type SegmentBase struct {
fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field
} }
// Size returns an estimate, in bytes, of the memory used by the segment
// base. The result includes the full length of sb.mem; callers that do not
// want that part counted subtract len(sb.mem) themselves.
func (sb *SegmentBase) Size() int {
	total := reflectStaticSizeSegmentBase + len(sb.mem)

	// fieldsMap: key bytes, string header and the uint16 value
	for name := range sb.fieldsMap {
		total += len(name) + size.SizeOfString + size.SizeOfUint16
	}

	// fieldsInv: entry bytes and string header
	for idx := range sb.fieldsInv {
		total += len(sb.fieldsInv[idx]) + size.SizeOfString
	}

	// dictLocs: one uint64 per entry
	total += len(sb.dictLocs) * size.SizeOfUint64

	// fieldDvIterMap: uint16 key and pointer value per entry, plus the
	// iterator's own size when the entry is non-nil
	for _, dvIter := range sb.fieldDvIterMap {
		total += size.SizeOfUint16 + size.SizeOfPtr
		if dvIter == nil {
			continue
		}
		total += dvIter.size()
	}

	return total
}
func (sb *SegmentBase) AddRef() {} func (sb *SegmentBase) AddRef() {}
func (sb *SegmentBase) DecRef() (err error) { return nil } func (sb *SegmentBase) DecRef() (err error) { return nil }
func (sb *SegmentBase) Close() (err error) { return nil } func (sb *SegmentBase) Close() (err error) { return nil }
@ -111,56 +146,19 @@ type Segment struct {
refs int64 refs int64
} }
func (s *Segment) SizeInBytes() uint64 { func (s *Segment) Size() int {
// 8 /* size of file pointer */ // 8 /* size of file pointer */
// 4 /* size of version -> uint32 */ // 4 /* size of version -> uint32 */
// 4 /* size of crc -> uint32 */ // 4 /* size of crc -> uint32 */
sizeOfUints := 16 sizeOfUints := 16
sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints sizeInBytes := (len(s.path) + size.SizeOfString) + sizeOfUints
// mutex, refs -> int64 // mutex, refs -> int64
sizeInBytes += 16 sizeInBytes += 16
// do not include the mmap'ed part // do not include the mmap'ed part
return uint64(sizeInBytes) + s.SegmentBase.SizeInBytes() - uint64(len(s.mem)) return sizeInBytes + s.SegmentBase.Size() - len(s.mem)
}
func (s *SegmentBase) SizeInBytes() uint64 {
// 4 /* size of memCRC -> uint32 */
// 4 /* size of chunkFactor -> uint32 */
// 8 /* size of numDocs -> uint64 */
// 8 /* size of storedIndexOffset -> uint64 */
// 8 /* size of fieldsIndexOffset -> uint64 */
// 8 /* size of docValueOffset -> uint64 */
sizeInBytes := 40
sizeInBytes += len(s.mem) + int(segment.SizeOfSlice)
// fieldsMap
for k, _ := range s.fieldsMap {
sizeInBytes += (len(k) + int(segment.SizeOfString)) + 2 /* size of uint16 */
}
sizeInBytes += int(segment.SizeOfMap) /* overhead from map */
// fieldsInv, dictLocs
for _, entry := range s.fieldsInv {
sizeInBytes += (len(entry) + int(segment.SizeOfString))
}
sizeInBytes += len(s.dictLocs) * 8 /* size of uint64 */
sizeInBytes += int(segment.SizeOfSlice) * 3 /* overhead from slices */
// fieldDvIterMap
sizeInBytes += len(s.fieldDvIterMap) *
int(segment.SizeOfPointer+2 /* size of uint16 */)
for _, entry := range s.fieldDvIterMap {
if entry != nil {
sizeInBytes += int(entry.sizeInBytes())
}
}
sizeInBytes += int(segment.SizeOfMap)
return uint64(sizeInBytes)
} }
func (s *Segment) AddRef() { func (s *Segment) AddRef() {

View File

@ -27,6 +27,7 @@ import (
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
) )
type asynchSegmentResult struct { type asynchSegmentResult struct {
@ -89,6 +90,12 @@ func (i *IndexSnapshot) Close() error {
return i.DecRef() return i.DecRef()
} }
// Size reports only the size of a pointer. During Search a reference to the
// IndexSnapshot serves as the reader, so that reference is all that is
// counted when estimating the overhead.
func (i *IndexSnapshot) Size() int {
	return size.SizeOfPtr
}
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
results := make(chan *asynchSegmentResult) results := make(chan *asynchSegmentResult)

View File

@ -16,17 +16,30 @@ package scorch
import ( import (
"bytes" "bytes"
"reflect"
"github.com/RoaringBitmap/roaring" "github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeIndexSnapshotDocIDReader caches the size in bytes of an
// IndexSnapshotDocIDReader value itself, as reported by reflect; set at
// package init.
var reflectStaticSizeIndexSnapshotDocIDReader int

func init() {
	reflectStaticSizeIndexSnapshotDocIDReader = int(reflect.TypeOf(IndexSnapshotDocIDReader{}).Size())
}
type IndexSnapshotDocIDReader struct { type IndexSnapshotDocIDReader struct {
snapshot *IndexSnapshot snapshot *IndexSnapshot
iterators []roaring.IntIterable iterators []roaring.IntIterable
segmentOffset int segmentOffset int
} }
// Size returns an estimate, in bytes, of the memory used by the reader: one
// pointer plus the static struct size. Nothing the struct's fields refer to
// is added.
func (i *IndexSnapshotDocIDReader) Size() int {
	return size.SizeOfPtr + reflectStaticSizeIndexSnapshotDocIDReader
}
func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) { func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) {
for i.segmentOffset < len(i.iterators) { for i.segmentOffset < len(i.iterators) {
if !i.iterators[i.segmentOffset].HasNext() { if !i.iterators[i.segmentOffset].HasNext() {

View File

@ -16,12 +16,21 @@ package scorch
import ( import (
"bytes" "bytes"
"reflect"
"sync/atomic" "sync/atomic"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeIndexSnapshotTermFieldReader holds the size reported by
// reflect for an IndexSnapshotTermFieldReader value; it is computed once in
// init.
var reflectStaticSizeIndexSnapshotTermFieldReader int

func init() {
	reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(IndexSnapshotTermFieldReader{}).Size())
}
type IndexSnapshotTermFieldReader struct { type IndexSnapshotTermFieldReader struct {
term []byte term []byte
field string field string
@ -36,6 +45,27 @@ type IndexSnapshotTermFieldReader struct {
currID index.IndexInternalID currID index.IndexInternalID
} }
// Size returns the static struct size plus one pointer, the lengths of term,
// field and currID, the Size() of every postings entry and iterator, and the
// Size() of currPosting when it is set.
func (i *IndexSnapshotTermFieldReader) Size() int {
	total := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr
	total += len(i.term) + len(i.field) + len(i.currID)

	for _, pl := range i.postings {
		total += pl.Size()
	}
	for _, itr := range i.iterators {
		total += itr.Size()
	}

	if i.currPosting != nil {
		total += i.currPosting.Size()
	}
	return total
}
func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
rv := preAlloced rv := preAlloced
if rv == nil { if rv == nil {

View File

@ -213,7 +213,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e
return nil return nil
} }
func (c *cachedDocs) sizeInBytes() uint64 { func (c *cachedDocs) size() int {
sizeInBytes := 0 sizeInBytes := 0
c.m.Lock() c.m.Lock()
for k, v := range c.cache { // cachedFieldDocs for k, v := range c.cache { // cachedFieldDocs
@ -225,5 +225,5 @@ func (c *cachedDocs) sizeInBytes() uint64 {
} }
} }
c.m.Unlock() c.m.Unlock()
return uint64(sizeInBytes) return sizeInBytes
} }

View File

@ -15,17 +15,31 @@
package upsidedown package upsidedown
import ( import (
"reflect"
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeIndexReader holds the size reported by reflect for an
// IndexReader value; it is computed once in init.
var reflectStaticSizeIndexReader int

func init() {
	reflectStaticSizeIndexReader = int(reflect.TypeOf(IndexReader{}).Size())
}
type IndexReader struct { type IndexReader struct {
index *UpsideDownCouch index *UpsideDownCouch
kvreader store.KVReader kvreader store.KVReader
docCount uint64 docCount uint64
} }
// Size returns the static struct size plus the size of one pointer. It does
// not read any field of the receiver.
func (i *IndexReader) Size() int {
	return size.SizeOfPtr + reflectStaticSizeIndexReader
}
func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false) fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
if fieldExists { if fieldExists {

View File

@ -16,13 +16,27 @@ package upsidedown
import ( import (
"bytes" "bytes"
"reflect"
"sort" "sort"
"sync/atomic" "sync/atomic"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/size"
) )
// Sizes reported by reflect for the reader structs in this file; both are
// computed once in init.
var (
	reflectStaticSizeUpsideDownCouchTermFieldReader int
	reflectStaticSizeUpsideDownCouchDocIDReader     int
)

func init() {
	reflectStaticSizeUpsideDownCouchTermFieldReader = int(
		reflect.TypeOf(UpsideDownCouchTermFieldReader{}).Size())
	reflectStaticSizeUpsideDownCouchDocIDReader = int(
		reflect.TypeOf(UpsideDownCouchDocIDReader{}).Size())
}
type UpsideDownCouchTermFieldReader struct { type UpsideDownCouchTermFieldReader struct {
count uint64 count uint64
indexReader *IndexReader indexReader *IndexReader
@ -35,6 +49,19 @@ type UpsideDownCouchTermFieldReader struct {
includeTermVectors bool includeTermVectors bool
} }
// Size returns the static struct size plus one pointer, the lengths of term
// and keyBuf, the Size() of the preallocated row, and the Size() of tfrNext
// when it is set.
func (r *UpsideDownCouchTermFieldReader) Size() int {
	total := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr
	total += len(r.term)
	total += r.tfrPrealloc.Size()
	total += len(r.keyBuf)

	if r.tfrNext == nil {
		return total
	}
	return total + r.tfrNext.Size()
}
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) { func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
bufNeeded := termFrequencyRowKeySize(term, nil) bufNeeded := termFrequencyRowKeySize(term, nil)
if bufNeeded < dictionaryRowKeySize(term) { if bufNeeded < dictionaryRowKeySize(term) {
@ -174,8 +201,18 @@ type UpsideDownCouchDocIDReader struct {
onlyMode bool onlyMode bool
} }
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) { func (r *UpsideDownCouchDocIDReader) Size() int {
// Start from the static struct size plus whatever the index reader reports
// for itself.
sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader +
r.indexReader.Size()
// Every entry of r.only adds a string header plus the length of the entry.
for _, entry := range r.only {
sizeInBytes += size.SizeOfString + len(entry)
}
return sizeInBytes
}
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
startBytes := []byte{0x0} startBytes := []byte{0x0}
endBytes := []byte{0xff} endBytes := []byte{0xff}

View File

@ -20,10 +20,22 @@ import (
"fmt" "fmt"
"io" "io"
"math" "math"
"reflect"
"github.com/blevesearch/bleve/size"
"github.com/golang/protobuf/proto" "github.com/golang/protobuf/proto"
) )
// Sizes reported by reflect for the row structs in this file; both are
// computed once in init.
var (
	reflectStaticSizeTermFrequencyRow int
	reflectStaticSizeTermVector       int
)

func init() {
	reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(TermFrequencyRow{}).Size())
	reflectStaticSizeTermVector = int(reflect.TypeOf(TermVector{}).Size())
}
const ByteSeparator byte = 0xff const ByteSeparator byte = 0xff
type UpsideDownCouchRowStream chan UpsideDownCouchRow type UpsideDownCouchRowStream chan UpsideDownCouchRow
@ -358,6 +370,11 @@ type TermVector struct {
end uint64 end uint64
} }
// Size returns the static struct size plus one pointer, plus one uint64 for
// every element of arrayPositions.
func (tv *TermVector) Size() int {
	positionsSize := len(tv.arrayPositions) * size.SizeOfUint64
	return reflectStaticSizeTermVector + size.SizeOfPtr + positionsSize
}
func (tv *TermVector) String() string { func (tv *TermVector) String() string {
return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions) return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
} }
@ -371,6 +388,18 @@ type TermFrequencyRow struct {
field uint16 field uint16
} }
// Size returns the static struct size plus the lengths of term and doc and
// the Size() of every term vector attached to the row.
func (tfr *TermFrequencyRow) Size() int {
	total := reflectStaticSizeTermFrequencyRow
	total += len(tfr.term) + len(tfr.doc)

	for _, tv := range tfr.vectors {
		total += tv.Size()
	}
	return total
}
func (tfr *TermFrequencyRow) Term() []byte { func (tfr *TermFrequencyRow) Term() []byte {
return tfr.term return tfr.term
} }

View File

@ -362,8 +362,59 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
return i.SearchInContext(context.Background(), req) return i.SearchInContext(context.Background(), req)
} }
// memNeededForSearch is a helper function that returns an estimate of RAM
// needed to execute a search request.
//
// The estimate is the sum of:
//   - the collector's own Size(),
//   - an empty SearchContext plus one empty DocumentMatch for every slot of
//     the collector backing store and of the searcher's match pool,
//   - the searcher's Size(),
//   - the results (one empty DocumentMatch per slot, plus one extra),
//   - an empty SearchResult,
//   - one empty FacetResult per requested facet,
//   - one empty Document per hit when stored fields or highlighting are
//     requested.
//
// Sizes are taken from zero-valued instances, so the per-item figures are
// lower bounds rather than exact costs.
func memNeededForSearch(req *SearchRequest,
	searcher search.Searcher,
	topnCollector *collector.TopNCollector) uint64 {

	// size + from => number of hits
	numHits := req.Size + req.From

	// The collector's backing store is capped at PreAllocSizeSkipCap (+1).
	backingSize := numHits + 1
	if numHits > collector.PreAllocSizeSkipCap {
		backingSize = collector.PreAllocSizeSkipCap + 1
	}
	numDocMatches := backingSize + searcher.DocumentMatchPoolSize()

	estimate := 0

	// overhead, size in bytes from collector
	estimate += topnCollector.Size()

	var dm search.DocumentMatch
	sizeOfDocumentMatch := dm.Size()

	// pre-allocing DocumentMatchPool
	var sc search.SearchContext
	estimate += sc.Size() + numDocMatches*sizeOfDocumentMatch

	// searcher overhead
	estimate += searcher.Size()

	// overhead from results, lowestMatchOutsideResults
	estimate += (numDocMatches + 1) * sizeOfDocumentMatch

	// additional overhead from SearchResult
	var sr SearchResult
	estimate += sr.Size()

	// overhead from facet results
	if req.Facets != nil {
		var fr search.FacetResult
		estimate += len(req.Facets) * fr.Size()
	}

	// highlighting, store: one document per hit. This is added exactly once;
	// adding it inside a per-hit loop would count it numHits times over.
	if len(req.Fields) > 0 || req.Highlight != nil {
		var d document.Document
		estimate += numHits * d.Size()
	}

	return uint64(estimate)
}
// SearchInContext executes a search request operation within the provided // SearchInContext executes a search request operation within the provided
// Context. Returns a SearchResult object or an error. // Context. Returns a SearchResult object or an error.
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) { func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
i.mutex.RLock() i.mutex.RLock()
defer i.mutex.RUnlock() defer i.mutex.RUnlock()

View File

@ -36,6 +36,9 @@ import (
"github.com/blevesearch/bleve/mapping" "github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/query" "github.com/blevesearch/bleve/search/query"
"github.com/blevesearch/bleve/index/scorch"
"github.com/blevesearch/bleve/index/upsidedown"
) )
func TestCrud(t *testing.T) { func TestCrud(t *testing.T) {
@ -1815,3 +1818,55 @@ func TestIndexAdvancedCountMatchSearch(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
} }
// benchmarkSearchOverhead builds an index of the given type under "testidx",
// indexes 10000 single-field documents cycling through four element names,
// and then times repeated execution of a disjunction of two term queries.
// The index is closed and its directory removed when the benchmark returns.
func benchmarkSearchOverhead(indexType string, b *testing.B) {
	// Registered first so that it runs last, after the index is closed.
	defer func() {
		if rmErr := os.RemoveAll("testidx"); rmErr != nil {
			b.Fatal(rmErr)
		}
	}()

	index, err := NewUsing("testidx", NewIndexMapping(),
		indexType, Config.DefaultKVStore, nil)
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		if closeErr := index.Close(); closeErr != nil {
			b.Fatal(closeErr)
		}
	}()

	elements := []string{"air", "water", "fire", "earth"}
	for docNum := 0; docNum < 10000; docNum++ {
		docID := fmt.Sprintf("%d", docNum)
		doc := map[string]interface{}{"name": elements[docNum%len(elements)]}
		if indexErr := index.Index(docID, doc); indexErr != nil {
			b.Fatal(indexErr)
		}
	}

	req := NewSearchRequest(
		NewDisjunctionQuery(NewTermQuery("water"), NewTermQuery("fire")))

	b.ResetTimer()

	for n := 0; n < b.N; n++ {
		if _, searchErr := index.Search(req); searchErr != nil {
			b.Fatal(searchErr)
		}
	}
}
func BenchmarkUpsidedownSearchOverhead(b *testing.B) {
benchmarkSearchOverhead(upsidedown.Name, b)
}
func BenchmarkScorchSearchOverhead(b *testing.B) {
benchmarkSearchOverhead(scorch.Name, b)
}

View File

@ -17,6 +17,7 @@ package bleve
import ( import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"reflect"
"time" "time"
"github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/analysis"
@ -24,8 +25,19 @@ import (
"github.com/blevesearch/bleve/registry" "github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/query" "github.com/blevesearch/bleve/search/query"
"github.com/blevesearch/bleve/size"
) )
// Sizes reported by reflect for SearchResult and SearchStatus values; both
// are computed once in init.
var (
	reflectStaticSizeSearchResult int
	reflectStaticSizeSearchStatus int
)

func init() {
	// A nil pointer's element type gives the type without building a value.
	reflectStaticSizeSearchResult = int(reflect.TypeOf((*SearchResult)(nil)).Elem().Size())
	reflectStaticSizeSearchStatus = int(reflect.TypeOf((*SearchStatus)(nil)).Elem().Size())
}
var cache = registry.NewCache() var cache = registry.NewCache()
const defaultDateTimeParser = optional.Name const defaultDateTimeParser = optional.Name
@ -432,6 +444,24 @@ type SearchResult struct {
Facets search.FacetResults `json:"facets"` Facets search.FacetResults `json:"facets"`
} }
// Size returns the static sizes of SearchResult and SearchStatus plus one
// pointer, the Size() of every non-nil hit, and for every facet a string
// header, the length of its name and the facet result's Size().
func (sr *SearchResult) Size() int {
	total := reflectStaticSizeSearchResult + size.SizeOfPtr +
		reflectStaticSizeSearchStatus

	for _, hit := range sr.Hits {
		if hit == nil {
			continue
		}
		total += hit.Size()
	}

	for name, facet := range sr.Facets {
		total += size.SizeOfString + len(name) + facet.Size()
	}

	return total
}
func (sr *SearchResult) String() string { func (sr *SearchResult) String() string {
rv := "" rv := ""
if sr.Total > 0 { if sr.Total > 0 {

View File

@ -15,6 +15,8 @@
package collector package collector
import ( import (
"reflect"
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
@ -25,6 +27,18 @@ type stubSearcher struct {
matches []*search.DocumentMatch matches []*search.DocumentMatch
} }
// Size returns the reflect-reported size of the stub struct plus the Size()
// of every non-nil match it holds.
func (ss *stubSearcher) Size() int {
	total := int(reflect.TypeOf(*ss).Size())

	for _, match := range ss.matches {
		if match == nil {
			continue
		}
		total += match.Size()
	}

	return total
}
func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
if ss.index < len(ss.matches) { if ss.index < len(ss.matches) {
rv := ctx.DocumentMatchPool.Get() rv := ctx.DocumentMatchPool.Get()
@ -76,6 +90,10 @@ func (ss *stubSearcher) DocumentMatchPoolSize() int {
type stubReader struct{} type stubReader struct{}
// Size always reports zero for the stub reader.
func (sr *stubReader) Size() int {
	const stubSize = 0
	return stubSize
}
func (sr *stubReader) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { func (sr *stubReader) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
return nil, nil return nil, nil
} }

View File

@ -16,12 +16,21 @@ package collector
import ( import (
"context" "context"
"reflect"
"time" "time"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeTopNCollector holds the size reported by reflect for a
// TopNCollector value; it is computed once in init.
var reflectStaticSizeTopNCollector int

func init() {
	reflectStaticSizeTopNCollector = int(reflect.TypeOf(TopNCollector{}).Size())
}
type collectorStore interface { type collectorStore interface {
// Add the document, and if the new store size exceeds the provided size // Add the document, and if the new store size exceeds the provided size
// the last element is removed and returned. If the size has not been // the last element is removed and returned. If the size has not been
@ -98,6 +107,22 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
return hc return hc
} }
// Size returns the static struct size plus one pointer, the facets builder's
// Size() when one is set, a string header plus length for every needed
// field, and one unit per entry of cachedScoring and cachedDesc.
func (hc *TopNCollector) Size() int {
	total := reflectStaticSizeTopNCollector + size.SizeOfPtr

	if hc.facetsBuilder != nil {
		total += hc.facetsBuilder.Size()
	}

	for _, field := range hc.neededFields {
		total += len(field) + size.SizeOfString
	}

	total += len(hc.cachedScoring)
	total += len(hc.cachedDesc)

	return total
}
// Collect goes to the index to find the matching documents // Collect goes to the index to find the matching documents
func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error { func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
startTime := time.Now() startTime := time.Now()

View File

@ -17,8 +17,18 @@ package search
import ( import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"reflect"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeExplanation holds the size reported by reflect for an
// Explanation value; it is computed once in init.
var reflectStaticSizeExplanation int

func init() {
	reflectStaticSizeExplanation = int(reflect.TypeOf(Explanation{}).Size())
}
type Explanation struct { type Explanation struct {
Value float64 `json:"value"` Value float64 `json:"value"`
Message string `json:"message"` Message string `json:"message"`
@ -32,3 +42,14 @@ func (expl *Explanation) String() string {
} }
return string(js) return string(js)
} }
// Size returns the static struct size plus one pointer, the length of the
// message, and the Size() of every child explanation, computed recursively.
func (expl *Explanation) Size() int {
	total := reflectStaticSizeExplanation + size.SizeOfPtr
	total += len(expl.Message)

	for _, child := range expl.Children {
		total += child.Size()
	}

	return total
}

View File

@ -15,13 +15,25 @@
package facet package facet
import ( import (
"reflect"
"sort" "sort"
"time" "time"
"github.com/blevesearch/bleve/numeric" "github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
) )
// Sizes reported by reflect for DateTimeFacetBuilder and dateTimeRange
// values; both are computed once in init.
var (
	reflectStaticSizeDateTimeFacetBuilder int
	reflectStaticSizedateTimeRange        int
)

func init() {
	reflectStaticSizeDateTimeFacetBuilder = int(reflect.TypeOf(DateTimeFacetBuilder{}).Size())
	reflectStaticSizedateTimeRange = int(reflect.TypeOf(dateTimeRange{}).Size())
}
type dateTimeRange struct { type dateTimeRange struct {
start time.Time start time.Time
end time.Time end time.Time
@ -46,6 +58,23 @@ func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
} }
} }
// Size returns the static struct size plus one pointer and the length of the
// field name. Each termsCount entry adds a string header, the key length and
// one int; each ranges entry adds a string header, the key length, one
// pointer and the static size of a dateTimeRange.
func (fb *DateTimeFacetBuilder) Size() int {
	total := reflectStaticSizeDateTimeFacetBuilder + size.SizeOfPtr
	total += len(fb.field)

	for term := range fb.termsCount {
		total += size.SizeOfString + len(term) + size.SizeOfInt
	}

	for name := range fb.ranges {
		total += size.SizeOfString + len(name) +
			size.SizeOfPtr + reflectStaticSizedateTimeRange
	}

	return total
}
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) { func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
r := dateTimeRange{ r := dateTimeRange{
start: start, start: start,

View File

@ -15,12 +15,24 @@
package facet package facet
import ( import (
"reflect"
"sort" "sort"
"github.com/blevesearch/bleve/numeric" "github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
) )
// Sizes reported by reflect for NumericFacetBuilder and numericRange values;
// both are computed once in init.
var (
	reflectStaticSizeNumericFacetBuilder int
	reflectStaticSizenumericRange        int
)

func init() {
	reflectStaticSizeNumericFacetBuilder = int(reflect.TypeOf(NumericFacetBuilder{}).Size())
	reflectStaticSizenumericRange = int(reflect.TypeOf(numericRange{}).Size())
}
type numericRange struct { type numericRange struct {
min *float64 min *float64
max *float64 max *float64
@ -45,6 +57,23 @@ func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
} }
} }
// Size returns the static struct size plus one pointer and the length of the
// field name. Each termsCount entry adds a string header, the key length and
// one int; each ranges entry adds a string header, the key length, one
// pointer and the static size of a numericRange.
func (fb *NumericFacetBuilder) Size() int {
	total := reflectStaticSizeNumericFacetBuilder + size.SizeOfPtr
	total += len(fb.field)

	for term := range fb.termsCount {
		total += size.SizeOfString + len(term) + size.SizeOfInt
	}

	for name := range fb.ranges {
		total += size.SizeOfString + len(name) +
			size.SizeOfPtr + reflectStaticSizenumericRange
	}

	return total
}
func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) { func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
r := numericRange{ r := numericRange{
min: min, min: min,

View File

@ -15,11 +15,20 @@
package facet package facet
import ( import (
"reflect"
"sort" "sort"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeTermsFacetBuilder holds the size reported by reflect for
// a TermsFacetBuilder value; it is computed once in init.
var reflectStaticSizeTermsFacetBuilder int

func init() {
	reflectStaticSizeTermsFacetBuilder = int(reflect.TypeOf(TermsFacetBuilder{}).Size())
}
type TermsFacetBuilder struct { type TermsFacetBuilder struct {
size int size int
field string field string
@ -37,6 +46,18 @@ func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
} }
} }
// Size returns the static struct size plus one pointer and the length of the
// field name. Each termsCount entry adds a string header, the key length and
// one int.
func (fb *TermsFacetBuilder) Size() int {
	total := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr
	total += len(fb.field)

	for term := range fb.termsCount {
		total += size.SizeOfString + len(term) + size.SizeOfInt
	}

	return total
}
func (fb *TermsFacetBuilder) Field() string { func (fb *TermsFacetBuilder) Field() string {
return fb.field return fb.field
} }

View File

@ -15,11 +15,32 @@
package search package search
import ( import (
"reflect"
"sort" "sort"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/size"
) )
// Sizes reported by reflect for the facet types in this file; all are
// computed once in init.
var (
	reflectStaticSizeFacetsBuilder     int
	reflectStaticSizeFacetResult       int
	reflectStaticSizeTermFacet         int
	reflectStaticSizeNumericRangeFacet int
	reflectStaticSizeDateRangeFacet    int
)

func init() {
	// A nil pointer's element type gives the type without building a value.
	reflectStaticSizeFacetsBuilder = int(reflect.TypeOf((*FacetsBuilder)(nil)).Elem().Size())
	reflectStaticSizeFacetResult = int(reflect.TypeOf((*FacetResult)(nil)).Elem().Size())
	reflectStaticSizeTermFacet = int(reflect.TypeOf((*TermFacet)(nil)).Elem().Size())
	reflectStaticSizeNumericRangeFacet = int(reflect.TypeOf((*NumericRangeFacet)(nil)).Elem().Size())
	reflectStaticSizeDateRangeFacet = int(reflect.TypeOf((*DateRangeFacet)(nil)).Elem().Size())
}
type FacetBuilder interface { type FacetBuilder interface {
StartDoc() StartDoc()
UpdateVisitor(field string, term []byte) UpdateVisitor(field string, term []byte)
@ -27,6 +48,8 @@ type FacetBuilder interface {
Result() *FacetResult Result() *FacetResult
Field() string Field() string
Size() int
} }
type FacetsBuilder struct { type FacetsBuilder struct {
@ -42,6 +65,22 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
} }
} }
// Size returns the static struct size plus one pointer and the index
// reader's Size(). Each registered facet adds a string header, the length of
// its name and the builder's Size(); each field name adds a string header
// plus its length.
func (fb *FacetsBuilder) Size() int {
	total := reflectStaticSizeFacetsBuilder + size.SizeOfPtr
	total += fb.indexReader.Size()

	for name, builder := range fb.facets {
		total += size.SizeOfString + len(name) + builder.Size()
	}

	for _, field := range fb.fields {
		total += size.SizeOfString + len(field)
	}

	return total
}
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) { func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
fb.facets[name] = facetBuilder fb.facets[name] = facetBuilder
fb.fields = append(fb.fields, facetBuilder.Field()) fb.fields = append(fb.fields, facetBuilder.Field())
@ -213,6 +252,14 @@ type FacetResult struct {
DateRanges DateRangeFacets `json:"date_ranges,omitempty"` DateRanges DateRangeFacets `json:"date_ranges,omitempty"`
} }
// Size returns the static struct size plus one pointer and the length of the
// field name, plus, for every term, numeric range and date range entry, the
// static size of that entry's type and one pointer.
func (fr *FacetResult) Size() int {
	total := reflectStaticSizeFacetResult + size.SizeOfPtr + len(fr.Field)

	total += len(fr.Terms) * (reflectStaticSizeTermFacet + size.SizeOfPtr)
	total += len(fr.NumericRanges) * (reflectStaticSizeNumericRangeFacet + size.SizeOfPtr)
	total += len(fr.DateRanges) * (reflectStaticSizeDateRangeFacet + size.SizeOfPtr)

	return total
}
func (fr *FacetResult) Merge(other *FacetResult) { func (fr *FacetResult) Merge(other *FacetResult) {
fr.Total += other.Total fr.Total += other.Total
fr.Missing += other.Missing fr.Missing += other.Missing

View File

@ -14,6 +14,17 @@
package search package search
import (
"reflect"
)
// reflectStaticSizeDocumentMatchPool holds the size reported by reflect for
// a DocumentMatchPool value; it is computed once in init.
var reflectStaticSizeDocumentMatchPool int

func init() {
	reflectStaticSizeDocumentMatchPool = int(reflect.TypeOf(DocumentMatchPool{}).Size())
}
// DocumentMatchPoolTooSmall is a callback function that can be executed // DocumentMatchPoolTooSmall is a callback function that can be executed
// when the DocumentMatchPool does not have sufficient capacity // when the DocumentMatchPool does not have sufficient capacity
// By default we just perform just-in-time allocation, but you could log // By default we just perform just-in-time allocation, but you could log

View File

@ -15,13 +15,27 @@
package scorer package scorer
import ( import (
"reflect"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeConjunctionQueryScorer holds the size reported by reflect
// for a ConjunctionQueryScorer value; it is computed once in init.
var reflectStaticSizeConjunctionQueryScorer int

func init() {
	reflectStaticSizeConjunctionQueryScorer = int(reflect.TypeOf(ConjunctionQueryScorer{}).Size())
}
type ConjunctionQueryScorer struct { type ConjunctionQueryScorer struct {
options search.SearcherOptions options search.SearcherOptions
} }
// Size returns the static struct size plus the size of one pointer.
func (s *ConjunctionQueryScorer) Size() int {
	return size.SizeOfPtr + reflectStaticSizeConjunctionQueryScorer
}
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer { func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
return &ConjunctionQueryScorer{ return &ConjunctionQueryScorer{
options: options, options: options,

View File

@ -16,11 +16,20 @@ package scorer
import ( import (
"fmt" "fmt"
"reflect"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeConstantScorer holds the size reported by reflect for a
// ConstantScorer value; it is computed once in init.
var reflectStaticSizeConstantScorer int

func init() {
	reflectStaticSizeConstantScorer = int(reflect.TypeOf(ConstantScorer{}).Size())
}
type ConstantScorer struct { type ConstantScorer struct {
constant float64 constant float64
boost float64 boost float64
@ -30,6 +39,16 @@ type ConstantScorer struct {
queryWeightExplanation *search.Explanation queryWeightExplanation *search.Explanation
} }
// Size returns the static struct size plus one pointer, plus the Size() of
// the query weight explanation when one is set.
func (s *ConstantScorer) Size() int {
	base := reflectStaticSizeConstantScorer + size.SizeOfPtr

	if s.queryWeightExplanation == nil {
		return base
	}
	return base + s.queryWeightExplanation.Size()
}
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer { func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
rv := ConstantScorer{ rv := ConstantScorer{
options: options, options: options,

View File

@ -16,14 +16,27 @@ package scorer
import ( import (
"fmt" "fmt"
"reflect"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeDisjunctionQueryScorer holds the size reported by reflect
// for a DisjunctionQueryScorer value; it is computed once in init.
var reflectStaticSizeDisjunctionQueryScorer int

func init() {
	reflectStaticSizeDisjunctionQueryScorer = int(reflect.TypeOf(DisjunctionQueryScorer{}).Size())
}
type DisjunctionQueryScorer struct { type DisjunctionQueryScorer struct {
options search.SearcherOptions options search.SearcherOptions
} }
// Size returns the static struct size plus the size of one pointer.
func (s *DisjunctionQueryScorer) Size() int {
	return size.SizeOfPtr + reflectStaticSizeDisjunctionQueryScorer
}
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer { func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
return &DisjunctionQueryScorer{ return &DisjunctionQueryScorer{
options: options, options: options,

View File

@ -17,11 +17,20 @@ package scorer
import ( import (
"fmt" "fmt"
"math" "math"
"reflect"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeTermQueryScorer holds the size reported by reflect for a
// TermQueryScorer value; it is computed once in init.
var reflectStaticSizeTermQueryScorer int

func init() {
	reflectStaticSizeTermQueryScorer = int(reflect.TypeOf(TermQueryScorer{}).Size())
}
type TermQueryScorer struct { type TermQueryScorer struct {
queryTerm []byte queryTerm []byte
queryField string queryField string
@ -36,6 +45,21 @@ type TermQueryScorer struct {
queryWeightExplanation *search.Explanation queryWeightExplanation *search.Explanation
} }
// Size returns the static struct size plus one pointer and the lengths of
// the query term and field, plus the Size() of the idf and query weight
// explanations when they are set.
func (s *TermQueryScorer) Size() int {
	total := reflectStaticSizeTermQueryScorer + size.SizeOfPtr
	total += len(s.queryTerm)
	total += len(s.queryField)

	if s.idfExplanation != nil {
		total += s.idfExplanation.Size()
	}
	if s.queryWeightExplanation != nil {
		total += s.queryWeightExplanation.Size()
	}

	return total
}
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer { func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
rv := TermQueryScorer{ rv := TermQueryScorer{
queryTerm: queryTerm, queryTerm: queryTerm,

View File

@ -16,11 +16,26 @@ package search
import ( import (
"fmt" "fmt"
"reflect"
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/size"
) )
// Sizes reported by reflect for DocumentMatch, SearchContext and Location
// values; all are computed once in init.
var (
	reflectStaticSizeDocumentMatch int
	reflectStaticSizeSearchContext int
	reflectStaticSizeLocation      int
)

func init() {
	reflectStaticSizeDocumentMatch = int(reflect.TypeOf(DocumentMatch{}).Size())
	reflectStaticSizeSearchContext = int(reflect.TypeOf(SearchContext{}).Size())
	reflectStaticSizeLocation = int(reflect.TypeOf(Location{}).Size())
}
type ArrayPositions []uint64 type ArrayPositions []uint64
func (ap ArrayPositions) Equals(other ArrayPositions) bool { func (ap ArrayPositions) Equals(other ArrayPositions) bool {
@ -47,6 +62,11 @@ type Location struct {
ArrayPositions ArrayPositions `json:"array_positions"` ArrayPositions ArrayPositions `json:"array_positions"`
} }
// Size returns the static struct size plus one pointer, plus one uint64 for
// every element of ArrayPositions.
func (l *Location) Size() int {
	positionsSize := len(l.ArrayPositions) * size.SizeOfUint64
	return reflectStaticSizeLocation + size.SizeOfPtr + positionsSize
}
type Locations []*Location type Locations []*Location
type TermLocationMap map[string]Locations type TermLocationMap map[string]Locations
@ -117,6 +137,52 @@ func (dm *DocumentMatch) Reset() *DocumentMatch {
return dm return dm
} }
// Size returns the static struct size plus one pointer and the lengths of
// Index, ID and IndexInternalID, and then adds:
//   - the explanation's Size() when set,
//   - for Locations: a string header and key length per field, a string
//     header, key length and slice header per term, and each location's
//     Size(),
//   - for Fragments: a string header, key length and slice header per field,
//     and a string header plus length per fragment,
//   - a string header plus length per Sort entry,
//   - a string header, key length and one pointer per Fields entry (the
//     field values themselves are not measured),
//   - the document's Size() when set.
func (dm *DocumentMatch) Size() int {
	total := reflectStaticSizeDocumentMatch + size.SizeOfPtr
	total += len(dm.Index) + len(dm.ID) + len(dm.IndexInternalID)

	if dm.Expl != nil {
		total += dm.Expl.Size()
	}

	for field, termLocations := range dm.Locations {
		total += size.SizeOfString + len(field)
		for term, locations := range termLocations {
			total += size.SizeOfString + len(term) + size.SizeOfSlice
			for _, loc := range locations {
				total += loc.Size()
			}
		}
	}

	for field, fragments := range dm.Fragments {
		total += size.SizeOfString + len(field) + size.SizeOfSlice
		for _, fragment := range fragments {
			total += size.SizeOfString + len(fragment)
		}
	}

	for _, sortKey := range dm.Sort {
		total += size.SizeOfString + len(sortKey)
	}

	for name := range dm.Fields {
		total += size.SizeOfString + len(name) + size.SizeOfPtr
	}

	if dm.Document != nil {
		total += dm.Document.Size()
	}

	return total
}
func (dm *DocumentMatch) String() string { func (dm *DocumentMatch) String() string {
return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score) return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
} }
@ -135,6 +201,7 @@ type Searcher interface {
SetQueryNorm(float64) SetQueryNorm(float64)
Count() uint64 Count() uint64
Min() int Min() int
Size() int
DocumentMatchPoolSize() int DocumentMatchPoolSize() int
} }
@ -148,3 +215,18 @@ type SearcherOptions struct {
type SearchContext struct { type SearchContext struct {
DocumentMatchPool *DocumentMatchPool DocumentMatchPool *DocumentMatchPool
} }
// Size returns the static sizes of SearchContext and DocumentMatchPool plus
// one pointer for each, plus the Size() of every non-nil entry in the pool's
// avail list when a pool is attached.
func (sc *SearchContext) Size() int {
	total := reflectStaticSizeSearchContext + size.SizeOfPtr +
		reflectStaticSizeDocumentMatchPool + size.SizeOfPtr

	if sc.DocumentMatchPool == nil {
		return total
	}

	for _, dm := range sc.DocumentMatchPool.avail {
		if dm == nil {
			continue
		}
		total += dm.Size()
	}

	return total
}

View File

@ -16,12 +16,21 @@ package searcher
import ( import (
"math" "math"
"reflect"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer" "github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeBooleanSearcher holds the size reported by reflect for a
// BooleanSearcher value; it is computed once in init.
var reflectStaticSizeBooleanSearcher int

func init() {
	reflectStaticSizeBooleanSearcher = int(reflect.TypeOf(BooleanSearcher{}).Size())
}
type BooleanSearcher struct { type BooleanSearcher struct {
indexReader index.IndexReader indexReader index.IndexReader
mustSearcher search.Searcher mustSearcher search.Searcher
@ -52,6 +61,33 @@ func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searc
return &rv, nil return &rv, nil
} }
// Size returns the static struct size plus one pointer and the index
// reader's Size(), the Size() of each of the must, should and must-not
// searchers that is set, the scorer's Size(), and the Size() of every
// non-nil entry in matches.
func (s *BooleanSearcher) Size() int {
	total := reflectStaticSizeBooleanSearcher + size.SizeOfPtr
	total += s.indexReader.Size()

	if s.mustSearcher != nil {
		total += s.mustSearcher.Size()
	}
	if s.shouldSearcher != nil {
		total += s.shouldSearcher.Size()
	}
	if s.mustNotSearcher != nil {
		total += s.mustNotSearcher.Size()
	}

	total += s.scorer.Size()

	for _, match := range s.matches {
		if match == nil {
			continue
		}
		total += match.Size()
	}

	return total
}
func (s *BooleanSearcher) computeQueryNorm() { func (s *BooleanSearcher) computeQueryNorm() {
// first calculate sum of squared weights // first calculate sum of squared weights
sumOfSquaredWeights := 0.0 sumOfSquaredWeights := 0.0

View File

@ -16,13 +16,22 @@ package searcher
import ( import (
"math" "math"
"reflect"
"sort" "sort"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer" "github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeConjunctionSearcher holds the size in bytes of a
// ConjunctionSearcher value as reported by reflect; it is assigned once in
// init.
var reflectStaticSizeConjunctionSearcher int

func init() {
	reflectStaticSizeConjunctionSearcher = int(reflect.TypeOf(ConjunctionSearcher{}).Size())
}
type ConjunctionSearcher struct { type ConjunctionSearcher struct {
indexReader index.IndexReader indexReader index.IndexReader
searchers OrderedSearcherList searchers OrderedSearcherList
@ -54,6 +63,23 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
return &rv, nil return &rv, nil
} }
// Size returns an estimate, in bytes, of the memory held by this
// ConjunctionSearcher: its static size plus a pointer, the scorer, every
// child searcher, and every non-nil entry in currs.
func (s *ConjunctionSearcher) Size() int {
	total := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr
	total += s.scorer.Size()

	for _, child := range s.searchers {
		total += child.Size()
	}

	for _, cur := range s.currs {
		if cur == nil {
			continue
		}
		total += cur.Size()
	}
	return total
}
func (s *ConjunctionSearcher) computeQueryNorm() { func (s *ConjunctionSearcher) computeQueryNorm() {
// first calculate sum of squared weights // first calculate sum of squared weights
sumOfSquaredWeights := 0.0 sumOfSquaredWeights := 0.0

View File

@ -17,13 +17,22 @@ package searcher
import ( import (
"fmt" "fmt"
"math" "math"
"reflect"
"sort" "sort"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer" "github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeDisjunctionSearcher holds the size in bytes of a
// DisjunctionSearcher value as reported by reflect; it is assigned once in
// init.
var reflectStaticSizeDisjunctionSearcher int

func init() {
	reflectStaticSizeDisjunctionSearcher = int(reflect.TypeOf(DisjunctionSearcher{}).Size())
}
// DisjunctionMaxClauseCount is a compile time setting that applications can // DisjunctionMaxClauseCount is a compile time setting that applications can
// adjust to non-zero value to cause the DisjunctionSearcher to return an // adjust to non-zero value to cause the DisjunctionSearcher to return an
// error instead of executing searches when the size exceeds this value. // error instead of executing searches when the size exceeds this value.
@ -90,6 +99,32 @@ func newDisjunctionSearcher(indexReader index.IndexReader,
return &rv, nil return &rv, nil
} }
// Size returns an estimate, in bytes, of the memory held by this
// DisjunctionSearcher: its static size plus a pointer, the index reader, the
// scorer, every child searcher, every non-nil entry in currs and matching,
// and size.SizeOfInt for each element of matchingIdxs.
func (s *DisjunctionSearcher) Size() int {
	total := reflectStaticSizeDisjunctionSearcher + size.SizeOfPtr
	total += s.indexReader.Size()
	total += s.scorer.Size()

	for _, child := range s.searchers {
		total += child.Size()
	}

	for _, cur := range s.currs {
		if cur == nil {
			continue
		}
		total += cur.Size()
	}

	for _, m := range s.matching {
		if m == nil {
			continue
		}
		total += m.Size()
	}

	// matchingIdxs is accounted for per element rather than via a Size method.
	total += len(s.matchingIdxs) * size.SizeOfInt
	return total
}
func (s *DisjunctionSearcher) computeQueryNorm() { func (s *DisjunctionSearcher) computeQueryNorm() {
// first calculate sum of squared weights // first calculate sum of squared weights
sumOfSquaredWeights := 0.0 sumOfSquaredWeights := 0.0

View File

@ -15,11 +15,21 @@
package searcher package searcher
import ( import (
"reflect"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer" "github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeDocIDSearcher holds the size in bytes of a DocIDSearcher
// value as reported by reflect; it is assigned once in init.
var reflectStaticSizeDocIDSearcher int

func init() {
	reflectStaticSizeDocIDSearcher = int(reflect.TypeOf(DocIDSearcher{}).Size())
}
// DocIDSearcher returns documents matching a predefined set of identifiers. // DocIDSearcher returns documents matching a predefined set of identifiers.
type DocIDSearcher struct { type DocIDSearcher struct {
reader index.DocIDReader reader index.DocIDReader
@ -42,6 +52,12 @@ func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64
}, nil }, nil
} }
// Size returns an estimate, in bytes, of the memory held by this
// DocIDSearcher: its static size plus a pointer, the doc ID reader, and the
// scorer.
func (s *DocIDSearcher) Size() int {
	total := reflectStaticSizeDocIDSearcher + size.SizeOfPtr
	total += s.reader.Size()
	total += s.scorer.Size()
	return total
}
func (s *DocIDSearcher) Count() uint64 { func (s *DocIDSearcher) Count() uint64 {
return uint64(s.count) return uint64(s.count)
} }

View File

@ -15,10 +15,20 @@
package searcher package searcher
import ( import (
"reflect"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeFilteringSearcher holds the size in bytes of a
// FilteringSearcher value as reported by reflect; it is assigned once in
// init.
var reflectStaticSizeFilteringSearcher int

func init() {
	reflectStaticSizeFilteringSearcher = int(reflect.TypeOf(FilteringSearcher{}).Size())
}
// FilterFunc defines a function which can filter documents // FilterFunc defines a function which can filter documents
// returning true means keep the document // returning true means keep the document
// returning false means do not keep the document // returning false means do not keep the document
@ -38,6 +48,11 @@ func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearch
} }
} }
// Size returns an estimate, in bytes, of the memory held by this
// FilteringSearcher: its static size plus a pointer, and the size reported
// by the wrapped child searcher.
func (f *FilteringSearcher) Size() int {
	total := reflectStaticSizeFilteringSearcher + size.SizeOfPtr
	total += f.child.Size()
	return total
}
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
next, err := f.child.Next(ctx) next, err := f.child.Next(ctx)
for next != nil && err == nil { for next != nil && err == nil {

View File

@ -15,11 +15,21 @@
package searcher package searcher
import ( import (
"reflect"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer" "github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeMatchAllSearcher holds the size in bytes of a
// MatchAllSearcher value as reported by reflect; it is assigned once in init.
var reflectStaticSizeMatchAllSearcher int

func init() {
	reflectStaticSizeMatchAllSearcher = int(reflect.TypeOf(MatchAllSearcher{}).Size())
}
type MatchAllSearcher struct { type MatchAllSearcher struct {
indexReader index.IndexReader indexReader index.IndexReader
reader index.DocIDReader reader index.DocIDReader
@ -46,6 +56,13 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options s
}, nil }, nil
} }
// Size returns an estimate, in bytes, of the memory held by this
// MatchAllSearcher: its static size plus a pointer, the index reader, the
// doc ID reader, and the scorer.
func (s *MatchAllSearcher) Size() int {
	total := reflectStaticSizeMatchAllSearcher + size.SizeOfPtr
	total += s.indexReader.Size()
	total += s.reader.Size()
	total += s.scorer.Size()
	return total
}
func (s *MatchAllSearcher) Count() uint64 { func (s *MatchAllSearcher) Count() uint64 {
return s.count return s.count
} }

View File

@ -15,10 +15,20 @@
package searcher package searcher
import ( import (
"reflect"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeMatchNoneSearcher holds the size in bytes of a
// MatchNoneSearcher value as reported by reflect; it is assigned once in
// init.
var reflectStaticSizeMatchNoneSearcher int

func init() {
	reflectStaticSizeMatchNoneSearcher = int(reflect.TypeOf(MatchNoneSearcher{}).Size())
}
type MatchNoneSearcher struct { type MatchNoneSearcher struct {
indexReader index.IndexReader indexReader index.IndexReader
} }
@ -29,6 +39,11 @@ func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, er
}, nil }, nil
} }
// Size returns an estimate, in bytes, of the memory held by this
// MatchNoneSearcher: its static size plus a pointer, and the index reader.
func (s *MatchNoneSearcher) Size() int {
	total := reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr
	total += s.indexReader.Size()
	return total
}
func (s *MatchNoneSearcher) Count() uint64 { func (s *MatchNoneSearcher) Count() uint64 {
return uint64(0) return uint64(0)
} }

View File

@ -17,11 +17,20 @@ package searcher
import ( import (
"fmt" "fmt"
"math" "math"
"reflect"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizePhraseSearcher holds the size in bytes of a PhraseSearcher
// value as reported by reflect; it is assigned once in init.
var reflectStaticSizePhraseSearcher int

func init() {
	reflectStaticSizePhraseSearcher = int(reflect.TypeOf(PhraseSearcher{}).Size())
}
type PhraseSearcher struct { type PhraseSearcher struct {
indexReader index.IndexReader indexReader index.IndexReader
mustSearcher *ConjunctionSearcher mustSearcher *ConjunctionSearcher
@ -32,6 +41,28 @@ type PhraseSearcher struct {
initialized bool initialized bool
} }
// Size returns an estimate, in bytes, of the memory held by this
// PhraseSearcher: its static size plus a pointer, the index reader, the must
// searcher and current must match when set, and the terms table (one slice
// header per inner list, plus a string header and the byte length of every
// term).
func (s *PhraseSearcher) Size() int {
	total := reflectStaticSizePhraseSearcher + size.SizeOfPtr
	total += s.indexReader.Size()

	if s.mustSearcher != nil {
		total += s.mustSearcher.Size()
	}
	if s.currMust != nil {
		total += s.currMust.Size()
	}

	for _, alternatives := range s.terms {
		total += size.SizeOfSlice
		for _, term := range alternatives {
			total += size.SizeOfString + len(term)
		}
	}
	return total
}
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) { func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
// turn flat terms []string into [][]string // turn flat terms []string into [][]string
mterms := make([][]string, len(terms)) mterms := make([][]string, len(terms))

View File

@ -15,11 +15,21 @@
package searcher package searcher
import ( import (
"reflect"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer" "github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
) )
// reflectStaticSizeTermSearcher holds the size in bytes of a TermSearcher
// value as reported by reflect; it is assigned once in init.
var reflectStaticSizeTermSearcher int

func init() {
	reflectStaticSizeTermSearcher = int(reflect.TypeOf(TermSearcher{}).Size())
}
type TermSearcher struct { type TermSearcher struct {
indexReader index.IndexReader indexReader index.IndexReader
reader index.TermFieldReader reader index.TermFieldReader
@ -63,6 +73,14 @@ func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field stri
}, nil }, nil
} }
// Size returns an estimate, in bytes, of the memory held by this
// TermSearcher: its static size plus a pointer, the index reader, the term
// field reader, the tfd value, and the scorer.
func (s *TermSearcher) Size() int {
	total := reflectStaticSizeTermSearcher + size.SizeOfPtr
	total += s.indexReader.Size()
	total += s.reader.Size()
	total += s.tfd.Size()
	total += s.scorer.Size()
	return total
}
func (s *TermSearcher) Count() uint64 { func (s *TermSearcher) Count() uint64 {
return s.reader.Count() return s.reader.Count()
} }

57
size/sizes.go Normal file
View File

@ -0,0 +1,57 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package size
import (
"reflect"
)
// init records the in-memory size, in bytes, of each basic Go type that the
// SizeOf* variables describe, as reported by reflect for the current
// platform.
func init() {
	var b bool
	SizeOfBool = int(reflect.TypeOf(b).Size())
	var f32 float32
	SizeOfFloat32 = int(reflect.TypeOf(f32).Size())
	var f64 float64
	SizeOfFloat64 = int(reflect.TypeOf(f64).Size())
	// SizeOfInt was previously declared but never assigned, leaving it at 0
	// and making every per-int contribution to a size estimate vanish.
	var i int
	SizeOfInt = int(reflect.TypeOf(i).Size())
	var m map[int]int
	SizeOfMap = int(reflect.TypeOf(m).Size())
	var p *int
	SizeOfPtr = int(reflect.TypeOf(p).Size())
	var sl []int
	SizeOfSlice = int(reflect.TypeOf(sl).Size())
	var str string
	SizeOfString = int(reflect.TypeOf(str).Size())
	var u8 uint8
	SizeOfUint8 = int(reflect.TypeOf(u8).Size())
	var u16 uint16
	SizeOfUint16 = int(reflect.TypeOf(u16).Size())
	var u32 uint32
	SizeOfUint32 = int(reflect.TypeOf(u32).Size())
	var u64 uint64
	SizeOfUint64 = int(reflect.TypeOf(u64).Size())
}

// The SizeOf* variables hold the size in bytes of a value of the named type.
// For map, slice and string this is the size of the header/descriptor value
// only, not of the data it refers to. All are populated by init.
var (
	SizeOfBool    int
	SizeOfFloat32 int
	SizeOfFloat64 int
	SizeOfInt     int
	SizeOfMap     int
	SizeOfPtr     int
	SizeOfSlice   int
	SizeOfString  int
	SizeOfUint8   int
	SizeOfUint16  int
	SizeOfUint32  int
	SizeOfUint64  int
)