0
0
Fork 0

MB-28162: Provide API to estimate memory needed to run a search query

This API (unexported) will estimate the amount of memory needed to execute
a search query over an index before the collector begins data collection.

Sample estimates for certain queries:
{Size: 10, BenchmarkUpsidedownSearchOverhead}
                                                           ESTIMATE    BENCHMEM
TermQuery                                                  4616        4796
MatchQuery                                                 5210        5405
DisjunctionQuery (Match queries)                           7700        8447
DisjunctionQuery (Term queries)                            6514        6591
ConjunctionQuery (Match queries)                           7524        8175
Nested disjunction query (disjunction of disjunctions)     10306       10708
…
This commit is contained in:
abhinavdangeti 2018-03-01 17:12:16 -08:00 committed by Abhinav Dangeti
parent 2b005f1e23
commit 7e36109b3c
48 changed files with 1242 additions and 118 deletions

View File

@ -14,7 +14,19 @@
package document
import "fmt"
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeDocument caches the static (shallow) size of a Document
// struct as reported by reflect; it is populated once by init.
var reflectStaticSizeDocument int

func init() {
	reflectStaticSizeDocument = int(reflect.TypeOf(Document{}).Size())
}
type Document struct {
ID string `json:"id"`
@ -30,6 +42,13 @@ func NewDocument(id string) *Document {
}
}
// Size returns an estimate, in bytes, of the memory attributed to the
// document: the static struct size plus one pointer, the bytes of the ID,
// one pointer per field, and one pointer plus the static CompositeField
// size per composite field.
func (d *Document) Size() int {
	fieldsSize := len(d.Fields) * size.SizeOfPtr
	compositeSize := len(d.CompositeFields) *
		(size.SizeOfPtr + reflectStaticSizeCompositeField)

	return reflectStaticSizeDocument + size.SizeOfPtr +
		len(d.ID) + fieldsSize + compositeSize
}
func (d *Document) AddField(f Field) *Document {
switch f := f.(type) {
case *CompositeField:

View File

@ -15,9 +15,18 @@
package document
import (
"reflect"
"github.com/blevesearch/bleve/analysis"
)
// reflectStaticSizeCompositeField caches the static (shallow) size of a
// CompositeField struct as reported by reflect; it is populated by init.
var reflectStaticSizeCompositeField int

func init() {
	reflectStaticSizeCompositeField = int(reflect.TypeOf(CompositeField{}).Size())
}
const DefaultCompositeIndexingOptions = IndexField
type CompositeField struct {

View File

@ -18,11 +18,23 @@ import (
"bytes"
"encoding/json"
"fmt"
"reflect"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/size"
)
// Static (shallow) struct sizes as reported by reflect, computed once in
// init and reused by the Size methods of the corresponding types.
var (
	reflectStaticSizeTermFieldDoc    int
	reflectStaticSizeTermFieldVector int
)

func init() {
	reflectStaticSizeTermFieldDoc = int(reflect.TypeOf(TermFieldDoc{}).Size())
	reflectStaticSizeTermFieldVector = int(reflect.TypeOf(TermFieldVector{}).Size())
}
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
type Index interface {
@ -82,6 +94,8 @@ type IndexReader interface {
DumpFields() chan interface{}
Close() error
Size() int
}
// FieldTerms contains the terms used by a document, keyed by field
@ -115,6 +129,11 @@ type TermFieldVector struct {
End uint64
}
// Size returns an estimate, in bytes, of the memory attributed to the
// vector: the static struct size plus one pointer, the bytes of Field, and
// one uint64 per array position.
func (tfv *TermFieldVector) Size() int {
	arrayPositionsSize := len(tfv.ArrayPositions) * size.SizeOfUint64

	return reflectStaticSizeTermFieldVector + size.SizeOfPtr +
		len(tfv.Field) + arrayPositionsSize
}
// IndexInternalID is an opaque document identifier internal to the index impl
type IndexInternalID []byte
@ -134,6 +153,17 @@ type TermFieldDoc struct {
Vectors []*TermFieldVector
}
// Size returns an estimate, in bytes, of the memory attributed to the
// TermFieldDoc: the static struct size plus one pointer, the bytes of Term
// and ID, and the Size of every entry in Vectors.
func (tfd *TermFieldDoc) Size() int {
	total := reflectStaticSizeTermFieldDoc + size.SizeOfPtr
	total += len(tfd.Term) + len(tfd.ID)

	for idx := range tfd.Vectors {
		total += tfd.Vectors[idx].Size()
	}

	return total
}
// Reset allows an already allocated TermFieldDoc to be reused
func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
// remember the []byte used for the ID
@ -161,6 +191,8 @@ type TermFieldReader interface {
// Count returns the number of documents containing the term in this field.
Count() uint64
Close() error
Size() int
}
type DictEntry struct {
@ -185,6 +217,9 @@ type DocIDReader interface {
// will start there instead. If ID is greater than or equal to the end of
// the range, Next() call will return io.EOF.
Advance(ID IndexInternalID) (IndexInternalID, error)
Size() int
Close() error
}

View File

@ -472,20 +472,20 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) {
}
func (s *Scorch) MemoryUsed() uint64 {
var memUsed uint64
var memUsed int
s.rootLock.RLock()
if s.root != nil {
for _, segmentSnapshot := range s.root.segment {
memUsed += 8 /* size of id -> uint64 */ +
segmentSnapshot.segment.SizeInBytes()
segmentSnapshot.segment.Size()
if segmentSnapshot.deleted != nil {
memUsed += segmentSnapshot.deleted.GetSizeInBytes()
memUsed += int(segmentSnapshot.deleted.GetSizeInBytes())
}
memUsed += segmentSnapshot.cachedDocs.sizeInBytes()
memUsed += segmentSnapshot.cachedDocs.size()
}
}
s.rootLock.RUnlock()
return memUsed
return uint64(memUsed)
}
func (s *Scorch) markIneligibleForRemoval(filename string) {

View File

@ -46,6 +46,10 @@ func (e *EmptySegment) Close() error {
return nil
}
// Size reports the memory footprint of the empty segment, which is always 0.
//
// The return type is int (not uint64) so that the method matches the
// Size() int signature declared on the Segment interface; with a uint64
// result EmptySegment would not satisfy that interface.
func (e *EmptySegment) Size() int {
	return 0
}
func (e *EmptySegment) AddRef() {
}
@ -84,6 +88,10 @@ func (e *EmptyPostingsList) Iterator() PostingsIterator {
return &EmptyPostingsIterator{}
}
// Size reports the memory footprint of the empty postings list; always 0.
func (e *EmptyPostingsList) Size() int {
return 0
}
// Count reports the number of items on the empty postings list; always 0.
func (e *EmptyPostingsList) Count() uint64 {
return 0
}
@ -93,3 +101,7 @@ type EmptyPostingsIterator struct{}
// Next always returns a nil Posting and a nil error.
func (e *EmptyPostingsIterator) Next() (Posting, error) {
return nil, nil
}
// Size reports the memory footprint of the empty iterator; always 0.
func (e *EmptyPostingsIterator) Size() int {
return 0
}

View File

@ -45,7 +45,7 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment {
}
// compute memory usage of segment
s.updateSizeInBytes()
s.updateSize()
// professional debugging
//

View File

@ -15,14 +15,23 @@
package mem
import (
"reflect"
"sort"
"strings"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeDictionary caches the static (shallow) size of a
// Dictionary struct as reported by reflect; it is populated by init.
var reflectStaticSizeDictionary int

func init() {
	reflectStaticSizeDictionary = int(reflect.TypeOf(Dictionary{}).Size())
}
// Dictionary is the in-memory representation of the term dictionary
type Dictionary struct {
segment *Segment
@ -30,6 +39,17 @@ type Dictionary struct {
fieldID uint16
}
// Size returns an estimate, in bytes, of the memory attributed to the
// dictionary: the static struct size plus one pointer, the bytes of the
// field name and, when a segment is attached, that segment's Size.
func (d *Dictionary) Size() int {
	total := reflectStaticSizeDictionary + size.SizeOfPtr + len(d.field)
	if d.segment == nil {
		return total
	}
	return total + int(d.segment.Size())
}
// PostingsList returns the postings list for the specified term
func (d *Dictionary) PostingsList(term string,
except *roaring.Bitmap) (segment.PostingsList, error) {

View File

@ -15,10 +15,29 @@
package mem
import (
"reflect"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
)
// Static (shallow) struct sizes as reported by reflect, computed once in
// init and reused by the Size methods of the corresponding types.
var (
	reflectStaticSizePostingsList     int
	reflectStaticSizePostingsIterator int
	reflectStaticSizePosting          int
	reflectStaticSizeLocation         int
)

func init() {
	reflectStaticSizePostingsList = int(reflect.TypeOf(PostingsList{}).Size())
	reflectStaticSizePostingsIterator = int(reflect.TypeOf(PostingsIterator{}).Size())
	reflectStaticSizePosting = int(reflect.TypeOf(Posting{}).Size())
	reflectStaticSizeLocation = int(reflect.TypeOf(Location{}).Size())
}
// PostingsList is an in-memory representation of a postings list
type PostingsList struct {
dictionary *Dictionary
@ -27,6 +46,20 @@ type PostingsList struct {
except *roaring.Bitmap
}
// Size returns an estimate, in bytes, of the memory attributed to the
// postings list: the static struct size plus one pointer, the Size of the
// owning dictionary (if any) and the size of the except bitmap (if any).
func (p *PostingsList) Size() int {
	total := reflectStaticSizePostingsList + size.SizeOfPtr

	if dict := p.dictionary; dict != nil {
		total += dict.Size()
	}
	if excluded := p.except; excluded != nil {
		total += int(excluded.GetSizeInBytes())
	}

	return total
}
// Count returns the number of items on this postings list
func (p *PostingsList) Count() uint64 {
var rv uint64
@ -83,6 +116,16 @@ type PostingsIterator struct {
reuse Posting
}
// Size returns an estimate, in bytes, of the memory attributed to the
// iterator: the static struct size plus one pointer and, when present, the
// size of the locations bitmap.
func (i *PostingsIterator) Size() int {
	total := reflectStaticSizePostingsIterator + size.SizeOfPtr
	if i.locations == nil {
		return total
	}
	return total + int(i.locations.GetSizeInBytes())
}
// Next returns the next posting on the postings list, or nil at the end
func (i *PostingsIterator) Next() (segment.Posting, error) {
if i.actual == nil || !i.actual.HasNext() {
@ -121,6 +164,16 @@ type Posting struct {
hasLoc bool
}
// Size returns an estimate, in bytes, of the memory attributed to the
// posting: the static struct size plus one pointer and, when the posting
// references an iterator, that iterator's Size.
func (p *Posting) Size() int {
	total := reflectStaticSizePosting + size.SizeOfPtr
	if p.iterator == nil {
		return total
	}
	return total + p.iterator.Size()
}
// Number returns the document number of this posting in this segment
func (p *Posting) Number() uint64 {
return p.docNum
@ -158,6 +211,15 @@ type Location struct {
offset int
}
// Size returns an estimate, in bytes, of the memory attributed to the
// location: the static struct size and, when the location references a
// posting, that posting's Size.
func (l *Location) Size() int {
	total := reflectStaticSizeLocation
	if l.p == nil {
		return total
	}
	return total + l.p.Size()
}
// Field returns the name of the field (useful in composite fields to know
// which original field the value came from)
func (l *Location) Field() string {

View File

@ -16,11 +16,20 @@ package mem
import (
"fmt"
"reflect"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeSegment caches the static (shallow) size of a Segment
// struct as reported by reflect; it is populated by init.
var reflectStaticSizeSegment int

func init() {
	reflectStaticSizeSegment = int(reflect.TypeOf(Segment{}).Size())
}
// _id field is always guaranteed to have fieldID of 0
const idFieldID uint16 = 0
@ -96,7 +105,7 @@ type Segment struct {
// Footprint of the segment, updated when analyzed document mutations
// are added into the segment
sizeInBytes uint64
sizeInBytes int
}
// New builds a new empty Segment
@ -107,99 +116,87 @@ func New() *Segment {
}
}
func (s *Segment) updateSizeInBytes() {
var sizeInBytes uint64
func (s *Segment) updateSize() {
sizeInBytes := reflectStaticSizeSegment
// FieldsMap, FieldsInv
for k, _ := range s.FieldsMap {
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
2 /* size of uint16 */)
sizeInBytes += (len(k)+size.SizeOfString)*2 +
size.SizeOfUint16
}
// overhead from the data structures
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
// Dicts, DictKeys
for _, entry := range s.Dicts {
for k, _ := range entry {
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
8 /* size of uint64 */)
sizeInBytes += (len(k)+size.SizeOfString)*2 +
size.SizeOfUint64
}
// overhead from the data structures
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
sizeInBytes += (size.SizeOfMap + size.SizeOfSlice)
}
sizeInBytes += (segment.SizeOfSlice * 2)
// Postings, PostingsLocs
for i := 0; i < len(s.Postings); i++ {
sizeInBytes += (s.Postings[i].GetSizeInBytes() + segment.SizeOfPointer) +
(s.PostingsLocs[i].GetSizeInBytes() + segment.SizeOfPointer)
sizeInBytes += (int(s.Postings[i].GetSizeInBytes()) + size.SizeOfPtr) +
(int(s.PostingsLocs[i].GetSizeInBytes()) + size.SizeOfPtr)
}
sizeInBytes += (segment.SizeOfSlice * 2)
// Freqs, Norms
for i := 0; i < len(s.Freqs); i++ {
sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ +
len(s.Norms[i])*4 /* size of float32 */) +
(segment.SizeOfSlice * 2)
sizeInBytes += (len(s.Freqs[i])*size.SizeOfUint64 +
len(s.Norms[i])*size.SizeOfFloat32) +
(size.SizeOfSlice * 2)
}
sizeInBytes += (segment.SizeOfSlice * 2)
// Location data
for i := 0; i < len(s.Locfields); i++ {
sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ +
len(s.Locstarts[i])*8 /* size of uint64 */ +
len(s.Locends[i])*8 /* size of uint64 */ +
len(s.Locpos[i])*8 /* size of uint64 */)
sizeInBytes += len(s.Locfields[i])*size.SizeOfUint16 +
len(s.Locstarts[i])*size.SizeOfUint64 +
len(s.Locends[i])*size.SizeOfUint64 +
len(s.Locpos[i])*size.SizeOfUint64
for j := 0; j < len(s.Locarraypos[i]); j++ {
sizeInBytes += uint64(len(s.Locarraypos[i][j])*8 /* size of uint64 */) +
segment.SizeOfSlice
sizeInBytes += len(s.Locarraypos[i][j])*size.SizeOfUint64 +
size.SizeOfSlice
}
sizeInBytes += (segment.SizeOfSlice * 5)
sizeInBytes += (size.SizeOfSlice * 5)
}
sizeInBytes += (segment.SizeOfSlice * 5)
// Stored data
for i := 0; i < len(s.Stored); i++ {
for _, v := range s.Stored[i] {
sizeInBytes += uint64(2 /* size of uint16 */)
sizeInBytes += size.SizeOfUint16
for _, arr := range v {
sizeInBytes += uint64(len(arr)) + segment.SizeOfSlice
sizeInBytes += len(arr) + size.SizeOfSlice
}
sizeInBytes += segment.SizeOfSlice
sizeInBytes += size.SizeOfSlice
}
for _, v := range s.StoredTypes[i] {
sizeInBytes += uint64(2 /* size of uint16 */ +len(v)) + segment.SizeOfSlice
sizeInBytes += size.SizeOfUint16 + len(v) + size.SizeOfSlice
}
for _, v := range s.StoredPos[i] {
sizeInBytes += uint64(2 /* size of uint16 */)
sizeInBytes += size.SizeOfUint16
for _, arr := range v {
sizeInBytes += uint64(len(arr)*8 /* size of uint64 */) +
segment.SizeOfSlice
sizeInBytes += len(arr)*size.SizeOfUint64 +
size.SizeOfSlice
}
sizeInBytes += segment.SizeOfSlice
sizeInBytes += size.SizeOfSlice
}
// overhead from map(s) within Stored, StoredTypes, StoredPos
sizeInBytes += (segment.SizeOfMap * 3)
sizeInBytes += (size.SizeOfMap * 3)
}
// overhead from data structures: Stored, StoredTypes, StoredPos
sizeInBytes += (segment.SizeOfSlice * 3)
// DocValueFields
sizeInBytes += uint64(len(s.DocValueFields)*3 /* size of uint16 + bool */) +
segment.SizeOfMap
// SizeInBytes
sizeInBytes += uint64(8)
sizeInBytes += len(s.DocValueFields) * (size.SizeOfUint16 + size.SizeOfBool)
s.sizeInBytes = sizeInBytes
}
func (s *Segment) SizeInBytes() uint64 {
func (s *Segment) Size() int {
return s.sizeInBytes
}

View File

@ -169,7 +169,7 @@ func TestSingle(t *testing.T) {
t.Fatalf("segment nil, not expected")
}
if segment.SizeInBytes() <= 0 {
if segment.Size() <= 0 {
t.Fatalf("segment size not updated")
}

View File

@ -19,12 +19,6 @@ import (
"github.com/blevesearch/bleve/index"
)
// Overhead from go data structures when deployed on a 64-bit system.
const SizeOfMap uint64 = 8
const SizeOfPointer uint64 = 8
const SizeOfSlice uint64 = 24
const SizeOfString uint64 = 16
// DocumentFieldValueVisitor defines a callback to be visited for each
// stored field value. The return value determines if the visitor
// should keep going. Returning true continues visiting, false stops.
@ -42,7 +36,7 @@ type Segment interface {
Close() error
SizeInBytes() uint64
Size() int
AddRef()
DecRef() error
@ -63,6 +57,8 @@ type DictionaryIterator interface {
type PostingsList interface {
Iterator() PostingsIterator
Size() int
Count() uint64
// NOTE deferred for future work
@ -77,6 +73,8 @@ type PostingsIterator interface {
// implementations may return a shared instance to reduce memory
// allocations.
Next() (Posting, error)
Size() int
}
type Posting interface {
@ -86,6 +84,8 @@ type Posting interface {
Norm() float64
Locations() []Location
Size() int
}
type Location interface {
@ -94,6 +94,7 @@ type Location interface {
End() uint64
Pos() uint64
ArrayPositions() []uint64
Size() int
}
// DocumentFieldTermVisitable is implemented by various scorch segment

View File

@ -18,10 +18,18 @@ import (
"bytes"
"encoding/binary"
"io"
"reflect"
"github.com/golang/snappy"
)
// reflectStaticSizeMetaData caches the static (shallow) size of the
// MetaData type as reported by reflect; it is populated by init.
var reflectStaticSizeMetaData int

func init() {
	// Obtain the type through a nil *MetaData so no value has to be built.
	metaType := reflect.TypeOf((*MetaData)(nil)).Elem()
	reflectStaticSizeMetaData = int(metaType.Size())
}
var termSeparator byte = 0xff
var termSeparatorSplitSlice = []byte{termSeparator}

View File

@ -19,13 +19,21 @@ import (
"encoding/binary"
"fmt"
"math"
"reflect"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
"github.com/golang/snappy"
)
// reflectStaticSizedocValueIterator caches the static (shallow) size of a
// docValueIterator struct as reported by reflect; populated by init.
var reflectStaticSizedocValueIterator int

func init() {
	reflectStaticSizedocValueIterator = int(reflect.TypeOf(docValueIterator{}).Size())
}
type docValueIterator struct {
field string
curChunkNum uint64
@ -36,21 +44,12 @@ type docValueIterator struct {
curChunkData []byte // compressed data cache
}
func (di *docValueIterator) sizeInBytes() uint64 {
// curChunkNum, numChunks, dvDataLoc --> uint64
sizeInBytes := 24
// field
sizeInBytes += (len(di.field) + int(segment.SizeOfString))
// chunkLens, curChunkHeader
sizeInBytes += len(di.chunkLens)*8 +
len(di.curChunkHeader)*24 +
int(segment.SizeOfSlice*2) /* overhead from slices */
// curChunkData is mmap'ed, not included
return uint64(sizeInBytes)
// size returns an estimate, in bytes, of the memory attributed to the
// iterator: the static struct size plus one pointer, the bytes of the
// field name, one uint64 per chunk length, one static MetaData per chunk
// header entry, and the bytes of the current chunk data.
func (di *docValueIterator) size() int {
	total := reflectStaticSizedocValueIterator + size.SizeOfPtr
	total += len(di.field)
	total += len(di.chunkLens) * size.SizeOfUint64
	total += len(di.curChunkHeader) * reflectStaticSizeMetaData
	total += len(di.curChunkData)
	return total
}
func (di *docValueIterator) fieldName() string {

View File

@ -19,12 +19,30 @@ import (
"encoding/binary"
"fmt"
"math"
"reflect"
"github.com/RoaringBitmap/roaring"
"github.com/Smerity/govarint"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
)
// Static (shallow) struct sizes as reported by reflect, computed once in
// init and reused by the Size methods of the corresponding types.
var (
	reflectStaticSizePostingsList     int
	reflectStaticSizePostingsIterator int
	reflectStaticSizePosting          int
	reflectStaticSizeLocation         int
)

func init() {
	reflectStaticSizePostingsList = int(reflect.TypeOf(PostingsList{}).Size())
	reflectStaticSizePostingsIterator = int(reflect.TypeOf(PostingsIterator{}).Size())
	reflectStaticSizePosting = int(reflect.TypeOf(Posting{}).Size())
	reflectStaticSizeLocation = int(reflect.TypeOf(Location{}).Size())
}
// PostingsList is an in-memory representation of a postings list
type PostingsList struct {
sb *SegmentBase
@ -36,6 +54,28 @@ type PostingsList struct {
except *roaring.Bitmap
}
// Size returns an estimate, in bytes, of the memory attributed to the
// postings list: the static struct size plus one pointer, the Size of the
// segment base minus its mmap'ed bytes, and the sizes of whichever of the
// locBitmap, postings and except bitmaps are present.
func (p *PostingsList) Size() int {
	total := reflectStaticSizePostingsList + size.SizeOfPtr

	if base := p.sb; base != nil {
		// do not include the mmap'ed part
		total += base.Size() - len(base.mem)
	}
	if bm := p.locBitmap; bm != nil {
		total += int(bm.GetSizeInBytes())
	}
	if bm := p.postings; bm != nil {
		total += int(bm.GetSizeInBytes())
	}
	if bm := p.except; bm != nil {
		total += int(bm.GetSizeInBytes())
	}

	return total
}
// Iterator returns an iterator for this postings list
func (p *PostingsList) Iterator() segment.PostingsIterator {
return p.iterator(nil)
@ -193,6 +233,25 @@ type PostingsIterator struct {
nextLocs []Location // reused across Next() calls
}
// Size returns an estimate, in bytes, of the memory attributed to the
// iterator: the static struct size plus one pointer, the bytes of the
// current freq/norm and location chunks, one uint64 per chunk length, the
// Size of the reusable next posting, the locBitmap (if any) and the Size
// of every reusable location in nextLocs.
//
// NOTE(review): Posting.Size adds the Size of the posting's iterator; if
// i.next.iterator ever refers back to i this call would recurse without
// bound — confirm against how next is populated.
func (i *PostingsIterator) Size() int {
	total := reflectStaticSizePostingsIterator + size.SizeOfPtr
	total += len(i.currChunkFreqNorm)
	total += len(i.currChunkLoc)
	total += len(i.freqChunkLens) * size.SizeOfUint64
	total += len(i.locChunkLens) * size.SizeOfUint64
	total += i.next.Size()

	if i.locBitmap != nil {
		total += int(i.locBitmap.GetSizeInBytes())
	}

	for idx := range i.nextLocs {
		total += i.nextLocs[idx].Size()
	}

	return total
}
func (i *PostingsIterator) loadChunk(chunk int) error {
if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) {
return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens))
@ -444,6 +503,20 @@ type Posting struct {
locs []segment.Location
}
// Size returns an estimate, in bytes, of the memory attributed to the
// posting: the static struct size, the Size of the referenced iterator (if
// any) and the Size of every location in locs.
func (p *Posting) Size() int {
	total := reflectStaticSizePosting

	if it := p.iterator; it != nil {
		total += it.Size()
	}

	for idx := range p.locs {
		total += p.locs[idx].Size()
	}

	return total
}
// Number returns the document number of this posting in this segment
func (p *Posting) Number() uint64 {
return p.docNum
@ -473,6 +546,12 @@ type Location struct {
ap []uint64
}
// Size returns an estimate, in bytes, of the memory attributed to the
// location: the static struct size, the bytes of the field name and one
// uint64 per array position.
func (l *Location) Size() int {
	arrayPositionsSize := len(l.ap) * size.SizeOfUint64
	return reflectStaticSizeLocation + len(l.field) + arrayPositionsSize
}
// Field returns the name of the field (useful in composite fields to know
// which original field the value came from)
func (l *Location) Field() string {

View File

@ -20,16 +20,25 @@ import (
"fmt"
"io"
"os"
"reflect"
"sync"
"github.com/RoaringBitmap/roaring"
"github.com/Smerity/govarint"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
"github.com/couchbase/vellum"
mmap "github.com/edsrzf/mmap-go"
"github.com/golang/snappy"
)
// reflectStaticSizeSegmentBase caches the static (shallow) size of a
// SegmentBase struct as reported by reflect; it is populated by init.
var reflectStaticSizeSegmentBase int

func init() {
	reflectStaticSizeSegmentBase = int(reflect.TypeOf(SegmentBase{}).Size())
}
// Open returns a zap impl of a segment
func Open(path string) (segment.Segment, error) {
f, err := os.Open(path)
@ -92,6 +101,32 @@ type SegmentBase struct {
fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field
}
// Size returns an estimate, in bytes, of the memory attributed to the
// segment base. The bytes of sb.mem are included here; callers in this
// file that want to exclude them subtract len(mem) themselves.
func (sb *SegmentBase) Size() int {
	total := reflectStaticSizeSegmentBase + len(sb.mem)

	// fieldsMap: key bytes + string header + uint16 value
	for name := range sb.fieldsMap {
		total += len(name) + size.SizeOfString + size.SizeOfUint16
	}

	// fieldsInv: entry bytes + string header
	for _, name := range sb.fieldsInv {
		total += len(name) + size.SizeOfString
	}

	// dictLocs: one uint64 per entry
	total += len(sb.dictLocs) * size.SizeOfUint64

	// fieldDvIterMap: uint16 key + pointer value, plus each iterator's size
	for _, dvIter := range sb.fieldDvIterMap {
		total += size.SizeOfUint16 + size.SizeOfPtr
		if dvIter != nil {
			total += dvIter.size()
		}
	}

	return total
}
// AddRef is a no-op on SegmentBase.
func (sb *SegmentBase) AddRef() {}
// DecRef does nothing on SegmentBase and always returns nil.
func (sb *SegmentBase) DecRef() (err error) { return nil }
// Close does nothing on SegmentBase and always returns nil.
func (sb *SegmentBase) Close() (err error) { return nil }
@ -111,56 +146,19 @@ type Segment struct {
refs int64
}
func (s *Segment) SizeInBytes() uint64 {
func (s *Segment) Size() int {
// 8 /* size of file pointer */
// 4 /* size of version -> uint32 */
// 4 /* size of crc -> uint32 */
sizeOfUints := 16
sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints
sizeInBytes := (len(s.path) + size.SizeOfString) + sizeOfUints
// mutex, refs -> int64
sizeInBytes += 16
// do not include the mmap'ed part
return uint64(sizeInBytes) + s.SegmentBase.SizeInBytes() - uint64(len(s.mem))
}
func (s *SegmentBase) SizeInBytes() uint64 {
// 4 /* size of memCRC -> uint32 */
// 4 /* size of chunkFactor -> uint32 */
// 8 /* size of numDocs -> uint64 */
// 8 /* size of storedIndexOffset -> uint64 */
// 8 /* size of fieldsIndexOffset -> uint64 */
// 8 /* size of docValueOffset -> uint64 */
sizeInBytes := 40
sizeInBytes += len(s.mem) + int(segment.SizeOfSlice)
// fieldsMap
for k, _ := range s.fieldsMap {
sizeInBytes += (len(k) + int(segment.SizeOfString)) + 2 /* size of uint16 */
}
sizeInBytes += int(segment.SizeOfMap) /* overhead from map */
// fieldsInv, dictLocs
for _, entry := range s.fieldsInv {
sizeInBytes += (len(entry) + int(segment.SizeOfString))
}
sizeInBytes += len(s.dictLocs) * 8 /* size of uint64 */
sizeInBytes += int(segment.SizeOfSlice) * 3 /* overhead from slices */
// fieldDvIterMap
sizeInBytes += len(s.fieldDvIterMap) *
int(segment.SizeOfPointer+2 /* size of uint16 */)
for _, entry := range s.fieldDvIterMap {
if entry != nil {
sizeInBytes += int(entry.sizeInBytes())
}
}
sizeInBytes += int(segment.SizeOfMap)
return uint64(sizeInBytes)
return sizeInBytes + s.SegmentBase.Size() - len(s.mem)
}
func (s *Segment) AddRef() {

View File

@ -27,6 +27,7 @@ import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
)
type asynchSegmentResult struct {
@ -89,6 +90,12 @@ func (i *IndexSnapshot) Close() error {
return i.DecRef()
}
// Size returns only the size of a pointer rather than the footprint of the
// snapshot's contents.
func (i *IndexSnapshot) Size() int {
// Just return the size of the pointer for estimating the overhead
// during Search, a reference of the IndexSnapshot serves as the reader.
return size.SizeOfPtr
}
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
results := make(chan *asynchSegmentResult)

View File

@ -16,17 +16,30 @@ package scorch
import (
"bytes"
"reflect"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeIndexSnapshotDocIDReader caches the static (shallow)
// size of an IndexSnapshotDocIDReader struct; it is populated by init.
var reflectStaticSizeIndexSnapshotDocIDReader int

func init() {
	reflectStaticSizeIndexSnapshotDocIDReader =
		int(reflect.TypeOf(IndexSnapshotDocIDReader{}).Size())
}
// IndexSnapshotDocIDReader reads document IDs from a list of roaring
// iterators, tracking the one currently being consumed in segmentOffset.
type IndexSnapshotDocIDReader struct {
snapshot *IndexSnapshot
iterators []roaring.IntIterable
segmentOffset int // index into iterators of the iterator in use
}
// Size returns the static struct size plus the size of one pointer; the
// contents of snapshot and iterators are not counted.
func (i *IndexSnapshotDocIDReader) Size() int {
return reflectStaticSizeIndexSnapshotDocIDReader + size.SizeOfPtr
}
func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) {
for i.segmentOffset < len(i.iterators) {
if !i.iterators[i.segmentOffset].HasNext() {

View File

@ -16,12 +16,21 @@ package scorch
import (
"bytes"
"reflect"
"sync/atomic"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeIndexSnapshotTermFieldReader caches the static (shallow)
// size of an IndexSnapshotTermFieldReader struct; it is populated by init.
var reflectStaticSizeIndexSnapshotTermFieldReader int

func init() {
	reflectStaticSizeIndexSnapshotTermFieldReader =
		int(reflect.TypeOf(IndexSnapshotTermFieldReader{}).Size())
}
type IndexSnapshotTermFieldReader struct {
term []byte
field string
@ -36,6 +45,27 @@ type IndexSnapshotTermFieldReader struct {
currID index.IndexInternalID
}
// Size returns an estimate, in bytes, of the memory attributed to the
// reader: the static struct size plus one pointer, the bytes of term,
// field and currID, the Size of every postings list and iterator, and the
// Size of the current posting when one is set.
func (i *IndexSnapshotTermFieldReader) Size() int {
	total := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr
	total += len(i.term) + len(i.field) + len(i.currID)

	for _, pl := range i.postings {
		total += pl.Size()
	}

	for _, it := range i.iterators {
		total += it.Size()
	}

	if cur := i.currPosting; cur != nil {
		total += cur.Size()
	}

	return total
}
func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
rv := preAlloced
if rv == nil {

View File

@ -213,7 +213,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e
return nil
}
func (c *cachedDocs) sizeInBytes() uint64 {
func (c *cachedDocs) size() int {
sizeInBytes := 0
c.m.Lock()
for k, v := range c.cache { // cachedFieldDocs
@ -225,5 +225,5 @@ func (c *cachedDocs) sizeInBytes() uint64 {
}
}
c.m.Unlock()
return uint64(sizeInBytes)
return sizeInBytes
}

View File

@ -15,17 +15,31 @@
package upsidedown
import (
"reflect"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeIndexReader caches the static (shallow) size of an
// IndexReader struct as reported by reflect; it is populated by init.
var reflectStaticSizeIndexReader int

func init() {
	reflectStaticSizeIndexReader = int(reflect.TypeOf(IndexReader{}).Size())
}
// IndexReader bundles the owning UpsideDownCouch index, a KV store reader
// and a document count.
type IndexReader struct {
index *UpsideDownCouch
kvreader store.KVReader
docCount uint64
}
// Size returns the static struct size plus the size of one pointer; what
// index and kvreader refer to is not counted.
func (i *IndexReader) Size() int {
return reflectStaticSizeIndexReader + size.SizeOfPtr
}
func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
if fieldExists {

View File

@ -16,13 +16,27 @@ package upsidedown
import (
"bytes"
"reflect"
"sort"
"sync/atomic"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/size"
)
// Static (shallow) struct sizes as reported by reflect, computed once in
// init and reused by the Size methods of the corresponding reader types.
var (
	reflectStaticSizeUpsideDownCouchTermFieldReader int
	reflectStaticSizeUpsideDownCouchDocIDReader     int
)

func init() {
	reflectStaticSizeUpsideDownCouchTermFieldReader =
		int(reflect.TypeOf(UpsideDownCouchTermFieldReader{}).Size())
	reflectStaticSizeUpsideDownCouchDocIDReader =
		int(reflect.TypeOf(UpsideDownCouchDocIDReader{}).Size())
}
type UpsideDownCouchTermFieldReader struct {
count uint64
indexReader *IndexReader
@ -35,6 +49,19 @@ type UpsideDownCouchTermFieldReader struct {
includeTermVectors bool
}
// Size returns an estimate, in bytes, of the memory attributed to the
// reader: the static struct size plus one pointer, the bytes of term and
// keyBuf, the Size of the preallocated row and, when set, the Size of the
// next row.
func (r *UpsideDownCouchTermFieldReader) Size() int {
	total := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr
	total += len(r.term)
	total += r.tfrPrealloc.Size()
	total += len(r.keyBuf)

	if next := r.tfrNext; next != nil {
		total += next.Size()
	}

	return total
}
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
bufNeeded := termFrequencyRowKeySize(term, nil)
if bufNeeded < dictionaryRowKeySize(term) {
@ -174,8 +201,18 @@ type UpsideDownCouchDocIDReader struct {
onlyMode bool
}
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
// Size returns an estimate, in bytes, of the memory attributed to the
// reader: the static struct size, the Size of the index reader, and a
// string header plus the bytes of every entry in only.
func (r *UpsideDownCouchDocIDReader) Size() int {
	total := reflectStaticSizeUpsideDownCouchDocIDReader
	total += r.indexReader.Size()

	for _, id := range r.only {
		total += size.SizeOfString + len(id)
	}

	return total
}
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
startBytes := []byte{0x0}
endBytes := []byte{0xff}

View File

@ -20,10 +20,22 @@ import (
"fmt"
"io"
"math"
"reflect"
"github.com/blevesearch/bleve/size"
"github.com/golang/protobuf/proto"
)
// Static (shallow) struct sizes as reported by reflect, computed once in
// init and reused by the Size methods of the corresponding row types.
var (
	reflectStaticSizeTermFrequencyRow int
	reflectStaticSizeTermVector       int
)

func init() {
	reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(TermFrequencyRow{}).Size())
	reflectStaticSizeTermVector = int(reflect.TypeOf(TermVector{}).Size())
}
const ByteSeparator byte = 0xff
type UpsideDownCouchRowStream chan UpsideDownCouchRow
@ -358,6 +370,11 @@ type TermVector struct {
end uint64
}
// Size returns an estimate, in bytes, of the memory attributed to the term
// vector: the static struct size plus one pointer and one uint64 per array
// position.
func (tv *TermVector) Size() int {
	arrayPositionsSize := len(tv.arrayPositions) * size.SizeOfUint64
	return reflectStaticSizeTermVector + size.SizeOfPtr + arrayPositionsSize
}
// String renders the vector's field, position, start, end and array
// positions as a single human-readable line.
func (tv *TermVector) String() string {
return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
}
@ -371,6 +388,18 @@ type TermFrequencyRow struct {
field uint16
}
// Size returns an estimate, in bytes, of the memory attributed to the row:
// the static struct size, the bytes of term and doc, and the Size of every
// term vector.
func (tfr *TermFrequencyRow) Size() int {
	total := reflectStaticSizeTermFrequencyRow
	total += len(tfr.term) + len(tfr.doc)

	for _, vec := range tfr.vectors {
		total += vec.Size()
	}

	return total
}
// Term returns the row's term bytes (the stored slice itself, not a copy).
func (tfr *TermFrequencyRow) Term() []byte {
return tfr.term
}

View File

@ -362,8 +362,59 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
return i.SearchInContext(context.Background(), req)
}
// memNeededForSearch is a helper function that returns an estimate, in
// bytes, of the RAM needed to execute a search request.
//
// The estimate is the sum of: the collector's Size, an empty SearchContext
// plus the pre-allocated DocumentMatch pool, the searcher's Size, the
// result hits (plus lowestMatchOutsideResults), an empty SearchResult, one
// empty FacetResult per requested facet and, when stored fields or
// highlighting are requested, one empty Document per hit.
func memNeededForSearch(req *SearchRequest,
	searcher search.Searcher,
	topnCollector *collector.TopNCollector) uint64 {

	// Size + From => number of hits the request can return.
	numHits := req.Size + req.From

	// The backing store is capped at PreAllocSizeSkipCap (+1 in both cases).
	backingSize := numHits + 1
	if numHits > collector.PreAllocSizeSkipCap {
		backingSize = collector.PreAllocSizeSkipCap + 1
	}
	numDocMatches := backingSize + searcher.DocumentMatchPoolSize()

	estimate := 0

	// overhead, size in bytes from collector
	estimate += topnCollector.Size()

	var dm search.DocumentMatch
	sizeOfDocumentMatch := dm.Size()

	// pre-allocing DocumentMatchPool
	var sc search.SearchContext
	estimate += sc.Size() + numDocMatches*sizeOfDocumentMatch

	// searcher overhead
	estimate += searcher.Size()

	// overhead from results, lowestMatchOutsideResults
	estimate += (numDocMatches + 1) * sizeOfDocumentMatch

	// additional overhead from SearchResult
	var sr SearchResult
	estimate += sr.Size()

	// overhead from facet results
	if req.Facets != nil {
		var fr search.FacetResult
		estimate += len(req.Facets) * fr.Size()
	}

	// highlighting, store: one document per hit. The product is added
	// exactly once; adding it inside a loop over the hits would make the
	// estimate grow with the square of the hit count.
	if len(req.Fields) > 0 || req.Highlight != nil {
		var d document.Document
		estimate += numHits * d.Size()
	}

	return uint64(estimate)
}
// SearchInContext executes a search request operation within the provided
// Context. Returns a SearchResult object or an error.
// Context. Returns a SearchResult object or an error.
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
i.mutex.RLock()
defer i.mutex.RUnlock()

View File

@ -36,6 +36,9 @@ import (
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/query"
"github.com/blevesearch/bleve/index/scorch"
"github.com/blevesearch/bleve/index/upsidedown"
)
func TestCrud(t *testing.T) {
@ -1815,3 +1818,55 @@ func TestIndexAdvancedCountMatchSearch(t *testing.T) {
t.Fatal(err)
}
}
// benchmarkSearchOverhead builds an index of the given type under
// "testidx", indexes 10000 small documents and then measures repeated
// execution of a disjunction of two term queries.
//
// The deferred cleanups run in reverse order of registration: the index is
// closed first, then the "testidx" directory is removed.
func benchmarkSearchOverhead(indexType string, b *testing.B) {
defer func() {
err := os.RemoveAll("testidx")
if err != nil {
b.Fatal(err)
}
}()
index, err := NewUsing("testidx", NewIndexMapping(),
indexType, Config.DefaultKVStore, nil)
if err != nil {
b.Fatal(err)
}
defer func() {
err := index.Close()
if err != nil {
b.Fatal(err)
}
}()
// Populate the index; the "name" value cycles through the four elements.
elements := []string{"air", "water", "fire", "earth"}
for j := 0; j < 10000; j++ {
err = index.Index(fmt.Sprintf("%d", j),
map[string]interface{}{"name": elements[j%len(elements)]})
if err != nil {
b.Fatal(err)
}
}
query1 := NewTermQuery("water")
query2 := NewTermQuery("fire")
query := NewDisjunctionQuery(query1, query2)
req := NewSearchRequest(query)
// Exclude index construction and population from the measurement.
b.ResetTimer()
for n := 0; n < b.N; n++ {
_, err = index.Search(req)
if err != nil {
b.Fatal(err)
}
}
}
func BenchmarkUpsidedownSearchOverhead(b *testing.B) {
benchmarkSearchOverhead(upsidedown.Name, b)
}
func BenchmarkScorchSearchOverhead(b *testing.B) {
benchmarkSearchOverhead(scorch.Name, b)
}

View File

@ -17,6 +17,7 @@ package bleve
import (
"encoding/json"
"fmt"
"reflect"
"time"
"github.com/blevesearch/bleve/analysis"
@ -24,8 +25,19 @@ import (
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/query"
"github.com/blevesearch/bleve/size"
)
// Static sizes, in bytes, of SearchResult and SearchStatus values as reported
// by reflect. They are filled in once by init.
var (
	reflectStaticSizeSearchResult int
	reflectStaticSizeSearchStatus int
)

func init() {
	resultType := reflect.TypeOf((*SearchResult)(nil)).Elem()
	reflectStaticSizeSearchResult = int(resultType.Size())

	statusType := reflect.TypeOf((*SearchStatus)(nil)).Elem()
	reflectStaticSizeSearchStatus = int(statusType.Size())
}
var cache = registry.NewCache()
const defaultDateTimeParser = optional.Name
@ -432,6 +444,24 @@ type SearchResult struct {
Facets search.FacetResults `json:"facets"`
}
// Size returns an estimate, in bytes, of the memory used by the search
// result: the static sizes of SearchResult and SearchStatus, one pointer,
// the size of every non-nil hit, and for every facet its key (string header
// plus bytes) and the facet result's own size.
func (sr *SearchResult) Size() int {
	total := reflectStaticSizeSearchResult + size.SizeOfPtr +
		reflectStaticSizeSearchStatus

	for _, hit := range sr.Hits {
		if hit == nil {
			continue
		}
		total += hit.Size()
	}

	for name, facet := range sr.Facets {
		total += size.SizeOfString + len(name) + facet.Size()
	}

	return total
}
func (sr *SearchResult) String() string {
rv := ""
if sr.Total > 0 {

View File

@ -15,6 +15,8 @@
package collector
import (
"reflect"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
@ -25,6 +27,18 @@ type stubSearcher struct {
matches []*search.DocumentMatch
}
// Size returns the static size of the stubSearcher value plus the reported
// size of every non-nil match it holds.
func (ss *stubSearcher) Size() int {
	total := int(reflect.TypeOf(*ss).Size())
	for _, match := range ss.matches {
		if match == nil {
			continue
		}
		total += match.Size()
	}
	return total
}
func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
if ss.index < len(ss.matches) {
rv := ctx.DocumentMatchPool.Get()
@ -76,6 +90,10 @@ func (ss *stubSearcher) DocumentMatchPoolSize() int {
// stubReader is an empty placeholder type; the methods visible in this hunk
// only return zero values.
type stubReader struct{}
// Size always reports zero bytes for the stub reader.
func (sr *stubReader) Size() int {
return 0
}
// TermFieldReader is a no-op stub: it ignores all of its arguments and
// returns a nil reader together with a nil error.
func (sr *stubReader) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
return nil, nil
}

View File

@ -16,12 +16,21 @@ package collector
import (
"context"
"reflect"
"time"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeTopNCollector holds the static size, in bytes, of a
// TopNCollector value as reported by reflect. It is set once by init.
var reflectStaticSizeTopNCollector int

func init() {
	collectorType := reflect.TypeOf((*TopNCollector)(nil)).Elem()
	reflectStaticSizeTopNCollector = int(collectorType.Size())
}
type collectorStore interface {
// Add the document, and if the new store size exceeds the provided size
// the last element is removed and returned. If the size has not been
@ -98,6 +107,22 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
return hc
}
// Size returns an estimate, in bytes, of the collector's footprint: its
// static size plus one pointer, the facets builder's size when one is set,
// a string header and the bytes of every needed field name, and the lengths
// of the cachedScoring and cachedDesc slices.
func (hc *TopNCollector) Size() int {
	total := reflectStaticSizeTopNCollector + size.SizeOfPtr

	if builder := hc.facetsBuilder; builder != nil {
		total += builder.Size()
	}

	for _, field := range hc.neededFields {
		total += len(field) + size.SizeOfString
	}

	total += len(hc.cachedScoring)
	total += len(hc.cachedDesc)

	return total
}
// Collect goes to the index to find the matching documents
func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
startTime := time.Now()

View File

@ -17,8 +17,18 @@ package search
import (
"encoding/json"
"fmt"
"reflect"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeExplanation holds the static size, in bytes, of an
// Explanation value as reported by reflect. It is set once by init.
var reflectStaticSizeExplanation int

func init() {
	explanationType := reflect.TypeOf((*Explanation)(nil)).Elem()
	reflectStaticSizeExplanation = int(explanationType.Size())
}
type Explanation struct {
Value float64 `json:"value"`
Message string `json:"message"`
@ -32,3 +42,14 @@ func (expl *Explanation) String() string {
}
return string(js)
}
// Size returns an estimate, in bytes, of this explanation: its static size,
// one pointer, the bytes of its message, and recursively the size of each
// child explanation.
func (expl *Explanation) Size() int {
	total := reflectStaticSizeExplanation + size.SizeOfPtr
	total += len(expl.Message)

	for _, child := range expl.Children {
		total += child.Size()
	}

	return total
}

View File

@ -15,13 +15,25 @@
package facet
import (
"reflect"
"sort"
"time"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
// Static sizes, in bytes, of DateTimeFacetBuilder and dateTimeRange values
// as reported by reflect. They are filled in once by init.
var (
	reflectStaticSizeDateTimeFacetBuilder int
	reflectStaticSizedateTimeRange        int
)

func init() {
	builderType := reflect.TypeOf((*DateTimeFacetBuilder)(nil)).Elem()
	reflectStaticSizeDateTimeFacetBuilder = int(builderType.Size())

	rangeType := reflect.TypeOf((*dateTimeRange)(nil)).Elem()
	reflectStaticSizedateTimeRange = int(rangeType.Size())
}
type dateTimeRange struct {
start time.Time
end time.Time
@ -46,6 +58,23 @@ func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
}
}
// Size returns an estimate, in bytes, of the builder: its static size, one
// pointer, the bytes of the field name, plus a per-entry cost for every
// counted term and every configured range.
func (fb *DateTimeFacetBuilder) Size() int {
	total := reflectStaticSizeDateTimeFacetBuilder + size.SizeOfPtr + len(fb.field)

	// Each counted term: string header, key bytes and an int.
	for term := range fb.termsCount {
		total += size.SizeOfString + len(term) + size.SizeOfInt
	}

	// Each range: string header, name bytes, a pointer and one dateTimeRange.
	for name := range fb.ranges {
		total += size.SizeOfString + len(name) +
			size.SizeOfPtr + reflectStaticSizedateTimeRange
	}

	return total
}
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
r := dateTimeRange{
start: start,

View File

@ -15,12 +15,24 @@
package facet
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
// Static sizes, in bytes, of NumericFacetBuilder and numericRange values as
// reported by reflect. They are filled in once by init.
var (
	reflectStaticSizeNumericFacetBuilder int
	reflectStaticSizenumericRange        int
)

func init() {
	builderType := reflect.TypeOf((*NumericFacetBuilder)(nil)).Elem()
	reflectStaticSizeNumericFacetBuilder = int(builderType.Size())

	rangeType := reflect.TypeOf((*numericRange)(nil)).Elem()
	reflectStaticSizenumericRange = int(rangeType.Size())
}
type numericRange struct {
min *float64
max *float64
@ -45,6 +57,23 @@ func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
}
}
// Size returns an estimate, in bytes, of the builder: its static size, one
// pointer, the bytes of the field name, plus a per-entry cost for every
// counted term and every configured range.
func (fb *NumericFacetBuilder) Size() int {
	total := reflectStaticSizeNumericFacetBuilder + size.SizeOfPtr + len(fb.field)

	// Each counted term: string header, key bytes and an int.
	for term := range fb.termsCount {
		total += size.SizeOfString + len(term) + size.SizeOfInt
	}

	// Each range: string header, name bytes, a pointer and one numericRange.
	for name := range fb.ranges {
		total += size.SizeOfString + len(name) +
			size.SizeOfPtr + reflectStaticSizenumericRange
	}

	return total
}
func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
r := numericRange{
min: min,

View File

@ -15,11 +15,20 @@
package facet
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeTermsFacetBuilder holds the static size, in bytes, of a
// TermsFacetBuilder value as reported by reflect. It is set once by init.
var reflectStaticSizeTermsFacetBuilder int

func init() {
	builderType := reflect.TypeOf((*TermsFacetBuilder)(nil)).Elem()
	reflectStaticSizeTermsFacetBuilder = int(builderType.Size())
}
type TermsFacetBuilder struct {
size int
field string
@ -37,6 +46,18 @@ func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
}
}
// Size returns an estimate, in bytes, of the builder: its static size, one
// pointer, the bytes of the field name, and for every counted term a string
// header, the term bytes and an int.
func (fb *TermsFacetBuilder) Size() int {
	total := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr + len(fb.field)

	for term := range fb.termsCount {
		total += size.SizeOfString + len(term) + size.SizeOfInt
	}

	return total
}
// Field returns the field name stored in the builder.
func (fb *TermsFacetBuilder) Field() string {
return fb.field
}

View File

@ -15,11 +15,32 @@
package search
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/size"
)
// Static sizes, in bytes, of the facet-related types as reported by reflect.
// They are filled in once by init.
var (
	reflectStaticSizeFacetsBuilder     int
	reflectStaticSizeFacetResult       int
	reflectStaticSizeTermFacet         int
	reflectStaticSizeNumericRangeFacet int
	reflectStaticSizeDateRangeFacet    int
)

func init() {
	reflectStaticSizeFacetsBuilder = int(
		reflect.TypeOf((*FacetsBuilder)(nil)).Elem().Size())
	reflectStaticSizeFacetResult = int(
		reflect.TypeOf((*FacetResult)(nil)).Elem().Size())
	reflectStaticSizeTermFacet = int(
		reflect.TypeOf((*TermFacet)(nil)).Elem().Size())
	reflectStaticSizeNumericRangeFacet = int(
		reflect.TypeOf((*NumericRangeFacet)(nil)).Elem().Size())
	reflectStaticSizeDateRangeFacet = int(
		reflect.TypeOf((*DateRangeFacet)(nil)).Elem().Size())
}
type FacetBuilder interface {
StartDoc()
UpdateVisitor(field string, term []byte)
@ -27,6 +48,8 @@ type FacetBuilder interface {
Result() *FacetResult
Field() string
Size() int
}
type FacetsBuilder struct {
@ -42,6 +65,22 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
}
}
// Size returns an estimate, in bytes, of the facets builder: its static
// size, one pointer, the index reader's reported size, each registered facet
// (key string header, key bytes and the facet builder's own size), and a
// string header plus bytes for every entry in fields.
func (fb *FacetsBuilder) Size() int {
	total := reflectStaticSizeFacetsBuilder + size.SizeOfPtr +
		fb.indexReader.Size()

	for name, builder := range fb.facets {
		total += size.SizeOfString + len(name) + builder.Size()
	}

	for _, field := range fb.fields {
		total += size.SizeOfString + len(field)
	}

	return total
}
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
fb.facets[name] = facetBuilder
fb.fields = append(fb.fields, facetBuilder.Field())
@ -213,6 +252,14 @@ type FacetResult struct {
DateRanges DateRangeFacets `json:"date_ranges,omitempty"`
}
// Size returns an estimate, in bytes, of the facet result: its static size,
// one pointer, the bytes of the field name, and for every term, numeric
// range and date range entry the static size of that entry plus one pointer.
func (fr *FacetResult) Size() int {
	total := reflectStaticSizeFacetResult + size.SizeOfPtr + len(fr.Field)

	total += len(fr.Terms) * (reflectStaticSizeTermFacet + size.SizeOfPtr)
	total += len(fr.NumericRanges) * (reflectStaticSizeNumericRangeFacet + size.SizeOfPtr)
	total += len(fr.DateRanges) * (reflectStaticSizeDateRangeFacet + size.SizeOfPtr)

	return total
}
func (fr *FacetResult) Merge(other *FacetResult) {
fr.Total += other.Total
fr.Missing += other.Missing

View File

@ -14,6 +14,17 @@
package search
import (
"reflect"
)
// reflectStaticSizeDocumentMatchPool holds the static size, in bytes, of a
// DocumentMatchPool value as reported by reflect. It is set once by init.
var reflectStaticSizeDocumentMatchPool int

func init() {
	poolType := reflect.TypeOf((*DocumentMatchPool)(nil)).Elem()
	reflectStaticSizeDocumentMatchPool = int(poolType.Size())
}
// DocumentMatchPoolTooSmall is a callback function that can be executed
// when the DocumentMatchPool does not have sufficient capacity
// By default we just perform just-in-time allocation, but you could log

View File

@ -15,13 +15,27 @@
package scorer
import (
"reflect"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeConjunctionQueryScorer holds the static size, in bytes,
// of a ConjunctionQueryScorer value as reported by reflect. It is set once
// by init.
var reflectStaticSizeConjunctionQueryScorer int

func init() {
	scorerType := reflect.TypeOf((*ConjunctionQueryScorer)(nil)).Elem()
	reflectStaticSizeConjunctionQueryScorer = int(scorerType.Size())
}
// ConjunctionQueryScorer holds the search.SearcherOptions it is constructed
// with.
type ConjunctionQueryScorer struct {
options search.SearcherOptions
}
// Size returns the static size of a ConjunctionQueryScorer plus the size of
// one pointer, in bytes.
func (s *ConjunctionQueryScorer) Size() int {
	total := reflectStaticSizeConjunctionQueryScorer
	total += size.SizeOfPtr
	return total
}
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
return &ConjunctionQueryScorer{
options: options,

View File

@ -16,11 +16,20 @@ package scorer
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeConstantScorer holds the static size, in bytes, of a
// ConstantScorer value as reported by reflect. It is set once by init.
var reflectStaticSizeConstantScorer int

func init() {
	scorerType := reflect.TypeOf((*ConstantScorer)(nil)).Elem()
	reflectStaticSizeConstantScorer = int(scorerType.Size())
}
type ConstantScorer struct {
constant float64
boost float64
@ -30,6 +39,16 @@ type ConstantScorer struct {
queryWeightExplanation *search.Explanation
}
// Size returns an estimate, in bytes, of the scorer: its static size, one
// pointer, and the size of the query weight explanation when one is present.
func (s *ConstantScorer) Size() int {
	total := reflectStaticSizeConstantScorer + size.SizeOfPtr

	if expl := s.queryWeightExplanation; expl != nil {
		total += expl.Size()
	}

	return total
}
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
rv := ConstantScorer{
options: options,

View File

@ -16,14 +16,27 @@ package scorer
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeDisjunctionQueryScorer holds the static size, in bytes,
// of a DisjunctionQueryScorer value as reported by reflect. It is set once
// by init.
var reflectStaticSizeDisjunctionQueryScorer int

func init() {
	scorerType := reflect.TypeOf((*DisjunctionQueryScorer)(nil)).Elem()
	reflectStaticSizeDisjunctionQueryScorer = int(scorerType.Size())
}
// DisjunctionQueryScorer holds the search.SearcherOptions it is constructed
// with.
type DisjunctionQueryScorer struct {
options search.SearcherOptions
}
// Size returns the static size of a DisjunctionQueryScorer plus the size of
// one pointer, in bytes.
func (s *DisjunctionQueryScorer) Size() int {
	total := reflectStaticSizeDisjunctionQueryScorer
	total += size.SizeOfPtr
	return total
}
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
return &DisjunctionQueryScorer{
options: options,

View File

@ -17,11 +17,20 @@ package scorer
import (
"fmt"
"math"
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeTermQueryScorer holds the static size, in bytes, of a
// TermQueryScorer value as reported by reflect. It is set once by init.
var reflectStaticSizeTermQueryScorer int

func init() {
	scorerType := reflect.TypeOf((*TermQueryScorer)(nil)).Elem()
	reflectStaticSizeTermQueryScorer = int(scorerType.Size())
}
type TermQueryScorer struct {
queryTerm []byte
queryField string
@ -36,6 +45,21 @@ type TermQueryScorer struct {
queryWeightExplanation *search.Explanation
}
// Size returns an estimate, in bytes, of the scorer: its static size, one
// pointer, the bytes of the query term and query field, and the sizes of the
// idf and query weight explanations when they are present.
func (s *TermQueryScorer) Size() int {
	total := reflectStaticSizeTermQueryScorer + size.SizeOfPtr
	total += len(s.queryTerm) + len(s.queryField)

	if idf := s.idfExplanation; idf != nil {
		total += idf.Size()
	}

	if weight := s.queryWeightExplanation; weight != nil {
		total += weight.Size()
	}

	return total
}
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
rv := TermQueryScorer{
queryTerm: queryTerm,

View File

@ -16,11 +16,26 @@ package search
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/size"
)
// Static sizes, in bytes, of DocumentMatch, SearchContext and Location
// values as reported by reflect. They are filled in once by init.
var (
	reflectStaticSizeDocumentMatch int
	reflectStaticSizeSearchContext int
	reflectStaticSizeLocation      int
)

func init() {
	reflectStaticSizeDocumentMatch = int(
		reflect.TypeOf((*DocumentMatch)(nil)).Elem().Size())
	reflectStaticSizeSearchContext = int(
		reflect.TypeOf((*SearchContext)(nil)).Elem().Size())
	reflectStaticSizeLocation = int(
		reflect.TypeOf((*Location)(nil)).Elem().Size())
}
// ArrayPositions is a list of uint64 values; Location carries one in its
// ArrayPositions field.
type ArrayPositions []uint64
func (ap ArrayPositions) Equals(other ArrayPositions) bool {
@ -47,6 +62,11 @@ type Location struct {
ArrayPositions ArrayPositions `json:"array_positions"`
}
// Size returns an estimate, in bytes, of the location: its static size, one
// pointer, and one uint64 for every array position.
func (l *Location) Size() int {
	positionBytes := len(l.ArrayPositions) * size.SizeOfUint64
	return reflectStaticSizeLocation + size.SizeOfPtr + positionBytes
}
// Locations is a list of Location pointers.
type Locations []*Location
// TermLocationMap maps a string key (presumably a term; confirm against
// callers) to its Locations.
type TermLocationMap map[string]Locations
@ -117,6 +137,52 @@ func (dm *DocumentMatch) Reset() *DocumentMatch {
return dm
}
// Size returns an estimate, in bytes, of the document match. It sums the
// static size and a pointer, the bytes of Index, ID and IndexInternalID, the
// explanation (when set), every entry of Locations, Fragments, Sort and
// Fields, and the attached Document (when set).
func (dm *DocumentMatch) Size() int {
	total := reflectStaticSizeDocumentMatch + size.SizeOfPtr
	total += len(dm.Index) + len(dm.ID) + len(dm.IndexInternalID)

	if dm.Expl != nil {
		total += dm.Expl.Size()
	}

	// Locations: outer key, then per inner key a string header, the key
	// bytes, a slice header and every location in the list.
	for outerKey, inner := range dm.Locations {
		total += size.SizeOfString + len(outerKey)
		for innerKey, locs := range inner {
			total += size.SizeOfString + len(innerKey) + size.SizeOfSlice
			for _, loc := range locs {
				total += loc.Size()
			}
		}
	}

	// Fragments: key plus slice header, then each fragment string.
	for key, fragments := range dm.Fragments {
		total += size.SizeOfString + len(key) + size.SizeOfSlice
		for _, fragment := range fragments {
			total += size.SizeOfString + len(fragment)
		}
	}

	for _, sortKey := range dm.Sort {
		total += size.SizeOfString + len(sortKey)
	}

	// Fields: only the key and one pointer per entry are counted.
	for name := range dm.Fields {
		total += size.SizeOfString + len(name) + size.SizeOfPtr
	}

	if dm.Document != nil {
		total += dm.Document.Size()
	}

	return total
}
// String formats the match as "[<IndexInternalID>-<Score>]", with the
// internal ID converted to a string and the score printed with %f.
func (dm *DocumentMatch) String() string {
return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
}
@ -135,6 +201,7 @@ type Searcher interface {
SetQueryNorm(float64)
Count() uint64
Min() int
Size() int
DocumentMatchPoolSize() int
}
@ -148,3 +215,18 @@ type SearcherOptions struct {
// SearchContext holds a pointer to a DocumentMatchPool.
type SearchContext struct {
DocumentMatchPool *DocumentMatchPool
}
// Size returns an estimate, in bytes, of the search context: the static
// sizes of SearchContext and DocumentMatchPool with one pointer each, plus
// the size of every non-nil entry in the pool's avail list when a pool is
// attached.
func (sc *SearchContext) Size() int {
	total := reflectStaticSizeSearchContext + size.SizeOfPtr +
		reflectStaticSizeDocumentMatchPool + size.SizeOfPtr

	pool := sc.DocumentMatchPool
	if pool == nil {
		return total
	}

	for _, dm := range pool.avail {
		if dm != nil {
			total += dm.Size()
		}
	}

	return total
}

View File

@ -16,12 +16,21 @@ package searcher
import (
"math"
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeBooleanSearcher holds the static size, in bytes, of a
// BooleanSearcher value as reported by reflect. It is set once by init.
var reflectStaticSizeBooleanSearcher int

func init() {
	searcherType := reflect.TypeOf((*BooleanSearcher)(nil)).Elem()
	reflectStaticSizeBooleanSearcher = int(searcherType.Size())
}
type BooleanSearcher struct {
indexReader index.IndexReader
mustSearcher search.Searcher
@ -52,6 +61,33 @@ func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searc
return &rv, nil
}
// Size returns an estimate, in bytes, of the searcher: its static size, one
// pointer, the index reader, each of the must / should / must-not child
// searchers that is set, the scorer, and every non-nil entry in matches.
func (s *BooleanSearcher) Size() int {
	total := reflectStaticSizeBooleanSearcher + size.SizeOfPtr +
		s.indexReader.Size()

	if s.mustSearcher != nil {
		total += s.mustSearcher.Size()
	}
	if s.shouldSearcher != nil {
		total += s.shouldSearcher.Size()
	}
	if s.mustNotSearcher != nil {
		total += s.mustNotSearcher.Size()
	}

	total += s.scorer.Size()

	for _, match := range s.matches {
		if match == nil {
			continue
		}
		total += match.Size()
	}

	return total
}
func (s *BooleanSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0

View File

@ -16,13 +16,22 @@ package searcher
import (
"math"
"reflect"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeConjunctionSearcher holds the static size, in bytes, of a
// ConjunctionSearcher value as reported by reflect. It is set once by init.
var reflectStaticSizeConjunctionSearcher int

func init() {
	searcherType := reflect.TypeOf((*ConjunctionSearcher)(nil)).Elem()
	reflectStaticSizeConjunctionSearcher = int(searcherType.Size())
}
type ConjunctionSearcher struct {
indexReader index.IndexReader
searchers OrderedSearcherList
@ -54,6 +63,23 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
return &rv, nil
}
// Size returns an estimate, in bytes, of the searcher: its static size, one
// pointer, the scorer, every child searcher, and every non-nil entry in
// currs.
func (s *ConjunctionSearcher) Size() int {
	total := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr +
		s.scorer.Size()

	for _, child := range s.searchers {
		total += child.Size()
	}

	for _, curr := range s.currs {
		if curr == nil {
			continue
		}
		total += curr.Size()
	}

	return total
}
func (s *ConjunctionSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0

View File

@ -17,13 +17,22 @@ package searcher
import (
"fmt"
"math"
"reflect"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeDisjunctionSearcher holds the static size, in bytes, of a
// DisjunctionSearcher value as reported by reflect. It is set once by init.
var reflectStaticSizeDisjunctionSearcher int

func init() {
	searcherType := reflect.TypeOf((*DisjunctionSearcher)(nil)).Elem()
	reflectStaticSizeDisjunctionSearcher = int(searcherType.Size())
}
// DisjunctionMaxClauseCount is a compile time setting that applications can
// adjust to non-zero value to cause the DisjunctionSearcher to return an
// error instead of executing searches when the size exceeds this value.
@ -90,6 +99,32 @@ func newDisjunctionSearcher(indexReader index.IndexReader,
return &rv, nil
}
// Size returns an estimate, in bytes, of the searcher: its static size, one
// pointer, the index reader, the scorer, every child searcher, every non-nil
// entry in currs and matching, and one int per entry of matchingIdxs.
func (s *DisjunctionSearcher) Size() int {
	total := reflectStaticSizeDisjunctionSearcher + size.SizeOfPtr +
		s.indexReader.Size() +
		s.scorer.Size()

	for _, child := range s.searchers {
		total += child.Size()
	}

	for _, curr := range s.currs {
		if curr == nil {
			continue
		}
		total += curr.Size()
	}

	for _, match := range s.matching {
		if match == nil {
			continue
		}
		total += match.Size()
	}

	total += len(s.matchingIdxs) * size.SizeOfInt

	return total
}
func (s *DisjunctionSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0

View File

@ -15,11 +15,21 @@
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeDocIDSearcher holds the static size, in bytes, of a
// DocIDSearcher value as reported by reflect. It is set once by init.
var reflectStaticSizeDocIDSearcher int

func init() {
	searcherType := reflect.TypeOf((*DocIDSearcher)(nil)).Elem()
	reflectStaticSizeDocIDSearcher = int(searcherType.Size())
}
// DocIDSearcher returns documents matching a predefined set of identifiers.
type DocIDSearcher struct {
reader index.DocIDReader
@ -42,6 +52,12 @@ func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64
}, nil
}
// Size returns an estimate, in bytes, of the searcher: its static size, one
// pointer, and the reported sizes of its reader and scorer.
func (s *DocIDSearcher) Size() int {
	total := reflectStaticSizeDocIDSearcher + size.SizeOfPtr
	total += s.reader.Size()
	total += s.scorer.Size()
	return total
}
// Count returns the searcher's stored count converted to uint64.
func (s *DocIDSearcher) Count() uint64 {
return uint64(s.count)
}

View File

@ -15,10 +15,20 @@
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeFilteringSearcher holds the static size, in bytes, of a
// FilteringSearcher value as reported by reflect. It is set once by init.
var reflectStaticSizeFilteringSearcher int

func init() {
	searcherType := reflect.TypeOf((*FilteringSearcher)(nil)).Elem()
	reflectStaticSizeFilteringSearcher = int(searcherType.Size())
}
// FilterFunc defines a function which can filter documents
// returning true means keep the document
// returning false means do not keep the document
@ -38,6 +48,11 @@ func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearch
}
}
// Size returns an estimate, in bytes, of the searcher: its static size, one
// pointer, and the reported size of the wrapped child searcher.
func (f *FilteringSearcher) Size() int {
	total := reflectStaticSizeFilteringSearcher + size.SizeOfPtr
	total += f.child.Size()
	return total
}
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
next, err := f.child.Next(ctx)
for next != nil && err == nil {

View File

@ -15,11 +15,21 @@
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeMatchAllSearcher holds the static size, in bytes, of a
// MatchAllSearcher value as reported by reflect. It is set once by init.
var reflectStaticSizeMatchAllSearcher int

func init() {
	searcherType := reflect.TypeOf((*MatchAllSearcher)(nil)).Elem()
	reflectStaticSizeMatchAllSearcher = int(searcherType.Size())
}
type MatchAllSearcher struct {
indexReader index.IndexReader
reader index.DocIDReader
@ -46,6 +56,13 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options s
}, nil
}
// Size returns an estimate, in bytes, of the searcher: its static size, one
// pointer, and the reported sizes of the index reader, the doc ID reader and
// the scorer.
func (s *MatchAllSearcher) Size() int {
	total := reflectStaticSizeMatchAllSearcher + size.SizeOfPtr
	total += s.indexReader.Size()
	total += s.reader.Size()
	total += s.scorer.Size()
	return total
}
// Count returns the searcher's stored count.
func (s *MatchAllSearcher) Count() uint64 {
return s.count
}

View File

@ -15,10 +15,20 @@
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeMatchNoneSearcher holds the static size, in bytes, of a
// MatchNoneSearcher value as reported by reflect. It is set once by init.
var reflectStaticSizeMatchNoneSearcher int

func init() {
	searcherType := reflect.TypeOf((*MatchNoneSearcher)(nil)).Elem()
	reflectStaticSizeMatchNoneSearcher = int(searcherType.Size())
}
// MatchNoneSearcher holds only an index reader; its Count method always
// reports zero.
type MatchNoneSearcher struct {
indexReader index.IndexReader
}
@ -29,6 +39,11 @@ func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, er
}, nil
}
// Size returns an estimate, in bytes, of the searcher: its static size, one
// pointer, and the reported size of the index reader.
func (s *MatchNoneSearcher) Size() int {
	total := reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr
	total += s.indexReader.Size()
	return total
}
// Count always returns zero.
func (s *MatchNoneSearcher) Count() uint64 {
return uint64(0)
}

View File

@ -17,11 +17,20 @@ package searcher
import (
"fmt"
"math"
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizePhraseSearcher holds the static size, in bytes, of a
// PhraseSearcher value as reported by reflect. It is set once by init.
var reflectStaticSizePhraseSearcher int

func init() {
	searcherType := reflect.TypeOf((*PhraseSearcher)(nil)).Elem()
	reflectStaticSizePhraseSearcher = int(searcherType.Size())
}
type PhraseSearcher struct {
indexReader index.IndexReader
mustSearcher *ConjunctionSearcher
@ -32,6 +41,28 @@ type PhraseSearcher struct {
initialized bool
}
// Size returns an estimate, in bytes, of the searcher: its static size, one
// pointer, the index reader, the must searcher and current must match when
// they are set, and for every group in terms a slice header plus a string
// header and the bytes of each term.
func (s *PhraseSearcher) Size() int {
	total := reflectStaticSizePhraseSearcher + size.SizeOfPtr +
		s.indexReader.Size()

	if s.mustSearcher != nil {
		total += s.mustSearcher.Size()
	}

	if s.currMust != nil {
		total += s.currMust.Size()
	}

	for _, group := range s.terms {
		total += size.SizeOfSlice
		for _, term := range group {
			total += size.SizeOfString + len(term)
		}
	}

	return total
}
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
// turn flat terms []string into [][]string
mterms := make([][]string, len(terms))

View File

@ -15,11 +15,21 @@
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeTermSearcher holds the static size, in bytes, of a
// TermSearcher value as reported by reflect. It is set once by init.
var reflectStaticSizeTermSearcher int

func init() {
	searcherType := reflect.TypeOf((*TermSearcher)(nil)).Elem()
	reflectStaticSizeTermSearcher = int(searcherType.Size())
}
type TermSearcher struct {
indexReader index.IndexReader
reader index.TermFieldReader
@ -63,6 +73,14 @@ func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field stri
}, nil
}
// Size returns an estimate, in bytes, of the searcher: its static size, one
// pointer, and the reported sizes of the index reader, the term field
// reader, tfd and the scorer.
func (s *TermSearcher) Size() int {
	total := reflectStaticSizeTermSearcher + size.SizeOfPtr
	total += s.indexReader.Size()
	total += s.reader.Size()
	total += s.tfd.Size()
	total += s.scorer.Size()
	return total
}
// Count returns the count reported by the searcher's term field reader.
func (s *TermSearcher) Count() uint64 {
return s.reader.Count()
}

57
size/sizes.go Normal file
View File

@ -0,0 +1,57 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package size
import (
"reflect"
)
// init records the size, in bytes, of a value of each basic Go type on the
// current platform. The sizes come from reflect.Type.Size, so for maps,
// slices and strings they cover only the value itself (the header), not the
// data it refers to.
func init() {
	var b bool
	SizeOfBool = int(reflect.TypeOf(b).Size())
	var f32 float32
	SizeOfFloat32 = int(reflect.TypeOf(f32).Size())
	var f64 float64
	SizeOfFloat64 = int(reflect.TypeOf(f64).Size())
	// SizeOfInt was previously declared but never assigned, which left it at
	// zero and silently dropped every int from the callers' estimates.
	var i int
	SizeOfInt = int(reflect.TypeOf(i).Size())
	var m map[int]int
	SizeOfMap = int(reflect.TypeOf(m).Size())
	var ptr *int
	SizeOfPtr = int(reflect.TypeOf(ptr).Size())
	var slice []int
	SizeOfSlice = int(reflect.TypeOf(slice).Size())
	var str string
	SizeOfString = int(reflect.TypeOf(str).Size())
	var u8 uint8
	SizeOfUint8 = int(reflect.TypeOf(u8).Size())
	var u16 uint16
	SizeOfUint16 = int(reflect.TypeOf(u16).Size())
	var u32 uint32
	SizeOfUint32 = int(reflect.TypeOf(u32).Size())
	var u64 uint64
	SizeOfUint64 = int(reflect.TypeOf(u64).Size())
}

// Sizes, in bytes, of values of the basic Go types. All of them are set by
// init and must be treated as read-only afterwards.
var SizeOfBool int
var SizeOfFloat32 int
var SizeOfFloat64 int
var SizeOfInt int
var SizeOfMap int
var SizeOfPtr int
var SizeOfSlice int
var SizeOfString int
var SizeOfUint8 int
var SizeOfUint16 int
var SizeOfUint32 int
var SizeOfUint64 int