MB-28162: Provide API to estimate memory needed to run a search query
This API (unexported) will estimate the amount of memory needed to execute a search query over an index before the collector begins data collection. Sample estimates for certain queries: {Size: 10, BenchmarkUpsidedownSearchOverhead} ESTIMATE BENCHMEM TermQuery 4616 4796 MatchQuery 5210 5405 DisjunctionQuery (Match queries) 7700 8447 DisjunctionQuery (Term queries) 6514 6591 ConjunctionQuery (Match queries) 7524 8175 Nested disjunction query (disjunction of disjunctions) 10306 10708 …
This commit is contained in:
parent
2b005f1e23
commit
7e36109b3c
|
@ -14,7 +14,19 @@
|
|||
|
||||
package document
|
||||
|
||||
import "fmt"
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDocument int
|
||||
|
||||
func init() {
|
||||
var d Document
|
||||
reflectStaticSizeDocument = int(reflect.TypeOf(d).Size())
|
||||
}
|
||||
|
||||
type Document struct {
|
||||
ID string `json:"id"`
|
||||
|
@ -30,6 +42,13 @@ func NewDocument(id string) *Document {
|
|||
}
|
||||
}
|
||||
|
||||
func (d *Document) Size() int {
|
||||
return reflectStaticSizeDocument + size.SizeOfPtr +
|
||||
len(d.ID) +
|
||||
len(d.Fields)*size.SizeOfPtr +
|
||||
len(d.CompositeFields)*(size.SizeOfPtr+reflectStaticSizeCompositeField)
|
||||
}
|
||||
|
||||
func (d *Document) AddField(f Field) *Document {
|
||||
switch f := f.(type) {
|
||||
case *CompositeField:
|
||||
|
|
|
@ -15,9 +15,18 @@
|
|||
package document
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
)
|
||||
|
||||
var reflectStaticSizeCompositeField int
|
||||
|
||||
func init() {
|
||||
var cf CompositeField
|
||||
reflectStaticSizeCompositeField = int(reflect.TypeOf(cf).Size())
|
||||
}
|
||||
|
||||
const DefaultCompositeIndexingOptions = IndexField
|
||||
|
||||
type CompositeField struct {
|
||||
|
|
|
@ -18,11 +18,23 @@ import (
|
|||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermFieldDoc int
|
||||
var reflectStaticSizeTermFieldVector int
|
||||
|
||||
func init() {
|
||||
var tfd TermFieldDoc
|
||||
reflectStaticSizeTermFieldDoc = int(reflect.TypeOf(tfd).Size())
|
||||
var tfv TermFieldVector
|
||||
reflectStaticSizeTermFieldVector = int(reflect.TypeOf(tfv).Size())
|
||||
}
|
||||
|
||||
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
|
||||
|
||||
type Index interface {
|
||||
|
@ -82,6 +94,8 @@ type IndexReader interface {
|
|||
DumpFields() chan interface{}
|
||||
|
||||
Close() error
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
// FieldTerms contains the terms used by a document, keyed by field
|
||||
|
@ -115,6 +129,11 @@ type TermFieldVector struct {
|
|||
End uint64
|
||||
}
|
||||
|
||||
func (tfv *TermFieldVector) Size() int {
|
||||
return reflectStaticSizeTermFieldVector + size.SizeOfPtr +
|
||||
len(tfv.Field) + len(tfv.ArrayPositions)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
// IndexInternalID is an opaque document identifier interal to the index impl
|
||||
type IndexInternalID []byte
|
||||
|
||||
|
@ -134,6 +153,17 @@ type TermFieldDoc struct {
|
|||
Vectors []*TermFieldVector
|
||||
}
|
||||
|
||||
func (tfd *TermFieldDoc) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTermFieldDoc + size.SizeOfPtr +
|
||||
len(tfd.Term) + len(tfd.ID)
|
||||
|
||||
for _, entry := range tfd.Vectors {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Reset allows an already allocated TermFieldDoc to be reused
|
||||
func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
|
||||
// remember the []byte used for the ID
|
||||
|
@ -161,6 +191,8 @@ type TermFieldReader interface {
|
|||
// Count returns the number of documents contains the term in this field.
|
||||
Count() uint64
|
||||
Close() error
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
type DictEntry struct {
|
||||
|
@ -185,6 +217,9 @@ type DocIDReader interface {
|
|||
// will start there instead. If ID is greater than or equal to the end of
|
||||
// the range, Next() call will return io.EOF.
|
||||
Advance(ID IndexInternalID) (IndexInternalID, error)
|
||||
|
||||
Size() int
|
||||
|
||||
Close() error
|
||||
}
|
||||
|
||||
|
|
|
@ -472,20 +472,20 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) {
|
|||
}
|
||||
|
||||
func (s *Scorch) MemoryUsed() uint64 {
|
||||
var memUsed uint64
|
||||
var memUsed int
|
||||
s.rootLock.RLock()
|
||||
if s.root != nil {
|
||||
for _, segmentSnapshot := range s.root.segment {
|
||||
memUsed += 8 /* size of id -> uint64 */ +
|
||||
segmentSnapshot.segment.SizeInBytes()
|
||||
segmentSnapshot.segment.Size()
|
||||
if segmentSnapshot.deleted != nil {
|
||||
memUsed += segmentSnapshot.deleted.GetSizeInBytes()
|
||||
memUsed += int(segmentSnapshot.deleted.GetSizeInBytes())
|
||||
}
|
||||
memUsed += segmentSnapshot.cachedDocs.sizeInBytes()
|
||||
memUsed += segmentSnapshot.cachedDocs.size()
|
||||
}
|
||||
}
|
||||
s.rootLock.RUnlock()
|
||||
return memUsed
|
||||
return uint64(memUsed)
|
||||
}
|
||||
|
||||
func (s *Scorch) markIneligibleForRemoval(filename string) {
|
||||
|
|
|
@ -46,6 +46,10 @@ func (e *EmptySegment) Close() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (e *EmptySegment) Size() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (e *EmptySegment) AddRef() {
|
||||
}
|
||||
|
||||
|
@ -84,6 +88,10 @@ func (e *EmptyPostingsList) Iterator() PostingsIterator {
|
|||
return &EmptyPostingsIterator{}
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsList) Size() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsList) Count() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
@ -93,3 +101,7 @@ type EmptyPostingsIterator struct{}
|
|||
func (e *EmptyPostingsIterator) Next() (Posting, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsIterator) Size() int {
|
||||
return 0
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment {
|
|||
}
|
||||
|
||||
// compute memory usage of segment
|
||||
s.updateSizeInBytes()
|
||||
s.updateSize()
|
||||
|
||||
// professional debugging
|
||||
//
|
||||
|
|
|
@ -15,14 +15,23 @@
|
|||
package mem
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDictionary int
|
||||
|
||||
func init() {
|
||||
var d Dictionary
|
||||
reflectStaticSizeDictionary = int(reflect.TypeOf(d).Size())
|
||||
}
|
||||
|
||||
// Dictionary is the in-memory representation of the term dictionary
|
||||
type Dictionary struct {
|
||||
segment *Segment
|
||||
|
@ -30,6 +39,17 @@ type Dictionary struct {
|
|||
fieldID uint16
|
||||
}
|
||||
|
||||
func (d *Dictionary) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDictionary + size.SizeOfPtr +
|
||||
len(d.field)
|
||||
|
||||
if d.segment != nil {
|
||||
sizeInBytes += int(d.segment.Size())
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// PostingsList returns the postings list for the specified term
|
||||
func (d *Dictionary) PostingsList(term string,
|
||||
except *roaring.Bitmap) (segment.PostingsList, error) {
|
||||
|
|
|
@ -15,10 +15,29 @@
|
|||
package mem
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizePostingsList int
|
||||
var reflectStaticSizePostingsIterator int
|
||||
var reflectStaticSizePosting int
|
||||
var reflectStaticSizeLocation int
|
||||
|
||||
func init() {
|
||||
var pl PostingsList
|
||||
reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size())
|
||||
var pi PostingsIterator
|
||||
reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size())
|
||||
var p Posting
|
||||
reflectStaticSizePosting = int(reflect.TypeOf(p).Size())
|
||||
var l Location
|
||||
reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
|
||||
}
|
||||
|
||||
// PostingsList is an in-memory represenation of a postings list
|
||||
type PostingsList struct {
|
||||
dictionary *Dictionary
|
||||
|
@ -27,6 +46,20 @@ type PostingsList struct {
|
|||
except *roaring.Bitmap
|
||||
}
|
||||
|
||||
func (p *PostingsList) Size() int {
|
||||
sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr
|
||||
|
||||
if p.dictionary != nil {
|
||||
sizeInBytes += p.dictionary.Size()
|
||||
}
|
||||
|
||||
if p.except != nil {
|
||||
sizeInBytes += int(p.except.GetSizeInBytes())
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Count returns the number of items on this postings list
|
||||
func (p *PostingsList) Count() uint64 {
|
||||
var rv uint64
|
||||
|
@ -83,6 +116,16 @@ type PostingsIterator struct {
|
|||
reuse Posting
|
||||
}
|
||||
|
||||
func (i *PostingsIterator) Size() int {
|
||||
sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr
|
||||
|
||||
if i.locations != nil {
|
||||
sizeInBytes += int(i.locations.GetSizeInBytes())
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Next returns the next posting on the postings list, or nil at the end
|
||||
func (i *PostingsIterator) Next() (segment.Posting, error) {
|
||||
if i.actual == nil || !i.actual.HasNext() {
|
||||
|
@ -121,6 +164,16 @@ type Posting struct {
|
|||
hasLoc bool
|
||||
}
|
||||
|
||||
func (p *Posting) Size() int {
|
||||
sizeInBytes := reflectStaticSizePosting + size.SizeOfPtr
|
||||
|
||||
if p.iterator != nil {
|
||||
sizeInBytes += p.iterator.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Number returns the document number of this posting in this segment
|
||||
func (p *Posting) Number() uint64 {
|
||||
return p.docNum
|
||||
|
@ -158,6 +211,15 @@ type Location struct {
|
|||
offset int
|
||||
}
|
||||
|
||||
func (l *Location) Size() int {
|
||||
sizeInBytes := reflectStaticSizeLocation
|
||||
if l.p != nil {
|
||||
sizeInBytes += l.p.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Field returns the name of the field (useful in composite fields to know
|
||||
// which original field the value came from)
|
||||
func (l *Location) Field() string {
|
||||
|
|
|
@ -16,11 +16,20 @@ package mem
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeSegment int
|
||||
|
||||
func init() {
|
||||
var s Segment
|
||||
reflectStaticSizeSegment = int(reflect.TypeOf(s).Size())
|
||||
}
|
||||
|
||||
// _id field is always guaranteed to have fieldID of 0
|
||||
const idFieldID uint16 = 0
|
||||
|
||||
|
@ -96,7 +105,7 @@ type Segment struct {
|
|||
|
||||
// Footprint of the segment, updated when analyzed document mutations
|
||||
// are added into the segment
|
||||
sizeInBytes uint64
|
||||
sizeInBytes int
|
||||
}
|
||||
|
||||
// New builds a new empty Segment
|
||||
|
@ -107,99 +116,87 @@ func New() *Segment {
|
|||
}
|
||||
}
|
||||
|
||||
func (s *Segment) updateSizeInBytes() {
|
||||
var sizeInBytes uint64
|
||||
func (s *Segment) updateSize() {
|
||||
sizeInBytes := reflectStaticSizeSegment
|
||||
|
||||
// FieldsMap, FieldsInv
|
||||
for k, _ := range s.FieldsMap {
|
||||
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
|
||||
2 /* size of uint16 */)
|
||||
sizeInBytes += (len(k)+size.SizeOfString)*2 +
|
||||
size.SizeOfUint16
|
||||
}
|
||||
// overhead from the data structures
|
||||
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
|
||||
|
||||
// Dicts, DictKeys
|
||||
for _, entry := range s.Dicts {
|
||||
for k, _ := range entry {
|
||||
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
|
||||
8 /* size of uint64 */)
|
||||
sizeInBytes += (len(k)+size.SizeOfString)*2 +
|
||||
size.SizeOfUint64
|
||||
}
|
||||
// overhead from the data structures
|
||||
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
|
||||
sizeInBytes += (size.SizeOfMap + size.SizeOfSlice)
|
||||
}
|
||||
sizeInBytes += (segment.SizeOfSlice * 2)
|
||||
|
||||
// Postings, PostingsLocs
|
||||
for i := 0; i < len(s.Postings); i++ {
|
||||
sizeInBytes += (s.Postings[i].GetSizeInBytes() + segment.SizeOfPointer) +
|
||||
(s.PostingsLocs[i].GetSizeInBytes() + segment.SizeOfPointer)
|
||||
sizeInBytes += (int(s.Postings[i].GetSizeInBytes()) + size.SizeOfPtr) +
|
||||
(int(s.PostingsLocs[i].GetSizeInBytes()) + size.SizeOfPtr)
|
||||
}
|
||||
sizeInBytes += (segment.SizeOfSlice * 2)
|
||||
|
||||
// Freqs, Norms
|
||||
for i := 0; i < len(s.Freqs); i++ {
|
||||
sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ +
|
||||
len(s.Norms[i])*4 /* size of float32 */) +
|
||||
(segment.SizeOfSlice * 2)
|
||||
sizeInBytes += (len(s.Freqs[i])*size.SizeOfUint64 +
|
||||
len(s.Norms[i])*size.SizeOfFloat32) +
|
||||
(size.SizeOfSlice * 2)
|
||||
}
|
||||
sizeInBytes += (segment.SizeOfSlice * 2)
|
||||
|
||||
// Location data
|
||||
for i := 0; i < len(s.Locfields); i++ {
|
||||
sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ +
|
||||
len(s.Locstarts[i])*8 /* size of uint64 */ +
|
||||
len(s.Locends[i])*8 /* size of uint64 */ +
|
||||
len(s.Locpos[i])*8 /* size of uint64 */)
|
||||
sizeInBytes += len(s.Locfields[i])*size.SizeOfUint16 +
|
||||
len(s.Locstarts[i])*size.SizeOfUint64 +
|
||||
len(s.Locends[i])*size.SizeOfUint64 +
|
||||
len(s.Locpos[i])*size.SizeOfUint64
|
||||
|
||||
for j := 0; j < len(s.Locarraypos[i]); j++ {
|
||||
sizeInBytes += uint64(len(s.Locarraypos[i][j])*8 /* size of uint64 */) +
|
||||
segment.SizeOfSlice
|
||||
sizeInBytes += len(s.Locarraypos[i][j])*size.SizeOfUint64 +
|
||||
size.SizeOfSlice
|
||||
}
|
||||
|
||||
sizeInBytes += (segment.SizeOfSlice * 5)
|
||||
sizeInBytes += (size.SizeOfSlice * 5)
|
||||
}
|
||||
sizeInBytes += (segment.SizeOfSlice * 5)
|
||||
|
||||
// Stored data
|
||||
for i := 0; i < len(s.Stored); i++ {
|
||||
for _, v := range s.Stored[i] {
|
||||
sizeInBytes += uint64(2 /* size of uint16 */)
|
||||
sizeInBytes += size.SizeOfUint16
|
||||
for _, arr := range v {
|
||||
sizeInBytes += uint64(len(arr)) + segment.SizeOfSlice
|
||||
sizeInBytes += len(arr) + size.SizeOfSlice
|
||||
}
|
||||
sizeInBytes += segment.SizeOfSlice
|
||||
sizeInBytes += size.SizeOfSlice
|
||||
}
|
||||
|
||||
for _, v := range s.StoredTypes[i] {
|
||||
sizeInBytes += uint64(2 /* size of uint16 */ +len(v)) + segment.SizeOfSlice
|
||||
sizeInBytes += size.SizeOfUint16 + len(v) + size.SizeOfSlice
|
||||
}
|
||||
|
||||
for _, v := range s.StoredPos[i] {
|
||||
sizeInBytes += uint64(2 /* size of uint16 */)
|
||||
sizeInBytes += size.SizeOfUint16
|
||||
for _, arr := range v {
|
||||
sizeInBytes += uint64(len(arr)*8 /* size of uint64 */) +
|
||||
segment.SizeOfSlice
|
||||
sizeInBytes += len(arr)*size.SizeOfUint64 +
|
||||
size.SizeOfSlice
|
||||
}
|
||||
sizeInBytes += segment.SizeOfSlice
|
||||
sizeInBytes += size.SizeOfSlice
|
||||
}
|
||||
|
||||
// overhead from map(s) within Stored, StoredTypes, StoredPos
|
||||
sizeInBytes += (segment.SizeOfMap * 3)
|
||||
sizeInBytes += (size.SizeOfMap * 3)
|
||||
}
|
||||
// overhead from data structures: Stored, StoredTypes, StoredPos
|
||||
sizeInBytes += (segment.SizeOfSlice * 3)
|
||||
|
||||
// DocValueFields
|
||||
sizeInBytes += uint64(len(s.DocValueFields)*3 /* size of uint16 + bool */) +
|
||||
segment.SizeOfMap
|
||||
|
||||
// SizeInBytes
|
||||
sizeInBytes += uint64(8)
|
||||
sizeInBytes += len(s.DocValueFields) * (size.SizeOfUint16 + size.SizeOfBool)
|
||||
|
||||
s.sizeInBytes = sizeInBytes
|
||||
}
|
||||
|
||||
func (s *Segment) SizeInBytes() uint64 {
|
||||
func (s *Segment) Size() int {
|
||||
return s.sizeInBytes
|
||||
}
|
||||
|
||||
|
|
|
@ -169,7 +169,7 @@ func TestSingle(t *testing.T) {
|
|||
t.Fatalf("segment nil, not expected")
|
||||
}
|
||||
|
||||
if segment.SizeInBytes() <= 0 {
|
||||
if segment.Size() <= 0 {
|
||||
t.Fatalf("segment size not updated")
|
||||
}
|
||||
|
||||
|
|
|
@ -19,12 +19,6 @@ import (
|
|||
"github.com/blevesearch/bleve/index"
|
||||
)
|
||||
|
||||
// Overhead from go data structures when deployed on a 64-bit system.
|
||||
const SizeOfMap uint64 = 8
|
||||
const SizeOfPointer uint64 = 8
|
||||
const SizeOfSlice uint64 = 24
|
||||
const SizeOfString uint64 = 16
|
||||
|
||||
// DocumentFieldValueVisitor defines a callback to be visited for each
|
||||
// stored field value. The return value determines if the visitor
|
||||
// should keep going. Returning true continues visiting, false stops.
|
||||
|
@ -42,7 +36,7 @@ type Segment interface {
|
|||
|
||||
Close() error
|
||||
|
||||
SizeInBytes() uint64
|
||||
Size() int
|
||||
|
||||
AddRef()
|
||||
DecRef() error
|
||||
|
@ -63,6 +57,8 @@ type DictionaryIterator interface {
|
|||
type PostingsList interface {
|
||||
Iterator() PostingsIterator
|
||||
|
||||
Size() int
|
||||
|
||||
Count() uint64
|
||||
|
||||
// NOTE deferred for future work
|
||||
|
@ -77,6 +73,8 @@ type PostingsIterator interface {
|
|||
// implementations may return a shared instance to reduce memory
|
||||
// allocations.
|
||||
Next() (Posting, error)
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
type Posting interface {
|
||||
|
@ -86,6 +84,8 @@ type Posting interface {
|
|||
Norm() float64
|
||||
|
||||
Locations() []Location
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
type Location interface {
|
||||
|
@ -94,6 +94,7 @@ type Location interface {
|
|||
End() uint64
|
||||
Pos() uint64
|
||||
ArrayPositions() []uint64
|
||||
Size() int
|
||||
}
|
||||
|
||||
// DocumentFieldTermVisitable is implemented by various scorch segment
|
||||
|
|
|
@ -18,10 +18,18 @@ import (
|
|||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"reflect"
|
||||
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
var reflectStaticSizeMetaData int
|
||||
|
||||
func init() {
|
||||
var md MetaData
|
||||
reflectStaticSizeMetaData = int(reflect.TypeOf(md).Size())
|
||||
}
|
||||
|
||||
var termSeparator byte = 0xff
|
||||
var termSeparatorSplitSlice = []byte{termSeparator}
|
||||
|
||||
|
|
|
@ -19,13 +19,21 @@ import (
|
|||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
var reflectStaticSizedocValueIterator int
|
||||
|
||||
func init() {
|
||||
var dvi docValueIterator
|
||||
reflectStaticSizedocValueIterator = int(reflect.TypeOf(dvi).Size())
|
||||
}
|
||||
|
||||
type docValueIterator struct {
|
||||
field string
|
||||
curChunkNum uint64
|
||||
|
@ -36,21 +44,12 @@ type docValueIterator struct {
|
|||
curChunkData []byte // compressed data cache
|
||||
}
|
||||
|
||||
func (di *docValueIterator) sizeInBytes() uint64 {
|
||||
// curChunkNum, numChunks, dvDataLoc --> uint64
|
||||
sizeInBytes := 24
|
||||
|
||||
// field
|
||||
sizeInBytes += (len(di.field) + int(segment.SizeOfString))
|
||||
|
||||
// chunkLens, curChunkHeader
|
||||
sizeInBytes += len(di.chunkLens)*8 +
|
||||
len(di.curChunkHeader)*24 +
|
||||
int(segment.SizeOfSlice*2) /* overhead from slices */
|
||||
|
||||
// curChunkData is mmap'ed, not included
|
||||
|
||||
return uint64(sizeInBytes)
|
||||
func (di *docValueIterator) size() int {
|
||||
return reflectStaticSizedocValueIterator + size.SizeOfPtr +
|
||||
len(di.field) +
|
||||
len(di.chunkLens)*size.SizeOfUint64 +
|
||||
len(di.curChunkHeader)*reflectStaticSizeMetaData +
|
||||
len(di.curChunkData)
|
||||
}
|
||||
|
||||
func (di *docValueIterator) fieldName() string {
|
||||
|
|
|
@ -19,12 +19,30 @@ import (
|
|||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/Smerity/govarint"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizePostingsList int
|
||||
var reflectStaticSizePostingsIterator int
|
||||
var reflectStaticSizePosting int
|
||||
var reflectStaticSizeLocation int
|
||||
|
||||
func init() {
|
||||
var pl PostingsList
|
||||
reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size())
|
||||
var pi PostingsIterator
|
||||
reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size())
|
||||
var p Posting
|
||||
reflectStaticSizePosting = int(reflect.TypeOf(p).Size())
|
||||
var l Location
|
||||
reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
|
||||
}
|
||||
|
||||
// PostingsList is an in-memory represenation of a postings list
|
||||
type PostingsList struct {
|
||||
sb *SegmentBase
|
||||
|
@ -36,6 +54,28 @@ type PostingsList struct {
|
|||
except *roaring.Bitmap
|
||||
}
|
||||
|
||||
func (p *PostingsList) Size() int {
|
||||
sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr
|
||||
|
||||
if p.sb != nil {
|
||||
sizeInBytes += (p.sb.Size() - len(p.sb.mem)) // do not include the mmap'ed part
|
||||
}
|
||||
|
||||
if p.locBitmap != nil {
|
||||
sizeInBytes += int(p.locBitmap.GetSizeInBytes())
|
||||
}
|
||||
|
||||
if p.postings != nil {
|
||||
sizeInBytes += int(p.postings.GetSizeInBytes())
|
||||
}
|
||||
|
||||
if p.except != nil {
|
||||
sizeInBytes += int(p.except.GetSizeInBytes())
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Iterator returns an iterator for this postings list
|
||||
func (p *PostingsList) Iterator() segment.PostingsIterator {
|
||||
return p.iterator(nil)
|
||||
|
@ -193,6 +233,25 @@ type PostingsIterator struct {
|
|||
nextLocs []Location // reused across Next() calls
|
||||
}
|
||||
|
||||
func (i *PostingsIterator) Size() int {
|
||||
sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr +
|
||||
len(i.currChunkFreqNorm) +
|
||||
len(i.currChunkLoc) +
|
||||
len(i.freqChunkLens)*size.SizeOfUint64 +
|
||||
len(i.locChunkLens)*size.SizeOfUint64 +
|
||||
i.next.Size()
|
||||
|
||||
if i.locBitmap != nil {
|
||||
sizeInBytes += int(i.locBitmap.GetSizeInBytes())
|
||||
}
|
||||
|
||||
for _, entry := range i.nextLocs {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (i *PostingsIterator) loadChunk(chunk int) error {
|
||||
if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) {
|
||||
return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens))
|
||||
|
@ -444,6 +503,20 @@ type Posting struct {
|
|||
locs []segment.Location
|
||||
}
|
||||
|
||||
func (p *Posting) Size() int {
|
||||
sizeInBytes := reflectStaticSizePosting
|
||||
|
||||
if p.iterator != nil {
|
||||
sizeInBytes += p.iterator.Size()
|
||||
}
|
||||
|
||||
for _, entry := range p.locs {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Number returns the document number of this posting in this segment
|
||||
func (p *Posting) Number() uint64 {
|
||||
return p.docNum
|
||||
|
@ -473,6 +546,12 @@ type Location struct {
|
|||
ap []uint64
|
||||
}
|
||||
|
||||
func (l *Location) Size() int {
|
||||
return reflectStaticSizeLocation +
|
||||
len(l.field) +
|
||||
len(l.ap)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
// Field returns the name of the field (useful in composite fields to know
|
||||
// which original field the value came from)
|
||||
func (l *Location) Field() string {
|
||||
|
|
|
@ -20,16 +20,25 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"reflect"
|
||||
"sync"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/Smerity/govarint"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
"github.com/couchbase/vellum"
|
||||
mmap "github.com/edsrzf/mmap-go"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
var reflectStaticSizeSegmentBase int
|
||||
|
||||
func init() {
|
||||
var sb SegmentBase
|
||||
reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size())
|
||||
}
|
||||
|
||||
// Open returns a zap impl of a segment
|
||||
func Open(path string) (segment.Segment, error) {
|
||||
f, err := os.Open(path)
|
||||
|
@ -92,6 +101,32 @@ type SegmentBase struct {
|
|||
fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field
|
||||
}
|
||||
|
||||
func (sb *SegmentBase) Size() int {
|
||||
sizeInBytes := reflectStaticSizeSegmentBase +
|
||||
len(sb.mem)
|
||||
|
||||
// fieldsMap
|
||||
for k, _ := range sb.fieldsMap {
|
||||
sizeInBytes += (len(k) + size.SizeOfString) + size.SizeOfUint16
|
||||
}
|
||||
|
||||
// fieldsInv, dictLocs
|
||||
for _, entry := range sb.fieldsInv {
|
||||
sizeInBytes += len(entry) + size.SizeOfString
|
||||
}
|
||||
sizeInBytes += len(sb.dictLocs) * size.SizeOfUint64
|
||||
|
||||
// fieldDvIterMap
|
||||
for _, v := range sb.fieldDvIterMap {
|
||||
sizeInBytes += size.SizeOfUint16 + size.SizeOfPtr
|
||||
if v != nil {
|
||||
sizeInBytes += v.size()
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (sb *SegmentBase) AddRef() {}
|
||||
func (sb *SegmentBase) DecRef() (err error) { return nil }
|
||||
func (sb *SegmentBase) Close() (err error) { return nil }
|
||||
|
@ -111,56 +146,19 @@ type Segment struct {
|
|||
refs int64
|
||||
}
|
||||
|
||||
func (s *Segment) SizeInBytes() uint64 {
|
||||
func (s *Segment) Size() int {
|
||||
// 8 /* size of file pointer */
|
||||
// 4 /* size of version -> uint32 */
|
||||
// 4 /* size of crc -> uint32 */
|
||||
sizeOfUints := 16
|
||||
|
||||
sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints
|
||||
sizeInBytes := (len(s.path) + size.SizeOfString) + sizeOfUints
|
||||
|
||||
// mutex, refs -> int64
|
||||
sizeInBytes += 16
|
||||
|
||||
// do not include the mmap'ed part
|
||||
return uint64(sizeInBytes) + s.SegmentBase.SizeInBytes() - uint64(len(s.mem))
|
||||
}
|
||||
|
||||
func (s *SegmentBase) SizeInBytes() uint64 {
|
||||
// 4 /* size of memCRC -> uint32 */
|
||||
// 4 /* size of chunkFactor -> uint32 */
|
||||
// 8 /* size of numDocs -> uint64 */
|
||||
// 8 /* size of storedIndexOffset -> uint64 */
|
||||
// 8 /* size of fieldsIndexOffset -> uint64 */
|
||||
// 8 /* size of docValueOffset -> uint64 */
|
||||
sizeInBytes := 40
|
||||
|
||||
sizeInBytes += len(s.mem) + int(segment.SizeOfSlice)
|
||||
|
||||
// fieldsMap
|
||||
for k, _ := range s.fieldsMap {
|
||||
sizeInBytes += (len(k) + int(segment.SizeOfString)) + 2 /* size of uint16 */
|
||||
}
|
||||
sizeInBytes += int(segment.SizeOfMap) /* overhead from map */
|
||||
|
||||
// fieldsInv, dictLocs
|
||||
for _, entry := range s.fieldsInv {
|
||||
sizeInBytes += (len(entry) + int(segment.SizeOfString))
|
||||
}
|
||||
sizeInBytes += len(s.dictLocs) * 8 /* size of uint64 */
|
||||
sizeInBytes += int(segment.SizeOfSlice) * 3 /* overhead from slices */
|
||||
|
||||
// fieldDvIterMap
|
||||
sizeInBytes += len(s.fieldDvIterMap) *
|
||||
int(segment.SizeOfPointer+2 /* size of uint16 */)
|
||||
for _, entry := range s.fieldDvIterMap {
|
||||
if entry != nil {
|
||||
sizeInBytes += int(entry.sizeInBytes())
|
||||
}
|
||||
}
|
||||
sizeInBytes += int(segment.SizeOfMap)
|
||||
|
||||
return uint64(sizeInBytes)
|
||||
return sizeInBytes + s.SegmentBase.Size() - len(s.mem)
|
||||
}
|
||||
|
||||
func (s *Segment) AddRef() {
|
||||
|
|
|
@ -27,6 +27,7 @@ import (
|
|||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
type asynchSegmentResult struct {
|
||||
|
@ -89,6 +90,12 @@ func (i *IndexSnapshot) Close() error {
|
|||
return i.DecRef()
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Size() int {
|
||||
// Just return the size of the pointer for estimating the overhead
|
||||
// during Search, a reference of the IndexSnapshot serves as the reader.
|
||||
return size.SizeOfPtr
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
|
||||
|
||||
results := make(chan *asynchSegmentResult)
|
||||
|
|
|
@ -16,17 +16,30 @@ package scorch
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexSnapshotDocIDReader int
|
||||
|
||||
func init() {
|
||||
var isdr IndexSnapshotDocIDReader
|
||||
reflectStaticSizeIndexSnapshotDocIDReader = int(reflect.TypeOf(isdr).Size())
|
||||
}
|
||||
|
||||
type IndexSnapshotDocIDReader struct {
|
||||
snapshot *IndexSnapshot
|
||||
iterators []roaring.IntIterable
|
||||
segmentOffset int
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotDocIDReader) Size() int {
|
||||
return reflectStaticSizeIndexSnapshotDocIDReader + size.SizeOfPtr
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) {
|
||||
for i.segmentOffset < len(i.iterators) {
|
||||
if !i.iterators[i.segmentOffset].HasNext() {
|
||||
|
|
|
@ -16,12 +16,21 @@ package scorch
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexSnapshotTermFieldReader int
|
||||
|
||||
func init() {
|
||||
var istfr IndexSnapshotTermFieldReader
|
||||
reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size())
|
||||
}
|
||||
|
||||
type IndexSnapshotTermFieldReader struct {
|
||||
term []byte
|
||||
field string
|
||||
|
@ -36,6 +45,27 @@ type IndexSnapshotTermFieldReader struct {
|
|||
currID index.IndexInternalID
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr +
|
||||
len(i.term) +
|
||||
len(i.field) +
|
||||
len(i.currID)
|
||||
|
||||
for _, entry := range i.postings {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
for _, entry := range i.iterators {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
if i.currPosting != nil {
|
||||
sizeInBytes += i.currPosting.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
||||
rv := preAlloced
|
||||
if rv == nil {
|
||||
|
|
|
@ -213,7 +213,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *cachedDocs) sizeInBytes() uint64 {
|
||||
func (c *cachedDocs) size() int {
|
||||
sizeInBytes := 0
|
||||
c.m.Lock()
|
||||
for k, v := range c.cache { // cachedFieldDocs
|
||||
|
@ -225,5 +225,5 @@ func (c *cachedDocs) sizeInBytes() uint64 {
|
|||
}
|
||||
}
|
||||
c.m.Unlock()
|
||||
return uint64(sizeInBytes)
|
||||
return sizeInBytes
|
||||
}
|
||||
|
|
|
@ -15,17 +15,31 @@
|
|||
package upsidedown
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexReader int
|
||||
|
||||
func init() {
|
||||
var ir IndexReader
|
||||
reflectStaticSizeIndexReader = int(reflect.TypeOf(ir).Size())
|
||||
}
|
||||
|
||||
type IndexReader struct {
|
||||
index *UpsideDownCouch
|
||||
kvreader store.KVReader
|
||||
docCount uint64
|
||||
}
|
||||
|
||||
func (i *IndexReader) Size() int {
|
||||
return reflectStaticSizeIndexReader + size.SizeOfPtr
|
||||
}
|
||||
|
||||
func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
||||
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
|
||||
if fieldExists {
|
||||
|
|
|
@ -16,13 +16,27 @@ package upsidedown
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
"sort"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeUpsideDownCouchTermFieldReader int
|
||||
var reflectStaticSizeUpsideDownCouchDocIDReader int
|
||||
|
||||
func init() {
|
||||
var tfr UpsideDownCouchTermFieldReader
|
||||
reflectStaticSizeUpsideDownCouchTermFieldReader =
|
||||
int(reflect.TypeOf(tfr).Size())
|
||||
var cdr UpsideDownCouchDocIDReader
|
||||
reflectStaticSizeUpsideDownCouchDocIDReader =
|
||||
int(reflect.TypeOf(cdr).Size())
|
||||
}
|
||||
|
||||
type UpsideDownCouchTermFieldReader struct {
|
||||
count uint64
|
||||
indexReader *IndexReader
|
||||
|
@ -35,6 +49,19 @@ type UpsideDownCouchTermFieldReader struct {
|
|||
includeTermVectors bool
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr +
|
||||
len(r.term) +
|
||||
r.tfrPrealloc.Size() +
|
||||
len(r.keyBuf)
|
||||
|
||||
if r.tfrNext != nil {
|
||||
sizeInBytes += r.tfrNext.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
|
||||
bufNeeded := termFrequencyRowKeySize(term, nil)
|
||||
if bufNeeded < dictionaryRowKeySize(term) {
|
||||
|
@ -174,8 +201,18 @@ type UpsideDownCouchDocIDReader struct {
|
|||
onlyMode bool
|
||||
}
|
||||
|
||||
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
|
||||
func (r *UpsideDownCouchDocIDReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader +
|
||||
r.indexReader.Size()
|
||||
|
||||
for _, entry := range r.only {
|
||||
sizeInBytes += size.SizeOfString + len(entry)
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
|
||||
startBytes := []byte{0x0}
|
||||
endBytes := []byte{0xff}
|
||||
|
||||
|
|
|
@ -20,10 +20,22 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/size"
|
||||
"github.com/golang/protobuf/proto"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermFrequencyRow int
|
||||
var reflectStaticSizeTermVector int
|
||||
|
||||
func init() {
|
||||
var tfr TermFrequencyRow
|
||||
reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(tfr).Size())
|
||||
var tv TermVector
|
||||
reflectStaticSizeTermVector = int(reflect.TypeOf(tv).Size())
|
||||
}
|
||||
|
||||
const ByteSeparator byte = 0xff
|
||||
|
||||
type UpsideDownCouchRowStream chan UpsideDownCouchRow
|
||||
|
@ -358,6 +370,11 @@ type TermVector struct {
|
|||
end uint64
|
||||
}
|
||||
|
||||
func (tv *TermVector) Size() int {
|
||||
return reflectStaticSizeTermVector + size.SizeOfPtr +
|
||||
len(tv.arrayPositions)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
func (tv *TermVector) String() string {
|
||||
return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
|
||||
}
|
||||
|
@ -371,6 +388,18 @@ type TermFrequencyRow struct {
|
|||
field uint16
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTermFrequencyRow +
|
||||
len(tfr.term) +
|
||||
len(tfr.doc)
|
||||
|
||||
for _, entry := range tfr.vectors {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) Term() []byte {
|
||||
return tfr.term
|
||||
}
|
||||
|
|
|
@ -362,8 +362,59 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
|
|||
return i.SearchInContext(context.Background(), req)
|
||||
}
|
||||
|
||||
// memNeededForSearch is a helper function that returns an estimate of RAM
|
||||
// needed to execute a search request.
|
||||
func memNeededForSearch(req *SearchRequest,
|
||||
searcher search.Searcher,
|
||||
topnCollector *collector.TopNCollector) uint64 {
|
||||
|
||||
backingSize := req.Size + req.From + 1
|
||||
if req.Size+req.From > collector.PreAllocSizeSkipCap {
|
||||
backingSize = collector.PreAllocSizeSkipCap + 1
|
||||
}
|
||||
numDocMatches := backingSize + searcher.DocumentMatchPoolSize()
|
||||
|
||||
estimate := 0
|
||||
|
||||
// overhead, size in bytes from collector
|
||||
estimate += topnCollector.Size()
|
||||
|
||||
var dm search.DocumentMatch
|
||||
sizeOfDocumentMatch := dm.Size()
|
||||
|
||||
// pre-allocing DocumentMatchPool
|
||||
var sc search.SearchContext
|
||||
estimate += sc.Size() + numDocMatches*sizeOfDocumentMatch
|
||||
|
||||
// searcher overhead
|
||||
estimate += searcher.Size()
|
||||
|
||||
// overhead from results, lowestMatchOutsideResults
|
||||
estimate += (numDocMatches + 1) * sizeOfDocumentMatch
|
||||
|
||||
// additional overhead from SearchResult
|
||||
var sr SearchResult
|
||||
estimate += sr.Size()
|
||||
|
||||
// overhead from facet results
|
||||
if req.Facets != nil {
|
||||
var fr search.FacetResult
|
||||
estimate += len(req.Facets) * fr.Size()
|
||||
}
|
||||
|
||||
// highlighting, store
|
||||
var d document.Document
|
||||
if len(req.Fields) > 0 || req.Highlight != nil {
|
||||
for i := 0; i < (req.Size + req.From); i++ { // size + from => number of hits
|
||||
estimate += (req.Size + req.From) * d.Size()
|
||||
}
|
||||
}
|
||||
|
||||
return uint64(estimate)
|
||||
}
|
||||
|
||||
// SearchInContext executes a search request operation within the provided
|
||||
// Context. Returns a SearchResult object or an error.
|
||||
// Context. Returns a SearchResult object or an error.
|
||||
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
|
|
@ -36,6 +36,9 @@ import (
|
|||
"github.com/blevesearch/bleve/mapping"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/query"
|
||||
|
||||
"github.com/blevesearch/bleve/index/scorch"
|
||||
"github.com/blevesearch/bleve/index/upsidedown"
|
||||
)
|
||||
|
||||
func TestCrud(t *testing.T) {
|
||||
|
@ -1815,3 +1818,55 @@ func TestIndexAdvancedCountMatchSearch(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkSearchOverhead(indexType string, b *testing.B) {
|
||||
defer func() {
|
||||
err := os.RemoveAll("testidx")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
index, err := NewUsing("testidx", NewIndexMapping(),
|
||||
indexType, Config.DefaultKVStore, nil)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := index.Close()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
elements := []string{"air", "water", "fire", "earth"}
|
||||
for j := 0; j < 10000; j++ {
|
||||
err = index.Index(fmt.Sprintf("%d", j),
|
||||
map[string]interface{}{"name": elements[j%len(elements)]})
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
query1 := NewTermQuery("water")
|
||||
query2 := NewTermQuery("fire")
|
||||
query := NewDisjunctionQuery(query1, query2)
|
||||
req := NewSearchRequest(query)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for n := 0; n < b.N; n++ {
|
||||
_, err = index.Search(req)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkUpsidedownSearchOverhead(b *testing.B) {
|
||||
benchmarkSearchOverhead(upsidedown.Name, b)
|
||||
}
|
||||
|
||||
func BenchmarkScorchSearchOverhead(b *testing.B) {
|
||||
benchmarkSearchOverhead(scorch.Name, b)
|
||||
}
|
||||
|
|
30
search.go
30
search.go
|
@ -17,6 +17,7 @@ package bleve
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
|
@ -24,8 +25,19 @@ import (
|
|||
"github.com/blevesearch/bleve/registry"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/query"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeSearchResult int
|
||||
var reflectStaticSizeSearchStatus int
|
||||
|
||||
func init() {
|
||||
var sr SearchResult
|
||||
reflectStaticSizeSearchResult = int(reflect.TypeOf(sr).Size())
|
||||
var ss SearchStatus
|
||||
reflectStaticSizeSearchStatus = int(reflect.TypeOf(ss).Size())
|
||||
}
|
||||
|
||||
var cache = registry.NewCache()
|
||||
|
||||
const defaultDateTimeParser = optional.Name
|
||||
|
@ -432,6 +444,24 @@ type SearchResult struct {
|
|||
Facets search.FacetResults `json:"facets"`
|
||||
}
|
||||
|
||||
func (sr *SearchResult) Size() int {
|
||||
sizeInBytes := reflectStaticSizeSearchResult + size.SizeOfPtr +
|
||||
reflectStaticSizeSearchStatus
|
||||
|
||||
for _, entry := range sr.Hits {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
for k, v := range sr.Facets {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
v.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (sr *SearchResult) String() string {
|
||||
rv := ""
|
||||
if sr.Total > 0 {
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
|
@ -25,6 +27,18 @@ type stubSearcher struct {
|
|||
matches []*search.DocumentMatch
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) Size() int {
|
||||
sizeInBytes := int(reflect.TypeOf(*ss).Size())
|
||||
|
||||
for _, entry := range ss.matches {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
||||
if ss.index < len(ss.matches) {
|
||||
rv := ctx.DocumentMatchPool.Get()
|
||||
|
@ -76,6 +90,10 @@ func (ss *stubSearcher) DocumentMatchPoolSize() int {
|
|||
|
||||
type stubReader struct{}
|
||||
|
||||
func (sr *stubReader) Size() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (sr *stubReader) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
|
|
@ -16,12 +16,21 @@ package collector
|
|||
|
||||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTopNCollector int
|
||||
|
||||
func init() {
|
||||
var coll TopNCollector
|
||||
reflectStaticSizeTopNCollector = int(reflect.TypeOf(coll).Size())
|
||||
}
|
||||
|
||||
type collectorStore interface {
|
||||
// Add the document, and if the new store size exceeds the provided size
|
||||
// the last element is removed and returned. If the size has not been
|
||||
|
@ -98,6 +107,22 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
|
|||
return hc
|
||||
}
|
||||
|
||||
func (hc *TopNCollector) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTopNCollector + size.SizeOfPtr
|
||||
|
||||
if hc.facetsBuilder != nil {
|
||||
sizeInBytes += hc.facetsBuilder.Size()
|
||||
}
|
||||
|
||||
for _, entry := range hc.neededFields {
|
||||
sizeInBytes += len(entry) + size.SizeOfString
|
||||
}
|
||||
|
||||
sizeInBytes += len(hc.cachedScoring) + len(hc.cachedDesc)
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Collect goes to the index to find the matching documents
|
||||
func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
|
||||
startTime := time.Now()
|
||||
|
|
|
@ -17,8 +17,18 @@ package search
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeExplanation int
|
||||
|
||||
func init() {
|
||||
var e Explanation
|
||||
reflectStaticSizeExplanation = int(reflect.TypeOf(e).Size())
|
||||
}
|
||||
|
||||
type Explanation struct {
|
||||
Value float64 `json:"value"`
|
||||
Message string `json:"message"`
|
||||
|
@ -32,3 +42,14 @@ func (expl *Explanation) String() string {
|
|||
}
|
||||
return string(js)
|
||||
}
|
||||
|
||||
func (expl *Explanation) Size() int {
|
||||
sizeInBytes := reflectStaticSizeExplanation + size.SizeOfPtr +
|
||||
len(expl.Message)
|
||||
|
||||
for _, entry := range expl.Children {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
|
|
@ -15,13 +15,25 @@
|
|||
package facet
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDateTimeFacetBuilder int
|
||||
var reflectStaticSizedateTimeRange int
|
||||
|
||||
func init() {
|
||||
var dtfb DateTimeFacetBuilder
|
||||
reflectStaticSizeDateTimeFacetBuilder = int(reflect.TypeOf(dtfb).Size())
|
||||
var dtr dateTimeRange
|
||||
reflectStaticSizedateTimeRange = int(reflect.TypeOf(dtr).Size())
|
||||
}
|
||||
|
||||
type dateTimeRange struct {
|
||||
start time.Time
|
||||
end time.Time
|
||||
|
@ -46,6 +58,23 @@ func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDateTimeFacetBuilder + size.SizeOfPtr +
|
||||
len(fb.field)
|
||||
|
||||
for k, _ := range fb.termsCount {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfInt
|
||||
}
|
||||
|
||||
for k, _ := range fb.ranges {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfPtr + reflectStaticSizedateTimeRange
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
|
||||
r := dateTimeRange{
|
||||
start: start,
|
||||
|
|
|
@ -15,12 +15,24 @@
|
|||
package facet
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeNumericFacetBuilder int
|
||||
var reflectStaticSizenumericRange int
|
||||
|
||||
func init() {
|
||||
var nfb NumericFacetBuilder
|
||||
reflectStaticSizeNumericFacetBuilder = int(reflect.TypeOf(nfb).Size())
|
||||
var nr numericRange
|
||||
reflectStaticSizenumericRange = int(reflect.TypeOf(nr).Size())
|
||||
}
|
||||
|
||||
type numericRange struct {
|
||||
min *float64
|
||||
max *float64
|
||||
|
@ -45,6 +57,23 @@ func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) Size() int {
|
||||
sizeInBytes := reflectStaticSizeNumericFacetBuilder + size.SizeOfPtr +
|
||||
len(fb.field)
|
||||
|
||||
for k, _ := range fb.termsCount {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfInt
|
||||
}
|
||||
|
||||
for k, _ := range fb.ranges {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfPtr + reflectStaticSizenumericRange
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
|
||||
r := numericRange{
|
||||
min: min,
|
||||
|
|
|
@ -15,11 +15,20 @@
|
|||
package facet
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermsFacetBuilder int
|
||||
|
||||
func init() {
|
||||
var tfb TermsFacetBuilder
|
||||
reflectStaticSizeTermsFacetBuilder = int(reflect.TypeOf(tfb).Size())
|
||||
}
|
||||
|
||||
type TermsFacetBuilder struct {
|
||||
size int
|
||||
field string
|
||||
|
@ -37,6 +46,18 @@ func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
func (fb *TermsFacetBuilder) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr +
|
||||
len(fb.field)
|
||||
|
||||
for k, _ := range fb.termsCount {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfInt
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (fb *TermsFacetBuilder) Field() string {
|
||||
return fb.field
|
||||
}
|
||||
|
|
|
@ -15,11 +15,32 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeFacetsBuilder int
|
||||
var reflectStaticSizeFacetResult int
|
||||
var reflectStaticSizeTermFacet int
|
||||
var reflectStaticSizeNumericRangeFacet int
|
||||
var reflectStaticSizeDateRangeFacet int
|
||||
|
||||
func init() {
|
||||
var fb FacetsBuilder
|
||||
reflectStaticSizeFacetsBuilder = int(reflect.TypeOf(fb).Size())
|
||||
var fr FacetResult
|
||||
reflectStaticSizeFacetResult = int(reflect.TypeOf(fr).Size())
|
||||
var tf TermFacet
|
||||
reflectStaticSizeTermFacet = int(reflect.TypeOf(tf).Size())
|
||||
var nrf NumericRangeFacet
|
||||
reflectStaticSizeNumericRangeFacet = int(reflect.TypeOf(nrf).Size())
|
||||
var drf DateRangeFacet
|
||||
reflectStaticSizeDateRangeFacet = int(reflect.TypeOf(drf).Size())
|
||||
}
|
||||
|
||||
type FacetBuilder interface {
|
||||
StartDoc()
|
||||
UpdateVisitor(field string, term []byte)
|
||||
|
@ -27,6 +48,8 @@ type FacetBuilder interface {
|
|||
|
||||
Result() *FacetResult
|
||||
Field() string
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
type FacetsBuilder struct {
|
||||
|
@ -42,6 +65,22 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
func (fb *FacetsBuilder) Size() int {
|
||||
sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr +
|
||||
fb.indexReader.Size()
|
||||
|
||||
for k, v := range fb.facets {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
v.Size()
|
||||
}
|
||||
|
||||
for _, entry := range fb.fields {
|
||||
sizeInBytes += size.SizeOfString + len(entry)
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
|
||||
fb.facets[name] = facetBuilder
|
||||
fb.fields = append(fb.fields, facetBuilder.Field())
|
||||
|
@ -213,6 +252,14 @@ type FacetResult struct {
|
|||
DateRanges DateRangeFacets `json:"date_ranges,omitempty"`
|
||||
}
|
||||
|
||||
func (fr *FacetResult) Size() int {
|
||||
return reflectStaticSizeFacetResult + size.SizeOfPtr +
|
||||
len(fr.Field) +
|
||||
len(fr.Terms)*(reflectStaticSizeTermFacet+size.SizeOfPtr) +
|
||||
len(fr.NumericRanges)*(reflectStaticSizeNumericRangeFacet+size.SizeOfPtr) +
|
||||
len(fr.DateRanges)*(reflectStaticSizeDateRangeFacet+size.SizeOfPtr)
|
||||
}
|
||||
|
||||
func (fr *FacetResult) Merge(other *FacetResult) {
|
||||
fr.Total += other.Total
|
||||
fr.Missing += other.Missing
|
||||
|
|
|
@ -14,6 +14,17 @@
|
|||
|
||||
package search
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDocumentMatchPool int
|
||||
|
||||
func init() {
|
||||
var dmp DocumentMatchPool
|
||||
reflectStaticSizeDocumentMatchPool = int(reflect.TypeOf(dmp).Size())
|
||||
}
|
||||
|
||||
// DocumentMatchPoolTooSmall is a callback function that can be executed
|
||||
// when the DocumentMatchPool does not have sufficient capacity
|
||||
// By default we just perform just-in-time allocation, but you could log
|
||||
|
|
|
@ -15,13 +15,27 @@
|
|||
package scorer
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeConjunctionQueryScorer int
|
||||
|
||||
func init() {
|
||||
var cqs ConjunctionQueryScorer
|
||||
reflectStaticSizeConjunctionQueryScorer = int(reflect.TypeOf(cqs).Size())
|
||||
}
|
||||
|
||||
type ConjunctionQueryScorer struct {
|
||||
options search.SearcherOptions
|
||||
}
|
||||
|
||||
func (s *ConjunctionQueryScorer) Size() int {
|
||||
return reflectStaticSizeConjunctionQueryScorer + size.SizeOfPtr
|
||||
}
|
||||
|
||||
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
|
||||
return &ConjunctionQueryScorer{
|
||||
options: options,
|
||||
|
|
|
@ -16,11 +16,20 @@ package scorer
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeConstantScorer int
|
||||
|
||||
func init() {
|
||||
var cs ConstantScorer
|
||||
reflectStaticSizeConstantScorer = int(reflect.TypeOf(cs).Size())
|
||||
}
|
||||
|
||||
type ConstantScorer struct {
|
||||
constant float64
|
||||
boost float64
|
||||
|
@ -30,6 +39,16 @@ type ConstantScorer struct {
|
|||
queryWeightExplanation *search.Explanation
|
||||
}
|
||||
|
||||
func (s *ConstantScorer) Size() int {
|
||||
sizeInBytes := reflectStaticSizeConstantScorer + size.SizeOfPtr
|
||||
|
||||
if s.queryWeightExplanation != nil {
|
||||
sizeInBytes += s.queryWeightExplanation.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
|
||||
rv := ConstantScorer{
|
||||
options: options,
|
||||
|
|
|
@ -16,14 +16,27 @@ package scorer
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDisjunctionQueryScorer int
|
||||
|
||||
func init() {
|
||||
var dqs DisjunctionQueryScorer
|
||||
reflectStaticSizeDisjunctionQueryScorer = int(reflect.TypeOf(dqs).Size())
|
||||
}
|
||||
|
||||
type DisjunctionQueryScorer struct {
|
||||
options search.SearcherOptions
|
||||
}
|
||||
|
||||
func (s *DisjunctionQueryScorer) Size() int {
|
||||
return reflectStaticSizeDisjunctionQueryScorer + size.SizeOfPtr
|
||||
}
|
||||
|
||||
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
|
||||
return &DisjunctionQueryScorer{
|
||||
options: options,
|
||||
|
|
|
@ -17,11 +17,20 @@ package scorer
|
|||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermQueryScorer int
|
||||
|
||||
func init() {
|
||||
var tqs TermQueryScorer
|
||||
reflectStaticSizeTermQueryScorer = int(reflect.TypeOf(tqs).Size())
|
||||
}
|
||||
|
||||
type TermQueryScorer struct {
|
||||
queryTerm []byte
|
||||
queryField string
|
||||
|
@ -36,6 +45,21 @@ type TermQueryScorer struct {
|
|||
queryWeightExplanation *search.Explanation
|
||||
}
|
||||
|
||||
func (s *TermQueryScorer) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTermQueryScorer + size.SizeOfPtr +
|
||||
len(s.queryTerm) + len(s.queryField)
|
||||
|
||||
if s.idfExplanation != nil {
|
||||
sizeInBytes += s.idfExplanation.Size()
|
||||
}
|
||||
|
||||
if s.queryWeightExplanation != nil {
|
||||
sizeInBytes += s.queryWeightExplanation.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
|
||||
rv := TermQueryScorer{
|
||||
queryTerm: queryTerm,
|
||||
|
|
|
@ -16,11 +16,26 @@ package search
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDocumentMatch int
|
||||
var reflectStaticSizeSearchContext int
|
||||
var reflectStaticSizeLocation int
|
||||
|
||||
func init() {
|
||||
var dm DocumentMatch
|
||||
reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size())
|
||||
var sc SearchContext
|
||||
reflectStaticSizeSearchContext = int(reflect.TypeOf(sc).Size())
|
||||
var l Location
|
||||
reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
|
||||
}
|
||||
|
||||
type ArrayPositions []uint64
|
||||
|
||||
func (ap ArrayPositions) Equals(other ArrayPositions) bool {
|
||||
|
@ -47,6 +62,11 @@ type Location struct {
|
|||
ArrayPositions ArrayPositions `json:"array_positions"`
|
||||
}
|
||||
|
||||
func (l *Location) Size() int {
|
||||
return reflectStaticSizeLocation + size.SizeOfPtr +
|
||||
len(l.ArrayPositions)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
type Locations []*Location
|
||||
|
||||
type TermLocationMap map[string]Locations
|
||||
|
@ -117,6 +137,52 @@ func (dm *DocumentMatch) Reset() *DocumentMatch {
|
|||
return dm
|
||||
}
|
||||
|
||||
func (dm *DocumentMatch) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDocumentMatch + size.SizeOfPtr +
|
||||
len(dm.Index) +
|
||||
len(dm.ID) +
|
||||
len(dm.IndexInternalID)
|
||||
|
||||
if dm.Expl != nil {
|
||||
sizeInBytes += dm.Expl.Size()
|
||||
}
|
||||
|
||||
for k, v := range dm.Locations {
|
||||
sizeInBytes += size.SizeOfString + len(k)
|
||||
for k1, v1 := range v {
|
||||
sizeInBytes += size.SizeOfString + len(k1) +
|
||||
size.SizeOfSlice
|
||||
for _, entry := range v1 {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for k, v := range dm.Fragments {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfSlice
|
||||
|
||||
for _, entry := range v {
|
||||
sizeInBytes += size.SizeOfString + len(entry)
|
||||
}
|
||||
}
|
||||
|
||||
for _, entry := range dm.Sort {
|
||||
sizeInBytes += size.SizeOfString + len(entry)
|
||||
}
|
||||
|
||||
for k, _ := range dm.Fields {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfPtr
|
||||
}
|
||||
|
||||
if dm.Document != nil {
|
||||
sizeInBytes += dm.Document.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (dm *DocumentMatch) String() string {
|
||||
return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
|
||||
}
|
||||
|
@ -135,6 +201,7 @@ type Searcher interface {
|
|||
SetQueryNorm(float64)
|
||||
Count() uint64
|
||||
Min() int
|
||||
Size() int
|
||||
|
||||
DocumentMatchPoolSize() int
|
||||
}
|
||||
|
@ -148,3 +215,18 @@ type SearcherOptions struct {
|
|||
type SearchContext struct {
|
||||
DocumentMatchPool *DocumentMatchPool
|
||||
}
|
||||
|
||||
func (sc *SearchContext) Size() int {
|
||||
sizeInBytes := reflectStaticSizeSearchContext + size.SizeOfPtr +
|
||||
reflectStaticSizeDocumentMatchPool + size.SizeOfPtr
|
||||
|
||||
if sc.DocumentMatchPool != nil {
|
||||
for _, entry := range sc.DocumentMatchPool.avail {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
|
|
@ -16,12 +16,21 @@ package searcher
|
|||
|
||||
import (
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/scorer"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeBooleanSearcher int
|
||||
|
||||
func init() {
|
||||
var bs BooleanSearcher
|
||||
reflectStaticSizeBooleanSearcher = int(reflect.TypeOf(bs).Size())
|
||||
}
|
||||
|
||||
type BooleanSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
mustSearcher search.Searcher
|
||||
|
@ -52,6 +61,33 @@ func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searc
|
|||
return &rv, nil
|
||||
}
|
||||
|
||||
func (s *BooleanSearcher) Size() int {
|
||||
sizeInBytes := reflectStaticSizeBooleanSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size()
|
||||
|
||||
if s.mustSearcher != nil {
|
||||
sizeInBytes += s.mustSearcher.Size()
|
||||
}
|
||||
|
||||
if s.shouldSearcher != nil {
|
||||
sizeInBytes += s.shouldSearcher.Size()
|
||||
}
|
||||
|
||||
if s.mustNotSearcher != nil {
|
||||
sizeInBytes += s.mustNotSearcher.Size()
|
||||
}
|
||||
|
||||
sizeInBytes += s.scorer.Size()
|
||||
|
||||
for _, entry := range s.matches {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (s *BooleanSearcher) computeQueryNorm() {
|
||||
// first calculate sum of squared weights
|
||||
sumOfSquaredWeights := 0.0
|
||||
|
|
|
@ -16,13 +16,22 @@ package searcher
|
|||
|
||||
import (
|
||||
"math"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/scorer"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeConjunctionSearcher int
|
||||
|
||||
func init() {
|
||||
var cs ConjunctionSearcher
|
||||
reflectStaticSizeConjunctionSearcher = int(reflect.TypeOf(cs).Size())
|
||||
}
|
||||
|
||||
type ConjunctionSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
searchers OrderedSearcherList
|
||||
|
@ -54,6 +63,23 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
|
|||
return &rv, nil
|
||||
}
|
||||
|
||||
func (s *ConjunctionSearcher) Size() int {
|
||||
sizeInBytes := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr +
|
||||
s.scorer.Size()
|
||||
|
||||
for _, entry := range s.searchers {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
for _, entry := range s.currs {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (s *ConjunctionSearcher) computeQueryNorm() {
|
||||
// first calculate sum of squared weights
|
||||
sumOfSquaredWeights := 0.0
|
||||
|
|
|
@ -17,13 +17,22 @@ package searcher
|
|||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/scorer"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDisjunctionSearcher int
|
||||
|
||||
func init() {
|
||||
var ds DisjunctionSearcher
|
||||
reflectStaticSizeDisjunctionSearcher = int(reflect.TypeOf(ds).Size())
|
||||
}
|
||||
|
||||
// DisjunctionMaxClauseCount is a compile time setting that applications can
|
||||
// adjust to non-zero value to cause the DisjunctionSearcher to return an
|
||||
// error instead of exeucting searches when the size exceeds this value.
|
||||
|
@ -90,6 +99,32 @@ func newDisjunctionSearcher(indexReader index.IndexReader,
|
|||
return &rv, nil
|
||||
}
|
||||
|
||||
func (s *DisjunctionSearcher) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDisjunctionSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size() +
|
||||
s.scorer.Size()
|
||||
|
||||
for _, entry := range s.searchers {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
for _, entry := range s.currs {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
for _, entry := range s.matching {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
sizeInBytes += len(s.matchingIdxs) * size.SizeOfInt
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (s *DisjunctionSearcher) computeQueryNorm() {
|
||||
// first calculate sum of squared weights
|
||||
sumOfSquaredWeights := 0.0
|
||||
|
|
|
@ -15,11 +15,21 @@
|
|||
package searcher
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/scorer"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDocIDSearcher int
|
||||
|
||||
func init() {
|
||||
var ds DocIDSearcher
|
||||
reflectStaticSizeDocIDSearcher = int(reflect.TypeOf(ds).Size())
|
||||
}
|
||||
|
||||
// DocIDSearcher returns documents matching a predefined set of identifiers.
|
||||
type DocIDSearcher struct {
|
||||
reader index.DocIDReader
|
||||
|
@ -42,6 +52,12 @@ func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (s *DocIDSearcher) Size() int {
|
||||
return reflectStaticSizeDocIDSearcher + size.SizeOfPtr +
|
||||
s.reader.Size() +
|
||||
s.scorer.Size()
|
||||
}
|
||||
|
||||
func (s *DocIDSearcher) Count() uint64 {
|
||||
return uint64(s.count)
|
||||
}
|
||||
|
|
|
@ -15,10 +15,20 @@
|
|||
package searcher
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeFilteringSearcher int
|
||||
|
||||
func init() {
|
||||
var fs FilteringSearcher
|
||||
reflectStaticSizeFilteringSearcher = int(reflect.TypeOf(fs).Size())
|
||||
}
|
||||
|
||||
// FilterFunc defines a function which can filter documents
|
||||
// returning true means keep the document
|
||||
// returning false means do not keep the document
|
||||
|
@ -38,6 +48,11 @@ func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearch
|
|||
}
|
||||
}
|
||||
|
||||
func (f *FilteringSearcher) Size() int {
|
||||
return reflectStaticSizeFilteringSearcher + size.SizeOfPtr +
|
||||
f.child.Size()
|
||||
}
|
||||
|
||||
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
||||
next, err := f.child.Next(ctx)
|
||||
for next != nil && err == nil {
|
||||
|
|
|
@ -15,11 +15,21 @@
|
|||
package searcher
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/scorer"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeMatchAllSearcher int
|
||||
|
||||
func init() {
|
||||
var mas MatchAllSearcher
|
||||
reflectStaticSizeMatchAllSearcher = int(reflect.TypeOf(mas).Size())
|
||||
}
|
||||
|
||||
type MatchAllSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
reader index.DocIDReader
|
||||
|
@ -46,6 +56,13 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options s
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (s *MatchAllSearcher) Size() int {
|
||||
return reflectStaticSizeMatchAllSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size() +
|
||||
s.reader.Size() +
|
||||
s.scorer.Size()
|
||||
}
|
||||
|
||||
func (s *MatchAllSearcher) Count() uint64 {
|
||||
return s.count
|
||||
}
|
||||
|
|
|
@ -15,10 +15,20 @@
|
|||
package searcher
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeMatchNoneSearcher int
|
||||
|
||||
func init() {
|
||||
var mns MatchNoneSearcher
|
||||
reflectStaticSizeMatchNoneSearcher = int(reflect.TypeOf(mns).Size())
|
||||
}
|
||||
|
||||
type MatchNoneSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
}
|
||||
|
@ -29,6 +39,11 @@ func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, er
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (s *MatchNoneSearcher) Size() int {
|
||||
return reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size()
|
||||
}
|
||||
|
||||
func (s *MatchNoneSearcher) Count() uint64 {
|
||||
return uint64(0)
|
||||
}
|
||||
|
|
|
@ -17,11 +17,20 @@ package searcher
|
|||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizePhraseSearcher int
|
||||
|
||||
func init() {
|
||||
var ps PhraseSearcher
|
||||
reflectStaticSizePhraseSearcher = int(reflect.TypeOf(ps).Size())
|
||||
}
|
||||
|
||||
type PhraseSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
mustSearcher *ConjunctionSearcher
|
||||
|
@ -32,6 +41,28 @@ type PhraseSearcher struct {
|
|||
initialized bool
|
||||
}
|
||||
|
||||
func (s *PhraseSearcher) Size() int {
|
||||
sizeInBytes := reflectStaticSizePhraseSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size()
|
||||
|
||||
if s.mustSearcher != nil {
|
||||
sizeInBytes += s.mustSearcher.Size()
|
||||
}
|
||||
|
||||
if s.currMust != nil {
|
||||
sizeInBytes += s.currMust.Size()
|
||||
}
|
||||
|
||||
for _, entry := range s.terms {
|
||||
sizeInBytes += size.SizeOfSlice
|
||||
for _, entry1 := range entry {
|
||||
sizeInBytes += size.SizeOfString + len(entry1)
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
|
||||
// turn flat terms []string into [][]string
|
||||
mterms := make([][]string, len(terms))
|
||||
|
|
|
@ -15,11 +15,21 @@
|
|||
package searcher
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/scorer"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermSearcher int
|
||||
|
||||
func init() {
|
||||
var ts TermSearcher
|
||||
reflectStaticSizeTermSearcher = int(reflect.TypeOf(ts).Size())
|
||||
}
|
||||
|
||||
type TermSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
reader index.TermFieldReader
|
||||
|
@ -63,6 +73,14 @@ func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field stri
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (s *TermSearcher) Size() int {
|
||||
return reflectStaticSizeTermSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size() +
|
||||
s.reader.Size() +
|
||||
s.tfd.Size() +
|
||||
s.scorer.Size()
|
||||
}
|
||||
|
||||
func (s *TermSearcher) Count() uint64 {
|
||||
return s.reader.Count()
|
||||
}
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package size
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
)
|
||||
|
||||
func init() {
|
||||
var a bool
|
||||
SizeOfBool = int(reflect.TypeOf(a).Size())
|
||||
var b float32
|
||||
SizeOfFloat32 = int(reflect.TypeOf(b).Size())
|
||||
var c float64
|
||||
SizeOfFloat64 = int(reflect.TypeOf(c).Size())
|
||||
var d map[int]int
|
||||
SizeOfMap = int(reflect.TypeOf(d).Size())
|
||||
var e *int
|
||||
SizeOfPtr = int(reflect.TypeOf(e).Size())
|
||||
var f []int
|
||||
SizeOfSlice = int(reflect.TypeOf(f).Size())
|
||||
var g string
|
||||
SizeOfString = int(reflect.TypeOf(g).Size())
|
||||
var h uint8
|
||||
SizeOfUint8 = int(reflect.TypeOf(h).Size())
|
||||
var i uint16
|
||||
SizeOfUint16 = int(reflect.TypeOf(i).Size())
|
||||
var j uint32
|
||||
SizeOfUint32 = int(reflect.TypeOf(j).Size())
|
||||
var k uint64
|
||||
SizeOfUint64 = int(reflect.TypeOf(k).Size())
|
||||
}
|
||||
|
||||
var SizeOfBool int
|
||||
var SizeOfFloat32 int
|
||||
var SizeOfFloat64 int
|
||||
var SizeOfInt int
|
||||
var SizeOfMap int
|
||||
var SizeOfPtr int
|
||||
var SizeOfSlice int
|
||||
var SizeOfString int
|
||||
var SizeOfUint8 int
|
||||
var SizeOfUint16 int
|
||||
var SizeOfUint32 int
|
||||
var SizeOfUint64 int
|
Loading…
Reference in New Issue