
memoize the size of an entire index snapshot

By memoizing the size of index snapshots and their
constituent parts, we significantly reduce the amount
of time that the lock is held in the app_herder when
calculating the total memory used.
Marty Schoch 2018-03-15 16:31:04 -04:00
parent d1b84d4578
commit 45e0e5c666
6 changed files with 64 additions and 20 deletions
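
The idea in rough form: rather than walking every segment under the root lock each time memory usage is queried, each snapshot computes and stores its own size once, when it is built, and readers only hold the lock long enough to grab the current root. A minimal, self-contained sketch of that pattern, using simplified names rather than the actual bleve/scorch types:

package main

import (
	"fmt"
	"sync"
)

// Illustrative stand-ins only; the real types are Scorch, IndexSnapshot
// and SegmentSnapshot in index/scorch.
type segmentSnapshot struct{ data []byte }

func (s *segmentSnapshot) Size() int { return len(s.data) }

type indexSnapshot struct {
	segments []*segmentSnapshot
	size     uint64 // memoized total, filled in once before publication
}

// updateSize runs once, before the snapshot is swapped in as the new root,
// so readers never repeat the walk over segments.
func (i *indexSnapshot) updateSize() {
	for _, seg := range i.segments {
		i.size += uint64(seg.Size())
	}
}

type index struct {
	rootLock sync.RWMutex
	root     *indexSnapshot
}

// MemoryUsed holds the lock only long enough to read the root pointer;
// the expensive part already happened in updateSize.
func (idx *index) MemoryUsed() uint64 {
	idx.rootLock.RLock()
	snap := idx.root
	idx.rootLock.RUnlock()
	return snap.size
}

func main() {
	snap := &indexSnapshot{segments: []*segmentSnapshot{{data: make([]byte, 1024)}}}
	snap.updateSize()
	idx := &index{root: snap}
	fmt.Println(idx.MemoryUsed()) // 1024
}

The hunks below wire this same split into the real types: updateSize() calls at snapshot and segment construction, and cheap Size()/SizeFull() accessors wherever the totals are read.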

View File

@ -186,6 +186,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
}
}
newSnapshot.updateSize()
s.rootLock.Lock()
if next.persisted != nil {
s.rootPersisted = append(s.rootPersisted, next.persisted)
@ -251,6 +252,7 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
newIndexSnapshot.internal[k] = v
}
newIndexSnapshot.updateSize()
s.rootLock.Lock()
rootPrev := s.root
s.root = newIndexSnapshot
@ -348,6 +350,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
newSnapshot.updateSize()
s.rootLock.Lock()
// swap in new index snapshot
newSnapshot.epoch = s.nextSnapshotEpoch
@ -409,6 +412,7 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
s.rootPersisted = append(s.rootPersisted, revertTo.persisted)
}
newSnapshot.updateSize()
// swap in new snapshot
rootPrev := s.root
s.root = newSnapshot
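
All four hunks above (introduceSegment, introducePersist, introduceMerge, revertToSnapshot) gain a single newSnapshot.updateSize() call, so the per-segment walk is paid once when the snapshot is assembled rather than on every stats query. A compressed, self-contained sketch of that publish step, with invented names rather than the real introducer types:

package main

import (
	"fmt"
	"sync"
)

// Invented names; the real code lives in Scorch.introduceSegment and the
// other introducer paths shown above.
type indexSnapshot struct {
	segmentSizes []int
	size         uint64
}

// updateSize memoizes the snapshot's total before readers can see it.
func (i *indexSnapshot) updateSize() {
	for _, s := range i.segmentSizes {
		i.size += uint64(s)
	}
}

type scorchLike struct {
	rootLock sync.Mutex
	root     *indexSnapshot
}

// introduce mirrors the hunks above: compute the memoized size first, then
// hold rootLock only for the pointer swap.
func (s *scorchLike) introduce(next *indexSnapshot) {
	next.updateSize()
	s.rootLock.Lock()
	s.root = next
	s.rootLock.Unlock()
}

func main() {
	s := &scorchLike{}
	s.introduce(&indexSnapshot{segmentSizes: []int{128, 256}})
	fmt.Println(s.root.size) // 384
}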

View File

@ -397,11 +397,15 @@ func (s *Scorch) DeleteInternal(key []byte) error {
// Reader returns a low-level accessor on the index data. Close it to
// release associated resources.
func (s *Scorch) Reader() (index.IndexReader, error) {
return s.currentSnapshot(), nil
}
func (s *Scorch) currentSnapshot() *IndexSnapshot {
s.rootLock.RLock()
rv := s.root
rv.AddRef()
s.rootLock.RUnlock()
- return rv, nil
return rv
}
func (s *Scorch) Stats() json.Marshaler {
@ -484,20 +488,11 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) {
}
func (s *Scorch) MemoryUsed() uint64 {
- var memUsed int
- s.rootLock.RLock()
- if s.root != nil {
- for _, segmentSnapshot := range s.root.segment {
- memUsed += 8 /* size of id -> uint64 */ +
- segmentSnapshot.segment.Size()
- if segmentSnapshot.deleted != nil {
- memUsed += int(segmentSnapshot.deleted.GetSizeInBytes())
- }
- memUsed += segmentSnapshot.cachedDocs.size()
- }
- }
- s.rootLock.RUnlock()
- return uint64(memUsed)
indexSnapshot := s.currentSnapshot()
defer func() {
_ = indexSnapshot.Close()
}()
return uint64(indexSnapshot.SizeFull())
}
func (s *Scorch) markIneligibleForRemoval(filename string) {
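
Note that currentSnapshot() calls AddRef() before returning the root, so MemoryUsed() now participates in snapshot reference counting and must Close() what it receives, which is what the deferred Close above is for. A toy version of that contract (the real IndexSnapshot guards refs with a mutex and releases segment resources when the count reaches zero):

package main

import (
	"fmt"
	"sync/atomic"
)

// Toy snapshot; not the real IndexSnapshot implementation.
type snapshot struct {
	refs int64
	size uint64
}

func (s *snapshot) AddRef() { atomic.AddInt64(&s.refs, 1) }

func (s *snapshot) Close() error {
	if atomic.AddInt64(&s.refs, -1) == 0 {
		// the real code would release segments, files, etc. here
	}
	return nil
}

// memoryUsed mimics the new Scorch.MemoryUsed: take a reference, read the
// memoized size, and release the reference when done.
func memoryUsed(current *snapshot) uint64 {
	current.AddRef()
	defer func() { _ = current.Close() }()
	return current.size
}

func main() {
	fmt.Println(memoryUsed(&snapshot{refs: 1, size: 2048})) // 2048
}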

View File

@ -138,6 +138,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
dictLocs: dictLocs,
fieldDvIterMap: make(map[uint16]*docValueIterator),
}
sb.updateSize()
err := sb.loadDvIterators()
if err != nil {

View File

@ -63,6 +63,7 @@ func Open(path string) (segment.Segment, error) {
path: path,
refs: 1,
}
rv.SegmentBase.updateSize()
err = rv.loadConfig()
if err != nil {
@ -99,9 +100,14 @@ type SegmentBase struct {
docValueOffset uint64
dictLocs []uint64
fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field
size uint64
}
func (sb *SegmentBase) Size() int {
return int(sb.size)
}
func (sb *SegmentBase) updateSize() {
sizeInBytes := reflectStaticSizeSegmentBase +
len(sb.mem)
@ -124,7 +130,7 @@ func (sb *SegmentBase) Size() int {
}
}
- return sizeInBytes
sb.size = uint64(sizeInBytes)
}
func (sb *SegmentBase) AddRef() {}
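
The zap SegmentBase gets the same treatment: the computation that used to live in Size() moves into updateSize(), which runs once when the segment is built (the InitSegmentBase and Open hunks above), and Size() becomes a constant-time read. That is safe because a zap segment's underlying data is immutable once constructed. A rough, simplified sketch, with a placeholder constant standing in for reflectStaticSizeSegmentBase:

package main

import "fmt"

// Simplified; the real struct is the zap SegmentBase shown above.
type segmentBase struct {
	mem  []byte
	size uint64
}

const staticSizeSegmentBase = 64 // placeholder value for the sketch

// updateSize is called once, right after the segment is constructed,
// mirroring the calls added in InitSegmentBase and Open.
func (sb *segmentBase) updateSize() {
	sb.size = uint64(staticSizeSegmentBase + len(sb.mem))
}

// Size becomes a constant-time accessor of the memoized value.
func (sb *segmentBase) Size() int { return int(sb.size) }

func main() {
	sb := &segmentBase{mem: make([]byte, 4096)}
	sb.updateSize()
	fmt.Println(sb.Size()) // 4160
}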

View File

@ -19,6 +19,7 @@ import (
"container/heap"
"encoding/binary"
"fmt"
"reflect"
"sort"
"sync"
"sync/atomic"
@ -41,12 +42,20 @@ type asynchSegmentResult struct {
err error
}
var reflectStaticSizeIndexSnapshot int
func init() {
var is interface{} = IndexSnapshot{}
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
}
type IndexSnapshot struct {
parent *Scorch
segment []*SegmentSnapshot
offsets []uint64
internal map[string][]byte
epoch uint64
size uint64
m sync.Mutex // Protects the fields that follow.
refs int64
@ -96,6 +105,17 @@ func (i *IndexSnapshot) Size() int {
return size.SizeOfPtr
}
func (i *IndexSnapshot) SizeFull() int {
return int(i.size)
}
func (i *IndexSnapshot) updateSize() {
i.size += uint64(reflectStaticSizeIndexSnapshot)
for _, s := range i.segment {
i.size += uint64(s.Size())
}
}
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
results := make(chan *asynchSegmentResult)
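
The init() above captures only the fixed footprint of the IndexSnapshot struct itself; the variable part is added per segment in updateSize(). A quick, standalone illustration of what that reflect call measures, using a made-up struct rather than the real one:

package main

import (
	"fmt"
	"reflect"
	"unsafe"
)

// made-up struct purely to show what TypeOf(...).Size() reports
type example struct {
	segments []*int
	internal map[string][]byte
	epoch    uint64
}

func main() {
	var e example
	// Static header size of the struct: the slice and map headers plus the
	// uint64, but none of the memory they point at.
	fmt.Println(reflect.TypeOf(e).Size())
	// Same number; the reflect form just avoids importing unsafe in the
	// package that needs it.
	fmt.Println(unsafe.Sizeof(e))
}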

View File

@ -19,6 +19,7 @@ import (
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
)
var TermSeparator byte = 0xff
@ -128,15 +129,26 @@ func (s *SegmentSnapshot) Fields() []string {
return s.segment.Fields()
}
func (s *SegmentSnapshot) Size() (rv int) {
rv = s.segment.Size()
if s.deleted != nil {
rv += int(s.deleted.GetSizeInBytes())
}
rv += s.cachedDocs.Size()
return
}
type cachedFieldDocs struct {
readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used.
err error // Non-nil if there was an error when preparing this cachedFieldDocs.
docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF.
size uint64
}
func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) {
defer close(cfd.readyCh)
cfd.size += uint64(size.SizeOfUint64) /* size field */
dict, err := ss.segment.Dictionary(field)
if err != nil {
cfd.err = err
@ -152,12 +164,14 @@ func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) {
return
}
cfd.size += uint64(size.SizeOfUint64) /* map key */
postingsItr := postings.Iterator()
nextPosting, err2 := postingsItr.Next()
for err2 == nil && nextPosting != nil {
docNum := nextPosting.Number()
cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...)
cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator)
cfd.size += uint64(len(next.Term) + 1) // map value
nextPosting, err2 = postingsItr.Next()
}
@ -178,6 +192,7 @@ func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) {
type cachedDocs struct {
m sync.Mutex // As the cache is asynchronously prepared, need a lock
cache map[string]*cachedFieldDocs // Keyed by field
size uint64
}
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
@ -208,14 +223,18 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e
}
c.m.Lock()
}
c.updateSizeLOCKED()
c.m.Unlock()
return nil
}
- func (c *cachedDocs) size() int {
func (c *cachedDocs) Size() int {
return int(c.size)
}
func (c *cachedDocs) updateSizeLOCKED() {
sizeInBytes := 0
- c.m.Lock()
for k, v := range c.cache { // cachedFieldDocs
sizeInBytes += len(k)
if v != nil {
@ -224,6 +243,5 @@ func (c *cachedDocs) size() int {
}
}
}
- c.m.Unlock()
- return sizeInBytes
c.size = uint64(sizeInBytes)
}
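
Unlike the snapshot and segment sizes, the cachedDocs size can keep changing as fields are lazily prepared, so it is accumulated inside prepareFields and refreshed by updateSizeLOCKED() while the caller still holds c.m; the renamed Size() then simply returns the stored value. A small sketch of that LOCKED-suffix convention, with simplified types rather than the real cachedDocs:

package main

import (
	"fmt"
	"sync"
)

// Simplified stand-in for cachedDocs; the real code keys by field name and
// stores per-field cachedFieldDocs.
type cache struct {
	m    sync.Mutex
	data map[string][]byte
	size uint64
}

// updateSizeLOCKED assumes the caller already holds c.m, which is why it
// can be called from inside a critical section without deadlocking.
func (c *cache) updateSizeLOCKED() {
	total := 0
	for k, v := range c.data {
		total += len(k) + len(v)
	}
	c.size = uint64(total)
}

// put plays the role of prepareFields here: mutate the cache, then refresh
// the memoized size before releasing the lock.
func (c *cache) put(k string, v []byte) {
	c.m.Lock()
	defer c.m.Unlock()
	if c.data == nil {
		c.data = make(map[string][]byte)
	}
	c.data[k] = v
	c.updateSizeLOCKED()
}

// Size is a cheap read of the memoized value, like the renamed
// cachedDocs.Size above.
func (c *cache) Size() int { return int(c.size) }

func main() {
	c := &cache{}
	c.put("desc", []byte("term\xffother\xff"))
	fmt.Println(c.Size()) // 15
}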