0
0
Fork 0

Tracking memory consumption for a scorch index

+ Track memory usage at a segment level
+ Add a new scorch API: MemoryUsed()
    - Aggregate the memory consumption across
      segments when API is invoked.

+ TODO:
    - Revisit the second iteration to see if it can be gotten
      rid of, with the size accounted for during the first
      run while building an in-mem segment.
    - Account for pointer and slice overhead.
This commit is contained in:
abhinavdangeti 2017-12-28 18:48:38 -07:00
parent a475ee886d
commit 5c26f5a86d
7 changed files with 132 additions and 0 deletions

View File

@ -361,6 +361,21 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) {
s.rootLock.Unlock()
}
// MemoryUsed reports an estimate of the memory consumed by the current
// root snapshot, aggregated across its segments. For each segment it
// counts the 8-byte id key, the segment's self-reported size, the
// deleted-docs bitmap (when present), and the cached-docs store.
func (s *Scorch) MemoryUsed() uint64 {
	var memUsed uint64
	s.rootLock.RLock()
	// Deferred so the read lock is released even if a segment's
	// SizeInBytes implementation panics.
	defer s.rootLock.RUnlock()
	for _, segmentSnapshot := range s.root.segment {
		memUsed += 8 /* size of id -> uint64 */ +
			segmentSnapshot.segment.SizeInBytes()
		if segmentSnapshot.deleted != nil {
			memUsed += segmentSnapshot.deleted.GetSizeInBytes()
		}
		memUsed += segmentSnapshot.cachedDocs.sizeInBytes()
	}
	return memUsed
}
func (s *Scorch) markIneligibleForRemoval(filename string) {
s.rootLock.Lock()
s.ineligibleForRemoval[filename] = true

View File

@ -41,6 +41,9 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment {
sort.Strings(dict)
}
// compute memory usage of segment
s.updateSizeInBytes()
// professional debugging
//
// log.Printf("fields: %v\n", s.FieldsMap)

View File

@ -87,6 +87,10 @@ type Segment struct {
// stored field array positions
// docNum -> field id -> slice of array positions (each is []uint64)
StoredPos []map[uint16][][]uint64
// footprint of the segment, updated when analyzed document mutations
// are added into the segment
sizeInBytes uint64
}
// New builds a new empty Segment
@ -96,6 +100,70 @@ func New() *Segment {
}
}
// updateSizeInBytes recomputes the estimated in-memory footprint of the
// segment and caches it in s.sizeInBytes, to be served cheaply by
// SizeInBytes(). It is called once the segment's contents are built.
//
// NOTE(review): the estimate covers key bytes and fixed-width values
// only; pointer, slice-header and map-bucket overhead are not yet
// accounted for (per the commit's TODO).
func (s *Segment) updateSizeInBytes() {
	var sizeInBytes uint64

	// Each field name is stored twice (FieldsMap key and FieldsInv
	// entry) plus its uint16 field id.
	for k := range s.FieldsMap {
		sizeInBytes += uint64(len(k)*2 /* FieldsMap + FieldsInv */ +
			2 /* size of uint16 */)
	}

	// Per-field term dictionaries: each term stored twice (Dicts key
	// and DictKeys entry) plus its uint64 postings id.
	for _, entry := range s.Dicts {
		for k := range entry {
			sizeInBytes += uint64(len(k)*2 /* Dicts + DictKeys */ +
				8 /* size of uint64 */)
		}
	}

	// Postings and postings-locations bitmaps report their own sizes;
	// assumes PostingsLocs is kept parallel to Postings.
	for i := 0; i < len(s.Postings); i++ {
		sizeInBytes += s.Postings[i].GetSizeInBytes() + s.PostingsLocs[i].GetSizeInBytes()
	}

	// Term frequencies and norms; assumes Norms is parallel to Freqs.
	for i := 0; i < len(s.Freqs); i++ {
		sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ +
			len(s.Norms[i])*4 /* size of float32 */)
	}

	// Location detail slices; assumes Locstarts/Locends/Locpos and
	// Locarraypos are parallel to Locfields.
	for i := 0; i < len(s.Locfields); i++ {
		sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ +
			len(s.Locstarts[i])*8 /* size of uint64 */ +
			len(s.Locends[i])*8 /* size of uint64 */ +
			len(s.Locpos[i])*8 /* size of uint64 */)
		for j := 0; j < len(s.Locarraypos[i]); j++ {
			sizeInBytes += uint64(len(s.Locarraypos[i][j]) * 8 /* size of uint64 */)
		}
	}

	// Stored fields: per doc, field ids plus raw value bytes, the
	// stored-type bytes, and array-position uint64s.
	for i := 0; i < len(s.Stored); i++ {
		for _, v := range s.Stored[i] {
			sizeInBytes += uint64(2 /* size of uint16 */)
			for _, arr := range v {
				sizeInBytes += uint64(len(arr))
			}
		}
		for _, v := range s.StoredTypes[i] {
			sizeInBytes += uint64(2 /* size of uint16 */ + len(v))
		}
		for _, v := range s.StoredPos[i] {
			sizeInBytes += uint64(2 /* size of uint16 */)
			for _, arr := range v {
				sizeInBytes += uint64(len(arr) * 8 /* size of uint64 */)
			}
		}
	}

	// The cached size field itself.
	sizeInBytes += uint64(8 /* size of sizeInBytes -> uint64 */)
	s.sizeInBytes = sizeInBytes
}
// SizeInBytes returns the segment's cached memory footprint, as last
// computed by updateSizeInBytes.
func (s *Segment) SizeInBytes() uint64 {
	sz := s.sizeInBytes
	return sz
}
// AddRef is a no-op for the in-memory segment; it exists to satisfy the
// segment interface (which declares AddRef/DecRef), since reference
// counting is only meaningful for file-backed segments.
func (s *Segment) AddRef() {
}

View File

@ -169,6 +169,10 @@ func TestSingle(t *testing.T) {
t.Fatalf("segment nil, not expected")
}
if segment.SizeInBytes() <= 0 {
t.Fatalf("segment size not updated")
}
expectFields := map[string]struct{}{
"_id": struct{}{},
"_all": struct{}{},

View File

@ -36,6 +36,8 @@ type Segment interface {
Close() error
SizeInBytes() uint64
AddRef()
DecRef() error
}

View File

@ -86,6 +86,31 @@ type Segment struct {
refs int64
}
// SizeInBytes estimates the memory consumed by this segment: the mmap'ed
// region, the path string, the fixed-width header fields, and the
// in-memory field lookup structures.
func (s *Segment) SizeInBytes() uint64 {
	// Fixed-width header fields:
	//   4 /* size of crc -> uint32 */ +
	//   4 /* size of version -> uint32 */ +
	//   4 /* size of chunkFactor -> uint32 */ +
	//   8 /* size of numDocs -> uint64 */ +
	//   8 /* size of storedIndexOffset -> uint64 */ +
	//   8 /* size of fieldsIndexOffset -> uint64 */
	const sizeOfUints = 36
	sizeInBytes := len(s.mm) + len(s.path) + sizeOfUints
	for k := range s.fieldsMap {
		sizeInBytes += len(k) + 2 /* size of uint16 */
	}
	for _, entry := range s.fieldsInv {
		sizeInBytes += len(entry)
	}
	sizeInBytes += len(s.fieldsOffsets) * 8 /* size of uint64 */
	sizeInBytes += 8 /* size of refs -> int64 */
	return uint64(sizeInBytes)
}
func (s *Segment) AddRef() {
s.m.Lock()
s.refs++

View File

@ -249,3 +249,18 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e
c.m.Unlock()
return nil
}
// sizeInBytes estimates the memory held by the cached docs: each cached
// field name, plus — per cached doc — the 8-byte doc number key and the
// raw bytes of the cached field value.
func (c *cachedDocs) sizeInBytes() uint64 {
	sizeInBytes := 0
	c.m.Lock()
	// Deferred so the lock is released even on a panic while walking
	// the cache.
	defer c.m.Unlock()
	for k, v := range c.cache { // cachedFieldDocs
		sizeInBytes += len(k)
		if v != nil {
			for _, entry := range v.docs { // docs
				sizeInBytes += 8 /* size of uint64 */ + len(entry)
			}
		}
	}
	return uint64(sizeInBytes)
}