0
0
Fork 0

Include overhead from data structures in segment's SizeInBytes

+ Account for all the overhead incurred from the Go data structures
  within mem.Segment and zap.Segment, using the following sizes
  (in bytes, on a 64-bit system):
    - SizeOfMap = 8
    - SizeOfPointer = 8
    - SizeOfSlice = 24
    - SizeOfString = 16
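+ Include overhead from fields that were not previously counted as well
  (e.g. DocValueFields in mem.Segment; docValueOffset and fieldDvIterMap
  in zap.Segment).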
This commit is contained in:
abhinavdangeti 2018-01-12 12:11:11 -08:00
parent 44c371582a
commit 1176c73a9c
4 changed files with 95 additions and 22 deletions

View File

@ -107,27 +107,41 @@ func New() *Segment {
// updateSizeInBytes recomputes an estimate of the segment's memory footprint
// and stores the result in s.sizeInBytes. The estimate walks FieldsMap,
// Dicts, Postings/PostingsLocs, Freqs/Norms, the location slices, the stored
// data and DocValueFields, adding the payload size of each entry plus fixed
// per-structure overheads taken from the segment.SizeOf* constants.
//
// NOTE(review): this listing looks like a rendered diff rather than a plain
// source file: a hunk header ("@ -135,31 +149,49 @@") sits in the middle of
// the body and several statements appear in both their pre-change and
// post-change form, so the text is not compilable as shown. At least one
// source line between the two hunks is not visible here -- confirm against
// the real file before relying on any individual term.
func (s *Segment) updateSizeInBytes() {
var sizeInBytes uint64
// FieldsMap, FieldsInv
for k, _ := range s.FieldsMap {
sizeInBytes += uint64(len(k)*2 /* FieldsMap + FieldsInv */ +
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
2 /* size of uint16 */)
}
// overhead from the data structures
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
// Dicts, DictKeys
for _, entry := range s.Dicts {
for k, _ := range entry {
sizeInBytes += uint64(len(k)*2 /* Dicts + DictKeys */ +
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
8 /* size of uint64 */)
}
// overhead from the data structures
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
}
sizeInBytes += (segment.SizeOfSlice * 2)
// Postings, PostingsLocs
for i := 0; i < len(s.Postings); i++ {
sizeInBytes += s.Postings[i].GetSizeInBytes() + s.PostingsLocs[i].GetSizeInBytes()
sizeInBytes += (s.Postings[i].GetSizeInBytes() + segment.SizeOfPointer) +
(s.PostingsLocs[i].GetSizeInBytes() + segment.SizeOfPointer)
}
sizeInBytes += (segment.SizeOfSlice * 2)
// Freqs, Norms
for i := 0; i < len(s.Freqs); i++ {
sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ +
len(s.Norms[i])*4 /* size of float32 */)
len(s.Norms[i])*4 /* size of float32 */) +
(segment.SizeOfSlice * 2)
}
sizeInBytes += (segment.SizeOfSlice * 2)
// Location data
for i := 0; i < len(s.Locfields); i++ {
sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ +
len(s.Locstarts[i])*8 /* size of uint64 */ +
@ -135,31 +149,49 @@ func (s *Segment) updateSizeInBytes() {
len(s.Locpos[i])*8 /* size of uint64 */)
for j := 0; j < len(s.Locarraypos[i]); j++ {
sizeInBytes += uint64(len(s.Locarraypos[i][j]) * 8 /* size of uint64 */)
sizeInBytes += uint64(len(s.Locarraypos[i][j])*8 /* size of uint64 */) +
segment.SizeOfSlice
}
}
sizeInBytes += (segment.SizeOfSlice * 5)
}
sizeInBytes += (segment.SizeOfSlice * 5)
// Stored data
for i := 0; i < len(s.Stored); i++ {
for _, v := range s.Stored[i] {
sizeInBytes += uint64(2 /* size of uint16 */)
for _, arr := range v {
sizeInBytes += uint64(len(arr))
sizeInBytes += uint64(len(arr)) + segment.SizeOfSlice
}
sizeInBytes += segment.SizeOfSlice
}
for _, v := range s.StoredTypes[i] {
sizeInBytes += uint64(2 /* size of uint16 */ + len(v))
sizeInBytes += uint64(2 /* size of uint16 */ +len(v)) + segment.SizeOfSlice
}
for _, v := range s.StoredPos[i] {
sizeInBytes += uint64(2 /* size of uint16 */)
for _, arr := range v {
sizeInBytes += uint64(len(arr) * 8 /* size of uint64 */)
sizeInBytes += uint64(len(arr)*8 /* size of uint64 */) +
segment.SizeOfSlice
}
sizeInBytes += segment.SizeOfSlice
}
}
sizeInBytes += uint64(8 /* size of sizeInBytes -> uint64*/)
// overhead from map(s) within Stored, StoredTypes, StoredPos
sizeInBytes += (segment.SizeOfMap * 3)
}
// overhead from data structures: Stored, StoredTypes, StoredPos
sizeInBytes += (segment.SizeOfSlice * 3)
// DocValueFields
sizeInBytes += uint64(len(s.DocValueFields)*3 /* size of uint16 + bool */) +
segment.SizeOfMap
// SizeInBytes
sizeInBytes += uint64(8)
s.sizeInBytes = sizeInBytes
}

View File

@ -19,6 +19,12 @@ import (
"github.com/blevesearch/bleve/index"
)
// Fixed overhead, in bytes, attributed to Go's built-in data structures when
// running on a 64-bit system. These are used when estimating how much memory
// a segment occupies.
const (
	SizeOfMap     uint64 = 8
	SizeOfPointer uint64 = 8
	SizeOfSlice   uint64 = 24
	SizeOfString  uint64 = 16
)
// DocumentFieldValueVisitor defines a callback to be visited for each
// stored field value. The return value determines if the visitor
// should keep going. Returning true continues visiting, false stops.

View File

@ -22,6 +22,7 @@ import (
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/golang/snappy"
)
@ -35,6 +36,23 @@ type docValueIterator struct {
curChunkData []byte // compressed data cache
}
// sizeInBytes returns an estimate of the memory held by the iterator itself.
// It counts the fixed-width counters, the field name, and the chunkLens and
// curChunkHeader slices; curChunkData is left out because it is mmap'ed.
func (di *docValueIterator) sizeInBytes() uint64 {
	// Three uint64 fields: curChunkNum, numChunks, dvDataLoc.
	const fixedFields = 24

	// field: the string's bytes plus its header.
	fieldBytes := len(di.field) + int(segment.SizeOfString)

	// chunkLens is counted at 8 bytes per element and curChunkHeader at 24
	// bytes per element, plus the slice-header overhead of both slices.
	sliceBytes := len(di.chunkLens)*8 +
		len(di.curChunkHeader)*24 +
		int(segment.SizeOfSlice*2)

	// curChunkData is mmap'ed, so it is intentionally not included.
	return uint64(fixedFields + fieldBytes + sliceBytes)
}
// fieldName returns the field name stored on the iterator (di.field).
func (di *docValueIterator) fieldName() string {
	name := di.field
	return name
}

View File

@ -97,27 +97,44 @@ type Segment struct {
}
// SizeInBytes returns an estimate of the segment's in-memory size. The total
// is built from the path string, a fixed allowance for the fixed-width header
// fields (sizeOfUints), the fieldsMap keys and values, the fieldsInv strings,
// the fieldsOffsets slice and the fieldDvIterMap entries; the mmap'ed part of
// the segment is not included.
//
// NOTE(review): this listing looks like a rendered diff rather than a plain
// source file: several statements (e.g. the sizeOfUints and sizeInBytes
// initialisers) appear in both their pre-change and post-change form, so the
// text is not compilable as shown -- confirm against the real file.
func (s *Segment) SizeInBytes() uint64 {
// 4 /* size of crc -> uint32 */ +
// 4 /* size of version -> uint32 */ +
// 4 /* size of chunkFactor -> uint32 */ +
// 8 /* size of numDocs -> uint64 */ +
// 8 /* size of storedIndexOffset -> uint64 */ +
// 8 /* size of file pointer */
// 4 /* size of crc -> uint32 */
// 4 /* size of version -> uint32 */
// 4 /* size of chunkFactor -> uint32 */
// 8 /* size of numDocs -> uint64 */
// 8 /* size of storedIndexOffset -> uint64 */
// 8 /* size of fieldsIndexOffset -> uint64 */
sizeOfUints := 36
// 8 /* size of docValueOffset -> uint64 */
sizeOfUints := 52
// Do not include the mmap'ed part
sizeInBytes := len(s.path) + sizeOfUints
sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints
// fieldsMap
for k, _ := range s.fieldsMap {
sizeInBytes += len(k) + 2 /* size of uint16 */
sizeInBytes += (len(k) + int(segment.SizeOfString)) + 2 /* size of uint16 */
}
sizeInBytes += int(segment.SizeOfMap) /* overhead from map */
// fieldsInv, fieldsOffsets
for _, entry := range s.fieldsInv {
sizeInBytes += len(entry)
sizeInBytes += (len(entry) + int(segment.SizeOfString))
}
sizeInBytes += len(s.fieldsOffsets) * 8 /* size of uint64 */
sizeInBytes += int(segment.SizeOfSlice) * 2 /* overhead from slices */
sizeInBytes += len(s.fieldsOffsets) * 8 /* size of uint64 */
sizeInBytes += 8 /* size of refs -> int64 */
// fieldDvIterMap
sizeInBytes += len(s.fieldDvIterMap) *
int(segment.SizeOfPointer+2 /* size of uint16 */)
for _, entry := range s.fieldDvIterMap {
if entry != nil {
sizeInBytes += int(entry.sizeInBytes())
}
}
sizeInBytes += int(segment.SizeOfMap)
// mutex, refs -> int64
sizeInBytes += 16
return uint64(sizeInBytes)
}