Merge pull request #733 from abhinavdangeti/scorch-segment-sizeinbytes
Include overhead from data structures in segment's SizeInBytes
This commit is contained in:
commit
cb6391e75e
|
@ -107,27 +107,41 @@ func New() *Segment {
|
|||
func (s *Segment) updateSizeInBytes() {
|
||||
var sizeInBytes uint64
|
||||
|
||||
// FieldsMap, FieldsInv
|
||||
for k, _ := range s.FieldsMap {
|
||||
sizeInBytes += uint64(len(k)*2 /* FieldsMap + FieldsInv */ +
|
||||
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
|
||||
2 /* size of uint16 */)
|
||||
}
|
||||
// overhead from the data structures
|
||||
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
|
||||
|
||||
// Dicts, DictKeys
|
||||
for _, entry := range s.Dicts {
|
||||
for k, _ := range entry {
|
||||
sizeInBytes += uint64(len(k)*2 /* Dicts + DictKeys */ +
|
||||
sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
|
||||
8 /* size of uint64 */)
|
||||
}
|
||||
// overhead from the data structures
|
||||
sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
|
||||
}
|
||||
sizeInBytes += (segment.SizeOfSlice * 2)
|
||||
|
||||
// Postings, PostingsLocs
|
||||
for i := 0; i < len(s.Postings); i++ {
|
||||
sizeInBytes += s.Postings[i].GetSizeInBytes() + s.PostingsLocs[i].GetSizeInBytes()
|
||||
sizeInBytes += (s.Postings[i].GetSizeInBytes() + segment.SizeOfPointer) +
|
||||
(s.PostingsLocs[i].GetSizeInBytes() + segment.SizeOfPointer)
|
||||
}
|
||||
sizeInBytes += (segment.SizeOfSlice * 2)
|
||||
|
||||
// Freqs, Norms
|
||||
for i := 0; i < len(s.Freqs); i++ {
|
||||
sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ +
|
||||
len(s.Norms[i])*4 /* size of float32 */)
|
||||
len(s.Norms[i])*4 /* size of float32 */) +
|
||||
(segment.SizeOfSlice * 2)
|
||||
}
|
||||
sizeInBytes += (segment.SizeOfSlice * 2)
|
||||
|
||||
// Location data
|
||||
for i := 0; i < len(s.Locfields); i++ {
|
||||
sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ +
|
||||
len(s.Locstarts[i])*8 /* size of uint64 */ +
|
||||
|
@ -135,31 +149,49 @@ func (s *Segment) updateSizeInBytes() {
|
|||
len(s.Locpos[i])*8 /* size of uint64 */)
|
||||
|
||||
for j := 0; j < len(s.Locarraypos[i]); j++ {
|
||||
sizeInBytes += uint64(len(s.Locarraypos[i][j]) * 8 /* size of uint64 */)
|
||||
sizeInBytes += uint64(len(s.Locarraypos[i][j])*8 /* size of uint64 */) +
|
||||
segment.SizeOfSlice
|
||||
}
|
||||
}
|
||||
|
||||
sizeInBytes += (segment.SizeOfSlice * 5)
|
||||
}
|
||||
sizeInBytes += (segment.SizeOfSlice * 5)
|
||||
|
||||
// Stored data
|
||||
for i := 0; i < len(s.Stored); i++ {
|
||||
for _, v := range s.Stored[i] {
|
||||
sizeInBytes += uint64(2 /* size of uint16 */)
|
||||
for _, arr := range v {
|
||||
sizeInBytes += uint64(len(arr))
|
||||
sizeInBytes += uint64(len(arr)) + segment.SizeOfSlice
|
||||
}
|
||||
sizeInBytes += segment.SizeOfSlice
|
||||
}
|
||||
|
||||
for _, v := range s.StoredTypes[i] {
|
||||
sizeInBytes += uint64(2 /* size of uint16 */ + len(v))
|
||||
sizeInBytes += uint64(2 /* size of uint16 */ +len(v)) + segment.SizeOfSlice
|
||||
}
|
||||
|
||||
for _, v := range s.StoredPos[i] {
|
||||
sizeInBytes += uint64(2 /* size of uint16 */)
|
||||
for _, arr := range v {
|
||||
sizeInBytes += uint64(len(arr) * 8 /* size of uint64 */)
|
||||
sizeInBytes += uint64(len(arr)*8 /* size of uint64 */) +
|
||||
segment.SizeOfSlice
|
||||
}
|
||||
sizeInBytes += segment.SizeOfSlice
|
||||
}
|
||||
}
|
||||
|
||||
sizeInBytes += uint64(8 /* size of sizeInBytes -> uint64*/)
|
||||
// overhead from map(s) within Stored, StoredTypes, StoredPos
|
||||
sizeInBytes += (segment.SizeOfMap * 3)
|
||||
}
|
||||
// overhead from data structures: Stored, StoredTypes, StoredPos
|
||||
sizeInBytes += (segment.SizeOfSlice * 3)
|
||||
|
||||
// DocValueFields
|
||||
sizeInBytes += uint64(len(s.DocValueFields)*3 /* size of uint16 + bool */) +
|
||||
segment.SizeOfMap
|
||||
|
||||
// SizeInBytes
|
||||
sizeInBytes += uint64(8)
|
||||
|
||||
s.sizeInBytes = sizeInBytes
|
||||
}
|
||||
|
|
|
@ -19,6 +19,12 @@ import (
|
|||
"github.com/blevesearch/bleve/index"
|
||||
)
|
||||
|
||||
// Overhead from Go data structures when deployed on a 64-bit system.
|
||||
const SizeOfMap uint64 = 8
|
||||
const SizeOfPointer uint64 = 8
|
||||
const SizeOfSlice uint64 = 24
|
||||
const SizeOfString uint64 = 16
|
||||
|
||||
// DocumentFieldValueVisitor defines a callback to be visited for each
|
||||
// stored field value. The return value determines if the visitor
|
||||
// should keep going. Returning true continues visiting, false stops.
|
||||
|
|
|
@ -22,6 +22,7 @@ import (
|
|||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
|
@ -35,6 +36,23 @@ type docValueIterator struct {
|
|||
curChunkData []byte // compressed data cache
|
||||
}
|
||||
|
||||
func (di *docValueIterator) sizeInBytes() uint64 {
|
||||
// curChunkNum, numChunks, dvDataLoc --> uint64
|
||||
sizeInBytes := 24
|
||||
|
||||
// field
|
||||
sizeInBytes += (len(di.field) + int(segment.SizeOfString))
|
||||
|
||||
// chunkLens, curChunkHeader
|
||||
sizeInBytes += len(di.chunkLens)*8 +
|
||||
len(di.curChunkHeader)*24 +
|
||||
int(segment.SizeOfSlice*2) /* overhead from slices */
|
||||
|
||||
// curChunkData is mmap'ed, not included
|
||||
|
||||
return uint64(sizeInBytes)
|
||||
}
|
||||
|
||||
func (di *docValueIterator) fieldName() string {
|
||||
return di.field
|
||||
}
|
||||
|
|
|
@ -97,27 +97,44 @@ type Segment struct {
|
|||
}
|
||||
|
||||
func (s *Segment) SizeInBytes() uint64 {
|
||||
// 4 /* size of crc -> uint32 */ +
|
||||
// 4 /* size of version -> uint32 */ +
|
||||
// 4 /* size of chunkFactor -> uint32 */ +
|
||||
// 8 /* size of numDocs -> uint64 */ +
|
||||
// 8 /* size of storedIndexOffset -> uint64 */ +
|
||||
// 8 /* size of file pointer */
|
||||
// 4 /* size of crc -> uint32 */
|
||||
// 4 /* size of version -> uint32 */
|
||||
// 4 /* size of chunkFactor -> uint32 */
|
||||
// 8 /* size of numDocs -> uint64 */
|
||||
// 8 /* size of storedIndexOffset -> uint64 */
|
||||
// 8 /* size of fieldsIndexOffset -> uint64 */
|
||||
sizeOfUints := 36
|
||||
// 8 /* size of docValueOffset -> uint64 */
|
||||
sizeOfUints := 52
|
||||
|
||||
// Do not include the mmap'ed part
|
||||
sizeInBytes := len(s.path) + sizeOfUints
|
||||
sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints
|
||||
|
||||
// fieldsMap
|
||||
for k, _ := range s.fieldsMap {
|
||||
sizeInBytes += len(k) + 2 /* size of uint16 */
|
||||
sizeInBytes += (len(k) + int(segment.SizeOfString)) + 2 /* size of uint16 */
|
||||
}
|
||||
sizeInBytes += int(segment.SizeOfMap) /* overhead from map */
|
||||
|
||||
// fieldsInv, fieldsOffsets
|
||||
for _, entry := range s.fieldsInv {
|
||||
sizeInBytes += len(entry)
|
||||
sizeInBytes += (len(entry) + int(segment.SizeOfString))
|
||||
}
|
||||
sizeInBytes += len(s.fieldsOffsets) * 8 /* size of uint64 */
|
||||
sizeInBytes += int(segment.SizeOfSlice) * 2 /* overhead from slices */
|
||||
|
||||
sizeInBytes += len(s.fieldsOffsets) * 8 /* size of uint64 */
|
||||
sizeInBytes += 8 /* size of refs -> int64 */
|
||||
// fieldDvIterMap
|
||||
sizeInBytes += len(s.fieldDvIterMap) *
|
||||
int(segment.SizeOfPointer+2 /* size of uint16 */)
|
||||
for _, entry := range s.fieldDvIterMap {
|
||||
if entry != nil {
|
||||
sizeInBytes += int(entry.sizeInBytes())
|
||||
}
|
||||
}
|
||||
sizeInBytes += int(segment.SizeOfMap)
|
||||
|
||||
// mutex, refs -> int64
|
||||
sizeInBytes += 16
|
||||
|
||||
return uint64(sizeInBytes)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue