0
0
Fork 0

Merge pull request #826 from steveyen/scorch-estimate-buf-size

estimate interim buffer size based on previous results
This commit is contained in:
Steve Yen 2018-03-16 11:22:42 -07:00 committed by GitHub
commit 5411d9ae4f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 17 additions and 2 deletions

View File

@ -34,10 +34,18 @@ import (
// SegmentBase from analysis results
func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
chunkFactor uint32) (*SegmentBase, error) {
var br bytes.Buffer
s := interimPool.Get().(*interim)
var br bytes.Buffer
if s.lastNumDocs > 0 {
// use previous results to initialize the buf with an estimated
// size, but note that the interim instance comes from a
// global interimPool, so multiple scorch instances indexing
// different docs can lead to low-quality estimates
avgBytesPerDoc := s.lastOutSize / s.lastNumDocs
br.Grow(avgBytesPerDoc * (len(results) + 1))
}
s.results = results
s.chunkFactor = chunkFactor
s.w = NewCountHashWriter(&br)
@ -53,6 +61,8 @@ func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets)
if err == nil && s.reset() == nil {
s.lastNumDocs = len(results)
s.lastOutSize = len(br.Bytes())
interimPool.Put(s)
}
@ -114,6 +124,9 @@ type interim struct {
tmp0 []byte
tmp1 []byte
lastNumDocs int
lastOutSize int
}
func (s *interim) reset() (err error) {
@ -161,6 +174,8 @@ func (s *interim) reset() (err error) {
s.metaBuf.Reset()
s.tmp0 = s.tmp0[:0]
s.tmp1 = s.tmp1[:0]
s.lastNumDocs = 0
s.lastOutSize = 0
return err
}