From 4af65a78460e4168ed1e8474b30d2b39a202e93c Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Tue, 13 Mar 2018 11:44:56 -0700
Subject: [PATCH] scorch zap prealloc buf via estimate from previous interim work

---
 index/scorch/segment/zap/new.go | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go
index 4c9ec9c1..dd2740fb 100644
--- a/index/scorch/segment/zap/new.go
+++ b/index/scorch/segment/zap/new.go
@@ -34,10 +34,18 @@ import (
 // SegmentBase from analysis results
 func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
 	chunkFactor uint32) (*SegmentBase, error) {
-	var br bytes.Buffer
-
 	s := interimPool.Get().(*interim)
 
+	var br bytes.Buffer
+	if s.lastNumDocs > 0 {
+		// use previous results to initialize the buf with an estimate
+		// size, but note that the interim instance comes from a
+		// global interimPool, so multiple scorch instances indexing
+		// different docs can lead to low quality estimates
+		avgBytesPerDoc := s.lastOutSize / s.lastNumDocs
+		br.Grow(avgBytesPerDoc * (len(results) + 1))
+	}
+
 	s.results = results
 	s.chunkFactor = chunkFactor
 	s.w = NewCountHashWriter(&br)
@@ -53,6 +61,8 @@ func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
 		storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets)
 
 	if err == nil && s.reset() == nil {
+		s.lastNumDocs = len(results)
+		s.lastOutSize = len(br.Bytes())
 		interimPool.Put(s)
 	}
 
@@ -114,6 +124,9 @@ type interim struct {
 
 	tmp0 []byte
 	tmp1 []byte
+
+	lastNumDocs int
+	lastOutSize int
 }
 
 func (s *interim) reset() (err error) {
@@ -161,6 +174,8 @@ func (s *interim) reset() (err error) {
 	s.metaBuf.Reset()
 	s.tmp0 = s.tmp0[:0]
 	s.tmp1 = s.tmp1[:0]
+	s.lastNumDocs = 0
+	s.lastOutSize = 0
 
 	return err
 }