From f05794c6aace58cf791ee8f0d2108c5ecf9ea484 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 15 Dec 2017 11:11:18 -0800 Subject: [PATCH 1/2] scorch removed worker goroutines from TermFieldReader() On a couple of micro benchmarks on a dev macbook using bleve-query on an index of 50K wikipedia docs, scorch is now more in the same neighborhood as upsidedown/moss... high-freq term search "text:date"... 400 qps - upsidedown/moss 360 qps - scorch before 404 qps - scorch after zero-freq term search "text:mschoch"... 100K qps - upsidedown/moss 55K qps - scorch before 99K qps - scorch after Of note, the scorch index had ~150 *.zap files in it, which likely made the worker goroutine overhead more costly than for a case with few segments, where goroutine and channel related work appeared relatively prominently in the pprof SVG's. --- index/scorch/snapshot_index.go | 41 +++++++++------------------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 5b54669b..c0b50a25 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -341,27 +341,6 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { - results := make(chan *asynchSegmentResult) - for index, segment := range i.segment { - go func(index int, segment *SegmentSnapshot) { - dict, err := segment.Dictionary(field) - if err != nil { - results <- &asynchSegmentResult{err: err} - } else { - pl, err := dict.PostingsList(string(term), nil) - if err != nil { - results <- &asynchSegmentResult{err: err} - } else { - results <- &asynchSegmentResult{ - index: index, - postings: pl, - } - } - } - }(index, segment) - } - - var err error rv := &IndexSnapshotTermFieldReader{ term: term, snapshot: i, @@ -371,17 +350,17 @@ func (i 
*IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, includeNorm: includeNorm, includeTermVectors: includeTermVectors, } - for count := 0; count < len(i.segment); count++ { - asr := <-results - if asr.err != nil && err == nil { - err = asr.err - } else { - rv.postings[asr.index] = asr.postings - rv.iterators[asr.index] = asr.postings.Iterator() + for i, segment := range i.segment { + dict, err := segment.Dictionary(field) + if err != nil { + return nil, err } - } - if err != nil { - return nil, err + pl, err := dict.PostingsList(string(term), nil) + if err != nil { + return nil, err + } + rv.postings[i] = pl + rv.iterators[i] = pl.Iterator() } return rv, nil } From 620dcdb6f87dcc78c5c1baec713d141f7c7e604d Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 15 Dec 2017 11:54:52 -0800 Subject: [PATCH 2/2] scorch uses prealloc'ed buffer for docNumberToBytes() On a couple of micro benchmarks on a dev macbook using bleve-query on an index of 50K wikipedia docs, scorch is now faster than upsidedown/moss on high-freq term search "text:date"... 
400 qps - upsidedown/moss 404 qps - scorch before 565 qps - scorch after --- index/scorch/snapshot_index.go | 11 ++++++----- index/scorch/snapshot_index_doc.go | 2 +- index/scorch/snapshot_index_tfr.go | 3 +-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index c0b50a25..7f5f4103 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -365,11 +365,12 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, return rv, nil } -func docNumberToBytes(in uint64) []byte { - - buf := new(bytes.Buffer) - _ = binary.Write(buf, binary.BigEndian, in) - return buf.Bytes() +func docNumberToBytes(buf []byte, in uint64) []byte { + if len(buf) != 8 { + buf = make([]byte, 8) + } + binary.BigEndian.PutUint64(buf, in) + return buf } func docInternalToNumber(in index.IndexInternalID) (uint64, error) { diff --git a/index/scorch/snapshot_index_doc.go b/index/scorch/snapshot_index_doc.go index 4656079b..d1205ff8 100644 --- a/index/scorch/snapshot_index_doc.go +++ b/index/scorch/snapshot_index_doc.go @@ -36,7 +36,7 @@ func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) { next := i.iterators[i.segmentOffset].Next() // make segment number into global number by adding offset globalOffset := i.snapshot.offsets[i.segmentOffset] - return docNumberToBytes(uint64(next) + globalOffset), nil + return docNumberToBytes(nil, uint64(next)+globalOffset), nil } return nil, nil } diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 1fbabdfb..d6c8dcd1 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -49,8 +49,7 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in // make segment number into global number by adding offset globalOffset := i.snapshot.offsets[i.segmentOffset] nnum := next.Number() - rv.ID = docNumberToBytes(nnum + globalOffset) - + 
rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset) i.postingToTermFieldDoc(next, rv) i.currID = rv.ID