From c9310b906d6bb591e18fe33ceb8d7ceb30826aaf Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 26 Aug 2016 11:50:38 -0400 Subject: [PATCH] introduced new collector store impl based on slice. Counter-intuitively, the list impl was faster than the heap. The theory was that the heap did more comparisons and swapping, so even though it benefited from no interface and some cache locality, it was still slower. The idea was to just use a raw slice kept in order. This avoids the need for an interface, but can take the same comparison approach as the list. It seems to work out: go test -run=xxx -bench=. -benchmem -cpuprofile=cpu.out BenchmarkTop10of100000Scores-4 5000 299959 ns/op 2600 B/op 36 allocs/op BenchmarkTop100of100000Scores-4 2000 601104 ns/op 20720 B/op 216 allocs/op BenchmarkTop10of1000000Scores-4 500 3450196 ns/op 2616 B/op 36 allocs/op BenchmarkTop100of1000000Scores-4 500 3874276 ns/op 20856 B/op 216 allocs/op PASS ok github.com/blevesearch/bleve/search/collectors 7.440s --- search/collectors/slice.go | 63 ++++++++++++++++++++++++++++++++++++++ search/collectors/topn.go | 4 +-- 2 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 search/collectors/slice.go diff --git a/search/collectors/slice.go b/search/collectors/slice.go new file mode 100644 index 00000000..24eba815 --- /dev/null +++ b/search/collectors/slice.go @@ -0,0 +1,63 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+ +package collectors + +import "github.com/blevesearch/bleve/search" + +type collectStoreSlice struct { + slice search.DocumentMatchCollection + compare collectorCompare +} + +func newStoreSlice(cap int, compare collectorCompare) *collectStoreSlice { + rv := &collectStoreSlice{ + slice: make(search.DocumentMatchCollection, 0, cap), + compare: compare, + } + return rv +} + +func (c *collectStoreSlice) Add(doc *search.DocumentMatch) { + // find where to insert, starting at end (lowest) + i := len(c.slice) + for ; i > 0; i-- { + cmp := c.compare(doc, c.slice[i-1]) + if cmp >= 0 { + break + } + } + if i < 0 { + i = 0 + } + // insert at i + c.slice = append(c.slice, nil) + copy(c.slice[i+1:], c.slice[i:]) + c.slice[i] = doc +} + +func (c *collectStoreSlice) RemoveLast() *search.DocumentMatch { + var rv *search.DocumentMatch + rv, c.slice = c.slice[len(c.slice)-1], c.slice[:len(c.slice)-1] + return rv +} + +func (c *collectStoreSlice) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) { + for i := skip; i < len(c.slice); i++ { + err := fixup(c.slice[i]) + if err != nil { + return nil, err + } + } + return c.slice[skip:], nil +} + +func (c *collectStoreSlice) Len() int { + return len(c.slice) +} diff --git a/search/collectors/topn.go b/search/collectors/topn.go index 15ce77c8..636b7593 100644 --- a/search/collectors/topn.go +++ b/search/collectors/topn.go @@ -32,7 +32,7 @@ type TopNCollector struct { results search.DocumentMatchCollection facetsBuilder *search.FacetsBuilder - store *collectStoreList + store *collectStoreSlice needDocIds bool neededFields []string @@ -52,7 +52,7 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector hc := &TopNCollector{size: size, skip: skip, sort: sort} // pre-allocate space on the heap, we need size+skip results // +1 additional while figuring out which to evict - hc.store = newStoreList(size+skip+1, func(i, j *search.DocumentMatch) int { + hc.store = newStoreSlice(size+skip+1, 
func(i, j *search.DocumentMatch) int { return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j) })