From a4a34cc3b21cc97bd3bb6a500568fdd8757de657 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 27 Apr 2017 08:52:52 -0400 Subject: [PATCH] topn collector switch approach based on size+skip we now use the slice store when size+skip <= 10 and use the heap store when size+skip > 10 here are the new perf numbers: go test -run=xxx -bench=. -benchmem BenchmarkTop10of0Scores-4 1000000 1150 ns/op 2304 B/op 15 allocs/op BenchmarkTop10of3Scores-4 1000000 1417 ns/op 2304 B/op 18 allocs/op BenchmarkTop10of10Scores-4 1000000 2133 ns/op 2312 B/op 25 allocs/op BenchmarkTop10of25Scores-4 500000 3410 ns/op 2464 B/op 26 allocs/op BenchmarkTop10of50Scores-4 300000 5174 ns/op 2464 B/op 26 allocs/op BenchmarkTop10of10000Scores-4 5000 342955 ns/op 2488 B/op 26 allocs/op BenchmarkTop100of0Scores-4 300000 4796 ns/op 18320 B/op 15 allocs/op BenchmarkTop100of3Scores-4 300000 5160 ns/op 18352 B/op 19 allocs/op BenchmarkTop100of10Scores-4 200000 6354 ns/op 18408 B/op 26 allocs/op BenchmarkTop100of25Scores-4 200000 10023 ns/op 18568 B/op 41 allocs/op BenchmarkTop100of50Scores-4 100000 16821 ns/op 18832 B/op 66 allocs/op BenchmarkTop100of10000Scores-4 3000 508989 ns/op 19760 B/op 117 allocs/op BenchmarkTop1000of10000Scores-4 1000 1814198 ns/op 184768 B/op 1017 allocs/op BenchmarkTop10000of100000Scores-4 50 26623920 ns/op 1939592 B/op 19024 allocs/op BenchmarkTop10of100000Scores-4 500 3730204 ns/op 2496 B/op 26 allocs/op BenchmarkTop100of100000Scores-4 300 4057127 ns/op 19912 B/op 117 allocs/op BenchmarkTop1000of100000Scores-4 200 6390180 ns/op 186200 B/op 1017 allocs/op BenchmarkTop10000of1000000Scores-4 20 82785756 ns/op 1963897 B/op 19024 allocs/op PASS ok github.com/blevesearch/bleve/search/collector 31.537s Previously with heap: go test -run=xxx -bench=. 
-benchmem BenchmarkTop10of0Scores-4 1000000 1216 ns/op 2288 B/op 15 allocs/op BenchmarkTop10of3Scores-4 1000000 1593 ns/op 2320 B/op 19 allocs/op BenchmarkTop10of10Scores-4 500000 2734 ns/op 2376 B/op 26 allocs/op BenchmarkTop10of25Scores-4 300000 5077 ns/op 2520 B/op 27 allocs/op BenchmarkTop10of50Scores-4 200000 6875 ns/op 2528 B/op 27 allocs/op BenchmarkTop10of10000Scores-4 3000 351210 ns/op 2552 B/op 27 allocs/op BenchmarkTop100of0Scores-4 300000 4846 ns/op 18304 B/op 15 allocs/op BenchmarkTop100of3Scores-4 300000 5357 ns/op 18336 B/op 19 allocs/op BenchmarkTop100of10Scores-4 200000 6462 ns/op 18392 B/op 26 allocs/op BenchmarkTop100of25Scores-4 200000 10012 ns/op 18552 B/op 41 allocs/op BenchmarkTop100of50Scores-4 100000 17089 ns/op 18816 B/op 66 allocs/op BenchmarkTop100of10000Scores-4 3000 528193 ns/op 19744 B/op 117 allocs/op BenchmarkTop1000of10000Scores-4 1000 1859447 ns/op 184752 B/op 1017 allocs/op BenchmarkTop10000of100000Scores-4 50 28005664 ns/op 1939576 B/op 19024 allocs/op BenchmarkTop10of100000Scores-4 300 4120091 ns/op 2560 B/op 27 allocs/op BenchmarkTop100of100000Scores-4 300 4325227 ns/op 19896 B/op 117 allocs/op BenchmarkTop1000of100000Scores-4 200 6799804 ns/op 186184 B/op 1017 allocs/op BenchmarkTop10000of1000000Scores-4 20 88494230 ns/op 1963881 B/op 19024 allocs/op PASS ok github.com/blevesearch/bleve/search/collector 30.198s Previously with slice: go test -run=xxx -bench=. 
-benchmem BenchmarkTop10of0Scores-4 1000000 1202 ns/op 2288 B/op 15 allocs/op BenchmarkTop10of3Scores-4 1000000 1453 ns/op 2288 B/op 18 allocs/op BenchmarkTop10of10Scores-4 1000000 2162 ns/op 2296 B/op 25 allocs/op BenchmarkTop10of25Scores-4 500000 3420 ns/op 2448 B/op 26 allocs/op BenchmarkTop10of50Scores-4 300000 5336 ns/op 2448 B/op 26 allocs/op BenchmarkTop10of10000Scores-4 5000 356733 ns/op 2472 B/op 26 allocs/op BenchmarkTop100of0Scores-4 300000 4877 ns/op 18304 B/op 15 allocs/op BenchmarkTop100of3Scores-4 300000 5132 ns/op 18304 B/op 18 allocs/op BenchmarkTop100of10Scores-4 200000 5787 ns/op 18312 B/op 25 allocs/op BenchmarkTop100of25Scores-4 200000 8083 ns/op 18344 B/op 40 allocs/op BenchmarkTop100of50Scores-4 100000 14419 ns/op 18400 B/op 65 allocs/op BenchmarkTop100of10000Scores-4 2000 665401 ns/op 18848 B/op 116 allocs/op BenchmarkTop1000of10000Scores-4 100 15417063 ns/op 176560 B/op 1016 allocs/op BenchmarkTop10000of100000Scores-4 1 1860011022 ns/op 1857960 B/op 19023 allocs/op BenchmarkTop10of100000Scores-4 300 4099276 ns/op 2480 B/op 26 allocs/op BenchmarkTop100of100000Scores-4 300 4533645 ns/op 18984 B/op 116 allocs/op BenchmarkTop1000of100000Scores-4 50 30519235 ns/op 178008 B/op 1016 allocs/op BenchmarkTop10000of1000000Scores-4 1 3483977385 ns/op 1882072 B/op 19023 allocs/op PASS ok github.com/blevesearch/bleve/search/collector 31.666s It appears that this successfully gets the best of both, for these particular benchmark sizes. 
--- search/collector/heap.go | 13 +++++++++++-- search/collector/list.go | 15 ++++++++++++--- search/collector/slice.go | 15 ++++++++++++--- search/collector/topn.go | 28 +++++++++++++++++++++------- 4 files changed, 56 insertions(+), 15 deletions(-) diff --git a/search/collector/heap.go b/search/collector/heap.go index 19f3a059..e7fd21c6 100644 --- a/search/collector/heap.go +++ b/search/collector/heap.go @@ -34,11 +34,20 @@ func newStoreHeap(cap int, compare collectorCompare) *collectStoreHeap { return rv } -func (c *collectStoreHeap) Add(doc *search.DocumentMatch) { +func (c *collectStoreHeap) AddNotExceedingSize(doc *search.DocumentMatch, + size int) *search.DocumentMatch { + c.add(doc) + if c.Len() > size { + return c.removeLast() + } + return nil +} + +func (c *collectStoreHeap) add(doc *search.DocumentMatch) { heap.Push(c, doc) } -func (c *collectStoreHeap) RemoveLast() *search.DocumentMatch { +func (c *collectStoreHeap) removeLast() *search.DocumentMatch { return heap.Pop(c).(*search.DocumentMatch) } diff --git a/search/collector/list.go b/search/collector/list.go index 51b47b12..ec2f69cb 100644 --- a/search/collector/list.go +++ b/search/collector/list.go @@ -34,7 +34,16 @@ func newStoreList(cap int, compare collectorCompare) *collectStoreList { return rv } -func (c *collectStoreList) Add(doc *search.DocumentMatch) { +func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, + size int) *search.DocumentMatch { + c.add(doc) + if c.len() > size { + return c.removeLast() + } + return nil +} + +func (c *collectStoreList) add(doc *search.DocumentMatch) { for e := c.results.Front(); e != nil; e = e.Next() { curr := e.Value.(*search.DocumentMatch) if c.compare(doc, curr) >= 0 { @@ -46,7 +55,7 @@ func (c *collectStoreList) Add(doc *search.DocumentMatch) { c.results.PushBack(doc) } -func (c *collectStoreList) RemoveLast() *search.DocumentMatch { +func (c *collectStoreList) removeLast() *search.DocumentMatch { return 
c.results.Remove(c.results.Front()).(*search.DocumentMatch) } @@ -73,6 +82,6 @@ func (c *collectStoreList) Final(skip int, fixup collectorFixup) (search.Documen return search.DocumentMatchCollection{}, nil } -func (c *collectStoreList) Len() int { +func (c *collectStoreList) len() int { return c.results.Len() } diff --git a/search/collector/slice.go b/search/collector/slice.go index b061643e..32cb8624 100644 --- a/search/collector/slice.go +++ b/search/collector/slice.go @@ -29,7 +29,16 @@ func newStoreSlice(cap int, compare collectorCompare) *collectStoreSlice { return rv } -func (c *collectStoreSlice) Add(doc *search.DocumentMatch) { +func (c *collectStoreSlice) AddNotExceedingSize(doc *search.DocumentMatch, + size int) *search.DocumentMatch { + c.add(doc) + if c.len() > size { + return c.removeLast() + } + return nil +} + +func (c *collectStoreSlice) add(doc *search.DocumentMatch) { // find where to insert, starting at end (lowest) i := len(c.slice) for ; i > 0; i-- { @@ -44,7 +53,7 @@ func (c *collectStoreSlice) Add(doc *search.DocumentMatch) { c.slice[i] = doc } -func (c *collectStoreSlice) RemoveLast() *search.DocumentMatch { +func (c *collectStoreSlice) removeLast() *search.DocumentMatch { var rv *search.DocumentMatch rv, c.slice = c.slice[len(c.slice)-1], c.slice[:len(c.slice)-1] return rv @@ -63,6 +72,6 @@ func (c *collectStoreSlice) Final(skip int, fixup collectorFixup) (search.Docume return search.DocumentMatchCollection{}, nil } -func (c *collectStoreSlice) Len() int { +func (c *collectStoreSlice) len() int { return len(c.slice) } diff --git a/search/collector/topn.go b/search/collector/topn.go index 946ca3ec..2c7c6752 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -22,6 +22,15 @@ import ( "golang.org/x/net/context" ) +type collectorStore interface { + // Add the document, and if the new store size exceeds the provided size + // the last element is removed and returned. If the size has not been + // exceeded, nil is returned. 
+ AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch + + Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) +} + // PreAllocSizeSkipCap will cap preallocation to this amount when // size+skip exceeds this value var PreAllocSizeSkipCap = 1000 @@ -41,7 +50,7 @@ type TopNCollector struct { results search.DocumentMatchCollection facetsBuilder *search.FacetsBuilder - store *collectStoreHeap + store collectorStore needDocIds bool neededFields []string @@ -68,9 +77,15 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector backingSize = PreAllocSizeSkipCap + 1 } - hc.store = newStoreHeap(backingSize, func(i, j *search.DocumentMatch) int { - return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j) - }) + if size+skip > 10 { + hc.store = newStoreHeap(backingSize, func(i, j *search.DocumentMatch) int { + return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j) + }) + } else { + hc.store = newStoreSlice(backingSize, func(i, j *search.DocumentMatch) int { + return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j) + }) + } // these lookups traverse an interface, so do once up-front if sort.RequiresDocID() { @@ -184,9 +199,8 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I } } - hc.store.Add(d) - if hc.store.Len() > hc.size+hc.skip { - removed := hc.store.RemoveLast() + removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip) + if removed != nil { if hc.lowestMatchOutsideResults == nil { hc.lowestMatchOutsideResults = removed } else {