0
0
Fork 0

cap preallocation by the collector to a reasonable value

the collector has optimizations to avoid allocation and reslicing
during the common case of searching for top hits

however, in some cases users request a very large number of
search hits to be returned (attempting to get them all). This
caused unnecessary allocation of RAM.

to address this we introduce a new constant PreAllocSizeSkipCap.
It defaults to the value of 1000. If your size+skip is less than
this constant, you get the optimized behavior. If your
size+skip is greater than this, we cap the preallocations to
this lower value. Additional space is acquired on an as-needed
basis by growing the DocumentMatchPool and reslicing the
collector backing slice.

applications can change the value of PreAllocSizeSkipCap to suit
their own needs.

fixes #408
This commit is contained in:
Marty Schoch 2016-08-31 15:21:44 -04:00
parent a771e344ae
commit 60efecc8e9
2 changed files with 87 additions and 8 deletions

View File

@ -13,6 +13,7 @@ import (
"fmt"
"io/ioutil"
"log"
"math"
"os"
"reflect"
"sort"
@ -1668,3 +1669,67 @@ func TestOpenReadonlyMultiple(t *testing.T) {
t.Fatal(err)
}
}
// TestBug408 tests for VERY large values of size, even though actual result
// set may be reasonable size
func TestBug408(t *testing.T) {
type TestStruct struct {
ID string `json:"id"`
UserID *string `json:"user_id"`
}
docMapping := NewDocumentMapping()
docMapping.AddFieldMappingsAt("id", NewTextFieldMapping())
docMapping.AddFieldMappingsAt("user_id", NewTextFieldMapping())
indexMapping := NewIndexMapping()
indexMapping.DefaultMapping = docMapping
index, err := New("", indexMapping)
if err != nil {
t.Fatal(err)
}
numToTest := 10
matchUserID := "match"
noMatchUserID := "no_match"
matchingDocIds := make(map[string]struct{})
for i := 0; i < numToTest; i++ {
ds := &TestStruct{"id_" + strconv.Itoa(i), nil}
if i%2 == 0 {
ds.UserID = &noMatchUserID
} else {
ds.UserID = &matchUserID
matchingDocIds[ds.ID] = struct{}{}
}
err = index.Index(ds.ID, ds)
if err != nil {
t.Fatal(err)
}
}
cnt, err := index.DocCount()
if err != nil {
t.Fatal(err)
}
if int(cnt) != numToTest {
t.Fatalf("expected %d documents in index, got %d", numToTest, cnt)
}
q := NewTermQuery(matchUserID).SetField("user_id")
searchReq := NewSearchRequestOptions(q, math.MaxInt32, 0, false)
results, err := index.Search(searchReq)
if err != nil {
t.Fatal(err)
}
if int(results.Total) != numToTest/2 {
t.Fatalf("expected %d search hits, got %d", numToTest/2, results.Total)
}
for _, result := range results.Hits {
if _, found := matchingDocIds[result.ID]; !found {
t.Fatalf("document with ID %s not in results as expected", result.ID)
}
}
}

View File

@ -17,6 +17,10 @@ import (
"golang.org/x/net/context"
)
// PreAllocSizeSkipCap will cap preallocation to this amount when
// size+skip exceeds this value.
//
// Declared as a var (not a const) so that applications can change
// the value to suit their own needs, as described in the commit
// message introducing it.
var PreAllocSizeSkipCap = 1000
// collectorCompare compares two document matches while collecting hits;
// the store uses it to order results (it is backed by hc.sort.Compare
// in NewTopNCollector below).
type collectorCompare func(i, j *search.DocumentMatch) int

// collectorFixup is applied to a document match during collection;
// NOTE(review): exact semantics are defined where it is invoked, which
// is not visible in this view — presumably a final per-hit adjustment
// before results are returned.
type collectorFixup func(d *search.DocumentMatch) error
@ -50,9 +54,16 @@ const CheckDoneEvery = uint64(1024)
// ordering hits by the provided sort order
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
hc := &TopNCollector{size: size, skip: skip, sort: sort}
// pre-allocate space on the heap, we need size+skip results
// +1 additional while figuring out which to evict
hc.store = newStoreSlice(size+skip+1, func(i, j *search.DocumentMatch) int {
// pre-allocate space on the store to avoid reslicing
// unless the size + skip is too large, then cap it
// everything should still work, just reslices as necessary
backingSize := size + skip + 1
if size+skip > PreAllocSizeSkipCap {
backingSize = PreAllocSizeSkipCap + 1
}
hc.store = newStoreSlice(backingSize, func(i, j *search.DocumentMatch) int {
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
})
@ -73,12 +84,15 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
var err error
var next *search.DocumentMatch
// search context with enough pre-allocated document matches
// we keep references to size+skip ourselves
// plus possibly one extra for the highestMatchOutsideResults
// plus the amount required by the searcher tree
// pre-allocate enough space in the DocumentMatchPool
// unless the size + skip is too large, then cap it
// everything should still work, just allocates DocumentMatches on demand
backingSize := hc.size + hc.skip + 1
if hc.size+hc.skip > PreAllocSizeSkipCap {
backingSize = PreAllocSizeSkipCap + 1
}
searchContext := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(hc.size+hc.skip+1+searcher.DocumentMatchPoolSize(), len(hc.sort)),
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
}
select {