From 60efecc8e9ac15eadb6a815d0a3f3162667ad15f Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 31 Aug 2016 15:21:44 -0400 Subject: [PATCH] cap preallocation by the collector to reasonable value the collector has optimizations to avoid allocation and reslicing during the common case of searching for top hits however, in some cases users request an a very large number of search hits to be returned (attempting to get them all) this caused unnecessary allocation of ram. to address this we introduce a new constant PreAllocSizeSkipCap it defaults the value of 1000. if your search+skip is less than this constant, you get the optimized behavior. if your search+skip is greater than this, we cap the preallcations to this lower value. additional space is acquired on an as needed basis by growing the DocumentMatchPool and reslicing the collector backing slice applications can change the value of PreAllocSizeSkipCap to suit their own needs fixes #408 --- index_test.go | 65 +++++++++++++++++++++++++++++++++++++++ search/collectors/topn.go | 30 +++++++++++++----- 2 files changed, 87 insertions(+), 8 deletions(-) diff --git a/index_test.go b/index_test.go index 5e070366..e9a6ae63 100644 --- a/index_test.go +++ b/index_test.go @@ -13,6 +13,7 @@ import ( "fmt" "io/ioutil" "log" + "math" "os" "reflect" "sort" @@ -1668,3 +1669,67 @@ func TestOpenReadonlyMultiple(t *testing.T) { t.Fatal(err) } } + +// TestBug408 tests for VERY large values of size, even though actual result +// set may be reasonable size +func TestBug408(t *testing.T) { + type TestStruct struct { + ID string `json:"id"` + UserID *string `json:"user_id"` + } + + docMapping := NewDocumentMapping() + docMapping.AddFieldMappingsAt("id", NewTextFieldMapping()) + docMapping.AddFieldMappingsAt("user_id", NewTextFieldMapping()) + + indexMapping := NewIndexMapping() + indexMapping.DefaultMapping = docMapping + + index, err := New("", indexMapping) + if err != nil { + t.Fatal(err) + } + + numToTest := 10 + matchUserID := "match" + noMatchUserID := "no_match" + matchingDocIds := make(map[string]struct{}) + + for i := 0; i < numToTest; i++ { + ds := &TestStruct{"id_" + strconv.Itoa(i), nil} + if i%2 == 0 { + ds.UserID = &noMatchUserID + } else { + ds.UserID = &matchUserID + matchingDocIds[ds.ID] = struct{}{} + } + err = index.Index(ds.ID, ds) + if err != nil { + t.Fatal(err) + } + } + + cnt, err := index.DocCount() + if err != nil { + t.Fatal(err) + } + if int(cnt) != numToTest { + t.Fatalf("expected %d documents in index, got %d", numToTest, cnt) + } + + q := NewTermQuery(matchUserID).SetField("user_id") + searchReq := NewSearchRequestOptions(q, math.MaxInt32, 0, false) + results, err := index.Search(searchReq) + if err != nil { + t.Fatal(err) + } + if int(results.Total) != numToTest/2 { + t.Fatalf("expected %d search hits, got %d", numToTest/2, results.Total) + } + + for _, result := range results.Hits { + if _, found := matchingDocIds[result.ID]; !found { + t.Fatalf("document with ID %s not in results as expected", result.ID) + } + } +} diff --git a/search/collectors/topn.go b/search/collectors/topn.go index 636b7593..3465c0a6 100644 --- a/search/collectors/topn.go +++ b/search/collectors/topn.go @@ -17,6 +17,10 @@ import ( "golang.org/x/net/context" ) +// PreAllocSizeSkipCap will cap preallocation to this amount when +// size+skip exceeds this value +const PreAllocSizeSkipCap = 1000 + type collectorCompare func(i, j *search.DocumentMatch) int type collectorFixup func(d *search.DocumentMatch) error @@ -50,9 +54,16 @@ const CheckDoneEvery = uint64(1024) // ordering hits by the provided sort order func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { hc := &TopNCollector{size: size, skip: skip, sort: sort} - // pre-allocate space on the heap, we need size+skip results - // +1 additional while figuring out which to evict - hc.store = newStoreSlice(size+skip+1, func(i, j *search.DocumentMatch) int { + + // pre-allocate space on the store to avoid reslicing + // unless the size + skip is too large, then cap it + // everything should still work, just reslices as necessary + backingSize := size + skip + 1 + if size+skip > PreAllocSizeSkipCap { + backingSize = PreAllocSizeSkipCap + 1 + } + + hc.store = newStoreSlice(backingSize, func(i, j *search.DocumentMatch) int { return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j) }) @@ -73,12 +84,15 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, var err error var next *search.DocumentMatch - // search context with enough pre-allocated document matches - // we keep references to size+skip ourselves - // plus possibly one extra for the highestMatchOutsideResults - // plus the amount required by the searcher tree + // pre-allocate enough space in the DocumentMatchPool + // unless the size + skip is too large, then cap it + // everything should still work, just allocates DocumentMatches on demand + backingSize := hc.size + hc.skip + 1 + if hc.size+hc.skip > PreAllocSizeSkipCap { + backingSize = PreAllocSizeSkipCap + 1 + } searchContext := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(hc.size+hc.skip+1+searcher.DocumentMatchPoolSize(), len(hc.sort)), + DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), } select {