diff --git a/index_test.go b/index_test.go index 5e070366..e9a6ae63 100644 --- a/index_test.go +++ b/index_test.go @@ -13,6 +13,7 @@ import ( "fmt" "io/ioutil" "log" + "math" "os" "reflect" "sort" @@ -1668,3 +1669,67 @@ func TestOpenReadonlyMultiple(t *testing.T) { t.Fatal(err) } } + +// TestBug408 tests for VERY large values of size, even though actual result +// set may be reasonable size +func TestBug408(t *testing.T) { + type TestStruct struct { + ID string `json:"id"` + UserID *string `json:"user_id"` + } + + docMapping := NewDocumentMapping() + docMapping.AddFieldMappingsAt("id", NewTextFieldMapping()) + docMapping.AddFieldMappingsAt("user_id", NewTextFieldMapping()) + + indexMapping := NewIndexMapping() + indexMapping.DefaultMapping = docMapping + + index, err := New("", indexMapping) + if err != nil { + t.Fatal(err) + } + + numToTest := 10 + matchUserID := "match" + noMatchUserID := "no_match" + matchingDocIds := make(map[string]struct{}) + + for i := 0; i < numToTest; i++ { + ds := &TestStruct{"id_" + strconv.Itoa(i), nil} + if i%2 == 0 { + ds.UserID = &noMatchUserID + } else { + ds.UserID = &matchUserID + matchingDocIds[ds.ID] = struct{}{} + } + err = index.Index(ds.ID, ds) + if err != nil { + t.Fatal(err) + } + } + + cnt, err := index.DocCount() + if err != nil { + t.Fatal(err) + } + if int(cnt) != numToTest { + t.Fatalf("expected %d documents in index, got %d", numToTest, cnt) + } + + q := NewTermQuery(matchUserID).SetField("user_id") + searchReq := NewSearchRequestOptions(q, math.MaxInt32, 0, false) + results, err := index.Search(searchReq) + if err != nil { + t.Fatal(err) + } + if int(results.Total) != numToTest/2 { + t.Fatalf("expected %d search hits, got %d", numToTest/2, results.Total) + } + + for _, result := range results.Hits { + if _, found := matchingDocIds[result.ID]; !found { + t.Fatalf("document with ID %s not in results as expected", result.ID) + } + } +} diff --git a/search/collectors/topn.go b/search/collectors/topn.go index 636b7593..3465c0a6 100644 --- a/search/collectors/topn.go +++ b/search/collectors/topn.go @@ -17,6 +17,10 @@ import ( "golang.org/x/net/context" ) +// PreAllocSizeSkipCap will cap preallocation to this amount when +// size+skip exceeds this value +const PreAllocSizeSkipCap = 1000 + type collectorCompare func(i, j *search.DocumentMatch) int type collectorFixup func(d *search.DocumentMatch) error @@ -50,9 +54,16 @@ const CheckDoneEvery = uint64(1024) // ordering hits by the provided sort order func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { hc := &TopNCollector{size: size, skip: skip, sort: sort} - // pre-allocate space on the heap, we need size+skip results - // +1 additional while figuring out which to evict - hc.store = newStoreSlice(size+skip+1, func(i, j *search.DocumentMatch) int { + + // pre-allocate space on the store to avoid reslicing + // unless the size + skip is too large, then cap it + // everything should still work, just reslices as necessary + backingSize := size + skip + 1 + if size+skip > PreAllocSizeSkipCap { + backingSize = PreAllocSizeSkipCap + 1 + } + + hc.store = newStoreSlice(backingSize, func(i, j *search.DocumentMatch) int { return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j) }) @@ -73,12 +84,15 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, var err error var next *search.DocumentMatch - // search context with enough pre-allocated document matches - // we keep references to size+skip ourselves - // plus possibly one extra for the highestMatchOutsideResults - // plus the amount required by the searcher tree + // pre-allocate enough space in the DocumentMatchPool + // unless the size + skip is too large, then cap it + // everything should still work, just allocates DocumentMatches on demand + backingSize := hc.size + hc.skip + 1 + if hc.size+hc.skip > PreAllocSizeSkipCap { + backingSize = PreAllocSizeSkipCap + 1 + } searchContext := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(hc.size+hc.skip+1+searcher.DocumentMatchPoolSize(), len(hc.sort)), + DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), } select {