5aa9e95468
index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working - searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) - scorers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) - collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion)
418 lines
9.3 KiB
Go
418 lines
9.3 KiB
Go
// Copyright (c) 2014 Couchbase, Inc.
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
|
// except in compliance with the License. You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the
|
|
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
|
// either express or implied. See the License for the specific language governing permissions
|
|
// and limitations under the License.
|
|
|
|
package collectors
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"golang.org/x/net/context"
|
|
|
|
"github.com/blevesearch/bleve/search"
|
|
)
|
|
|
|
func TestTop10Scores(t *testing.T) {
|
|
|
|
// a stub search with more than 10 matches
|
|
// the top-10 scores are > 10
|
|
// everything else is less than 10
|
|
searcher := &stubSearcher{
|
|
matches: []*search.DocumentMatchInternal{
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("a"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("b"),
|
|
Score: 9,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("c"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("d"),
|
|
Score: 9,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("e"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("f"),
|
|
Score: 9,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("g"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("h"),
|
|
Score: 9,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("i"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("j"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("k"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("l"),
|
|
Score: 99,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("m"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("n"),
|
|
Score: 11,
|
|
},
|
|
},
|
|
}
|
|
|
|
collector := NewTopScorerCollector(10)
|
|
err := collector.Collect(context.Background(), searcher, &stubReader{})
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
maxScore := collector.MaxScore()
|
|
if maxScore != 99.0 {
|
|
t.Errorf("expected max score 99.0, got %f", maxScore)
|
|
}
|
|
|
|
total := collector.Total()
|
|
if total != 14 {
|
|
t.Errorf("expected 14 total results, got %d", total)
|
|
}
|
|
|
|
results := collector.Results()
|
|
|
|
if len(results) != 10 {
|
|
t.Fatalf("expected 10 results, got %d", len(results))
|
|
}
|
|
|
|
if results[0].ID != "l" {
|
|
t.Errorf("expected first result to have ID 'l', got %s", results[0].ID)
|
|
}
|
|
|
|
if results[0].Score != 99.0 {
|
|
t.Errorf("expected highest score to be 99.0, got %f", results[0].Score)
|
|
}
|
|
|
|
minScore := 1000.0
|
|
for _, result := range results {
|
|
if result.Score < minScore {
|
|
minScore = result.Score
|
|
}
|
|
}
|
|
|
|
if minScore < 10 {
|
|
t.Errorf("expected minimum score to be higher than 10, got %f", minScore)
|
|
}
|
|
}
|
|
|
|
func TestTop10ScoresSkip10(t *testing.T) {
|
|
|
|
// a stub search with more than 10 matches
|
|
// the top-10 scores are > 10
|
|
// everything else is less than 10
|
|
searcher := &stubSearcher{
|
|
matches: []*search.DocumentMatchInternal{
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("a"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("b"),
|
|
Score: 9.5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("c"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("d"),
|
|
Score: 9,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("e"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("f"),
|
|
Score: 9,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("g"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("h"),
|
|
Score: 9,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("i"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("j"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("k"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("l"),
|
|
Score: 99,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("m"),
|
|
Score: 11,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("n"),
|
|
Score: 11,
|
|
},
|
|
},
|
|
}
|
|
|
|
collector := NewTopScorerSkipCollector(10, 10)
|
|
err := collector.Collect(context.Background(), searcher, &stubReader{})
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
maxScore := collector.MaxScore()
|
|
if maxScore != 99.0 {
|
|
t.Errorf("expected max score 99.0, got %f", maxScore)
|
|
}
|
|
|
|
total := collector.Total()
|
|
if total != 14 {
|
|
t.Errorf("expected 14 total results, got %d", total)
|
|
}
|
|
|
|
results := collector.Results()
|
|
|
|
if len(results) != 4 {
|
|
t.Fatalf("expected 4 results, got %d", len(results))
|
|
}
|
|
|
|
if results[0].ID != "b" {
|
|
t.Errorf("expected first result to have ID 'b', got %s", results[0].ID)
|
|
}
|
|
|
|
if results[0].Score != 9.5 {
|
|
t.Errorf("expected highest score to be 9.5ß, got %f", results[0].Score)
|
|
}
|
|
}
|
|
|
|
func TestPaginationSameScores(t *testing.T) {
|
|
|
|
// a stub search with more than 10 matches
|
|
// all documents have the same score
|
|
searcher := &stubSearcher{
|
|
matches: []*search.DocumentMatchInternal{
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("a"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("b"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("c"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("d"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("e"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("f"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("g"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("h"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("i"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("j"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("k"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("l"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("m"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("n"),
|
|
Score: 5,
|
|
},
|
|
},
|
|
}
|
|
|
|
// first get first 5 hits
|
|
collector := NewTopScorerSkipCollector(5, 0)
|
|
err := collector.Collect(context.Background(), searcher, &stubReader{})
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
total := collector.Total()
|
|
if total != 14 {
|
|
t.Errorf("expected 14 total results, got %d", total)
|
|
}
|
|
|
|
results := collector.Results()
|
|
|
|
if len(results) != 5 {
|
|
t.Fatalf("expected 5 results, got %d", len(results))
|
|
}
|
|
|
|
firstResults := make(map[string]struct{})
|
|
for _, hit := range results {
|
|
firstResults[hit.ID] = struct{}{}
|
|
}
|
|
|
|
// a stub search with more than 10 matches
|
|
// all documents have the same score
|
|
searcher = &stubSearcher{
|
|
matches: []*search.DocumentMatchInternal{
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("a"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("b"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("c"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("d"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("e"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("f"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("g"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("h"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("i"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("j"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("k"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("l"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("m"),
|
|
Score: 5,
|
|
},
|
|
&search.DocumentMatchInternal{
|
|
ID: testInternalId("n"),
|
|
Score: 5,
|
|
},
|
|
},
|
|
}
|
|
|
|
// now get next 5 hits
|
|
collector = NewTopScorerSkipCollector(5, 5)
|
|
err = collector.Collect(context.Background(), searcher, &stubReader{})
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
total = collector.Total()
|
|
if total != 14 {
|
|
t.Errorf("expected 14 total results, got %d", total)
|
|
}
|
|
|
|
results = collector.Results()
|
|
|
|
if len(results) != 5 {
|
|
t.Fatalf("expected 5 results, got %d", len(results))
|
|
}
|
|
|
|
// make sure that none of these hits repeat ones we saw in the top 5
|
|
for _, hit := range results {
|
|
if _, ok := firstResults[hit.ID]; ok {
|
|
t.Errorf("doc ID %s is in top 5 and next 5 result sets", hit.ID)
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
func BenchmarkTop10of100000Scores(b *testing.B) {
|
|
benchHelper(10000, NewTopScorerCollector(10), b)
|
|
}
|
|
|
|
func BenchmarkTop100of100000Scores(b *testing.B) {
|
|
benchHelper(10000, NewTopScorerCollector(100), b)
|
|
}
|
|
|
|
func BenchmarkTop10of1000000Scores(b *testing.B) {
|
|
benchHelper(100000, NewTopScorerCollector(10), b)
|
|
}
|
|
|
|
func BenchmarkTop100of1000000Scores(b *testing.B) {
|
|
benchHelper(100000, NewTopScorerCollector(100), b)
|
|
}
|