bleve/search/searchers/search_phrase_test.go

//  Copyright (c) 2013 Couchbase, Inc.
//  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
//  except in compliance with the License. You may obtain a copy of the License at
//    http://www.apache.org/licenses/LICENSE-2.0
//  Unless required by applicable law or agreed to in writing, software distributed under the
//  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
//  either express or implied. See the License for the specific language governing permissions
//  and limitations under the License.

package searchers

import (
	"testing"

	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/search"
)

// TestPhraseSearch builds a phrase searcher for the terms "angst" and "beer"
// on top of a conjunction of two term searchers over the shared twoDocIndex
// fixture, then iterates it and checks the ID, score and number of the matches
// it yields against the expected results.
func TestPhraseSearch(t *testing.T) {

	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// Nothing below can work without a reader, so stop immediately
		// instead of continuing with an unusable one.
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	angstTermSearcher, err := NewTermSearcher(twoDocIndexReader, "angst", "desc", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	beerTermSearcher, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	mustSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{angstTermSearcher, beerTermSearcher}, true)
	if err != nil {
		t.Fatal(err)
	}
	phraseSearcher, err := NewPhraseSearcher(twoDocIndexReader, mustSearcher, []string{"angst", "beer"})
	if err != nil {
		t.Fatal(err)
	}

	tests := []struct {
		searcher search.Searcher
		results  []*search.DocumentMatchInternal
	}{
		{
			searcher: phraseSearcher,
			results: []*search.DocumentMatchInternal{
				{
					ID:    index.IndexInternalID("2"),
					Score: 1.0807601687084403,
				},
			},
		},
	}

	for testIndex, test := range tests {
		// Each case runs in its own function so that the deferred Close fires
		// at the end of that case (not at the end of the whole test) and acts
		// on that case's searcher rather than on a shared loop variable.
		func(testIndex int, searcher search.Searcher, expected []*search.DocumentMatchInternal) {
			defer func() {
				err := searcher.Close()
				if err != nil {
					t.Fatal(err)
				}
			}()

			next, err := searcher.Next(nil)
			i := 0
			for err == nil && next != nil {
				// Matches beyond the expected list are only counted here; the
				// length check after the loop reports them.
				if i < len(expected) {
					if !next.ID.Equals(expected[i].ID) {
						t.Errorf("expected result %d to have id %s got %s for test %d", i, expected[i].ID, next.ID, testIndex)
					}
					if next.Score != expected[i].Score {
						t.Errorf("expected result %d to have score %v got  %v for test %d", i, expected[i].Score, next.Score, testIndex)
						t.Logf("scoring explanation: %s", next.Expl)
					}
				}
				next, err = searcher.Next(nil)
				i++
			}
			if err != nil {
				t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
			}
			if len(expected) != i {
				t.Errorf("expected %d results got %d for test %d", len(expected), i, testIndex)
			}
		}(testIndex, test.searcher, test.results)
	}
}
added phrase search 2014-07-03 20:54:50 +02:00			`// Copyright (c) 2013 Couchbase, Inc.`
			`// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file`
			`// except in compliance with the License. You may obtain a copy of the License at`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`// Unless required by applicable law or agreed to in writing, software distributed under the`
			`// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,`
			`// either express or implied. See the License for the specific language governing permissions`
			`// and limitations under the License.`
add newline between license and package this avoids cluttering godocs with the license 2014-09-02 16:54:50 +02:00
major refactor of search package this started initially to relocate highlighting into a self contained package, which would then also use the registry however, it turned into a much larger refactor in order to avoid cyclic imports now facets, searchers, scorers and collectors are also broken out into subpackages of search 2014-09-01 17:15:38 +02:00			`package searchers`
added phrase search 2014-07-03 20:54:50 +02:00
			`import (`
			`"testing"`
major refactor of search package this started initially to relocate highlighting into a self contained package, which would then also use the registry however, it turned into a much larger refactor in order to avoid cyclic imports now facets, searchers, scorers and collectors are also broken out into subpackages of search 2014-09-01 17:15:38 +02:00
changed approach IndexInternalID is now []byte this is still opaque, and should still work for any future index implementations as it is a least common denominator choice, all implementations must internally represent the id as []byte at some point for storage to disk 2016-08-01 20:26:50 +02:00			`"github.com/blevesearch/bleve/index"`
major refactor of search package this started initially to relocate highlighting into a self contained package, which would then also use the registry however, it turned into a much larger refactor in order to avoid cyclic imports now facets, searchers, scorers and collectors are also broken out into subpackages of search 2014-09-01 17:15:38 +02:00			`"github.com/blevesearch/bleve/search"`
added phrase search 2014-07-03 20:54:50 +02:00			`)`

			`func TestPhraseSearch(t *testing.T) {`

refactored API a bit more things can return error now in a couple of places we had to swallow errors because they didn't fit the existing API. in these case and proactively in a few others we now return error as well. also the batch API has been updated to allow performing set/delete internal within the batch 2014-10-31 14:40:23 +01:00			`twoDocIndexReader, err := twoDocIndex.Reader()`
			`if err != nil {`
			`t.Error(err)`
			`}`
fix errors identified by errcheck part of #169 2015-04-08 00:05:41 +02:00			`defer func() {`
			`err := twoDocIndexReader.Close()`
			`if err != nil {`
			`t.Fatal(err)`
			`}`
			`}()`
major refactor of kvstore/index internals, see below In the index/store package introduce KVReader creates snapshot all read operations consistent from this snapshot must close to release introduce KVWriter only one writer active access to all operations allows for consisten read-modify-write must close to release introduce AssociativeMerge operation on batch allows efficient read-modify-write for associative operations used to consolidate updates to the term summary rows saves 1 set and 1 get op per shared instance of term in field In the index package introduced an IndexReader exposes a consisten snapshot of the index for searching At top level All searches now operate on a consisten snapshot of the index 2014-09-12 23:21:35 +02:00
			`angstTermSearcher, err := NewTermSearcher(twoDocIndexReader, "angst", "desc", 1.0, true)`
major refactor, apologies for the large commit removed analyzers (these are now built as needed through config) removed html chacter filter (now built as needed through config) added missing license header changed constructor signature of filters that cannot return errors filter constructors that can have errors, now have Must variant which panics change cdl2 tokenizer into filter (should only see lower-case input) new top level index api, closes #5 refactored index tests to not rely directly on analyzers moved query objects to top-level new top level search api, closes #12 top score collector allows skipping results index mapping supports _all by default, closes #3 and closes #6 index mapping supports disabled sections, closes #7 new http sub package with reusable http.Handler's, closes #22 2014-07-30 18:30:38 +02:00			`if err != nil {`
			`t.Fatal(err)`
			`}`
major refactor of kvstore/index internals, see below In the index/store package introduce KVReader creates snapshot all read operations consistent from this snapshot must close to release introduce KVWriter only one writer active access to all operations allows for consisten read-modify-write must close to release introduce AssociativeMerge operation on batch allows efficient read-modify-write for associative operations used to consolidate updates to the term summary rows saves 1 set and 1 get op per shared instance of term in field In the index package introduced an IndexReader exposes a consisten snapshot of the index for searching At top level All searches now operate on a consisten snapshot of the index 2014-09-12 23:21:35 +02:00			`beerTermSearcher, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)`
major refactor, apologies for the large commit removed analyzers (these are now built as needed through config) removed html chacter filter (now built as needed through config) added missing license header changed constructor signature of filters that cannot return errors filter constructors that can have errors, now have Must variant which panics change cdl2 tokenizer into filter (should only see lower-case input) new top level index api, closes #5 refactored index tests to not rely directly on analyzers moved query objects to top-level new top level search api, closes #12 top score collector allows skipping results index mapping supports _all by default, closes #3 and closes #6 index mapping supports disabled sections, closes #7 new http sub package with reusable http.Handler's, closes #22 2014-07-30 18:30:38 +02:00			`if err != nil {`
			`t.Fatal(err)`
			`}`
major refactor of kvstore/index internals, see below In the index/store package introduce KVReader creates snapshot all read operations consistent from this snapshot must close to release introduce KVWriter only one writer active access to all operations allows for consisten read-modify-write must close to release introduce AssociativeMerge operation on batch allows efficient read-modify-write for associative operations used to consolidate updates to the term summary rows saves 1 set and 1 get op per shared instance of term in field In the index package introduced an IndexReader exposes a consisten snapshot of the index for searching At top level All searches now operate on a consisten snapshot of the index 2014-09-12 23:21:35 +02:00			`mustSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{angstTermSearcher, beerTermSearcher}, true)`
major refactor, apologies for the large commit removed analyzers (these are now built as needed through config) removed html chacter filter (now built as needed through config) added missing license header changed constructor signature of filters that cannot return errors filter constructors that can have errors, now have Must variant which panics change cdl2 tokenizer into filter (should only see lower-case input) new top level index api, closes #5 refactored index tests to not rely directly on analyzers moved query objects to top-level new top level search api, closes #12 top score collector allows skipping results index mapping supports _all by default, closes #3 and closes #6 index mapping supports disabled sections, closes #7 new http sub package with reusable http.Handler's, closes #22 2014-07-30 18:30:38 +02:00			`if err != nil {`
			`t.Fatal(err)`
			`}`
major refactor of kvstore/index internals, see below In the index/store package introduce KVReader creates snapshot all read operations consistent from this snapshot must close to release introduce KVWriter only one writer active access to all operations allows for consisten read-modify-write must close to release introduce AssociativeMerge operation on batch allows efficient read-modify-write for associative operations used to consolidate updates to the term summary rows saves 1 set and 1 get op per shared instance of term in field In the index package introduced an IndexReader exposes a consisten snapshot of the index for searching At top level All searches now operate on a consisten snapshot of the index 2014-09-12 23:21:35 +02:00			`phraseSearcher, err := NewPhraseSearcher(twoDocIndexReader, mustSearcher, []string{"angst", "beer"})`
major refactor, apologies for the large commit removed analyzers (these are now built as needed through config) removed html chacter filter (now built as needed through config) added missing license header changed constructor signature of filters that cannot return errors filter constructors that can have errors, now have Must variant which panics change cdl2 tokenizer into filter (should only see lower-case input) new top level index api, closes #5 refactored index tests to not rely directly on analyzers moved query objects to top-level new top level search api, closes #12 top score collector allows skipping results index mapping supports _all by default, closes #3 and closes #6 index mapping supports disabled sections, closes #7 new http sub package with reusable http.Handler's, closes #22 2014-07-30 18:30:38 +02:00			`if err != nil {`
			`t.Fatal(err)`
			`}`

added phrase search 2014-07-03 20:54:50 +02:00			`tests := []struct {`
major refactor of search package this started initially to relocate highlighting into a self contained package, which would then also use the registry however, it turned into a much larger refactor in order to avoid cyclic imports now facets, searchers, scorers and collectors are also broken out into subpackages of search 2014-09-01 17:15:38 +02:00			`searcher search.Searcher`
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion) 2016-07-31 19:46:18 +02:00			`results []*search.DocumentMatchInternal`
added phrase search 2014-07-03 20:54:50 +02:00			`}{`
			`{`
major refactor, apologies for the large commit removed analyzers (these are now built as needed through config) removed html chacter filter (now built as needed through config) added missing license header changed constructor signature of filters that cannot return errors filter constructors that can have errors, now have Must variant which panics change cdl2 tokenizer into filter (should only see lower-case input) new top level index api, closes #5 refactored index tests to not rely directly on analyzers moved query objects to top-level new top level search api, closes #12 top score collector allows skipping results index mapping supports _all by default, closes #3 and closes #6 index mapping supports disabled sections, closes #7 new http sub package with reusable http.Handler's, closes #22 2014-07-30 18:30:38 +02:00			`searcher: phraseSearcher,`
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion) 2016-07-31 19:46:18 +02:00			`results: []*search.DocumentMatchInternal{`
gofmt simplifications 2016-04-03 03:54:33 +02:00			`{`
changed approach IndexInternalID is now []byte this is still opaque, and should still work for any future index implementations as it is a least common denominator choice, all implementations must internally represent the id as []byte at some point for storage to disk 2016-08-01 20:26:50 +02:00			`ID: index.IndexInternalID("2"),`
added phrase search 2014-07-03 20:54:50 +02:00			`Score: 1.0807601687084403,`
			`},`
			`},`
			`},`
			`}`

			`for testIndex, test := range tests {`
fix errors identified by errcheck part of #169 2015-04-08 00:05:41 +02:00			`defer func() {`
			`err := test.searcher.Close()`
			`if err != nil {`
			`t.Fatal(err)`
			`}`
			`}()`
added phrase search 2014-07-03 20:54:50 +02:00
optimize upside_down reader Next() with doc match reuse This optimization changes the search.Search.Next() interface API, adding an optional, pre-allocated DocumentMatch parameter. When it's non-nil, the TermSearcher and TermQueryScorer will use that pre-allocated DocumentMatch, instead of allocating a brand new DocumentMatch instance. 2016-07-21 01:29:20 +02:00			`next, err := test.searcher.Next(nil)`
added phrase search 2014-07-03 20:54:50 +02:00			`i := 0`
			`for err == nil && next != nil {`
			`if i < len(test.results) {`
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion) 2016-07-31 19:46:18 +02:00			`if !next.ID.Equals(test.results[i].ID) {`
added phrase search 2014-07-03 20:54:50 +02:00			`t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)`
			`}`
			`if next.Score != test.results[i].Score {`
			`t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)`
			`t.Logf("scoring explanation: %s", next.Expl)`
			`}`
			`}`
optimize upside_down reader Next() with doc match reuse This optimization changes the search.Search.Next() interface API, adding an optional, pre-allocated DocumentMatch parameter. When it's non-nil, the TermSearcher and TermQueryScorer will use that pre-allocated DocumentMatch, instead of allocating a brand new DocumentMatch instance. 2016-07-21 01:29:20 +02:00			`next, err = test.searcher.Next(nil)`
added phrase search 2014-07-03 20:54:50 +02:00			`i++`
			`}`
			`if err != nil {`
			`t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)`
			`}`
			`if len(test.results) != i {`
			`t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)`
			`}`
			`}`
			`}`