0
0
bleve/search/searchers/search_regexp_test.go

112 lines
2.7 KiB
Go
Raw Normal View History

// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"regexp"
"testing"
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion)
2016-07-31 19:46:18 +02:00
"github.com/blevesearch/bleve/index/upside_down"
"github.com/blevesearch/bleve/search"
)
func TestRegexpSearch(t *testing.T) {
twoDocIndexReader, err := twoDocIndex.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := twoDocIndexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
pattern, err := regexp.Compile("ma.*")
if err != nil {
t.Fatal(err)
}
regexpSearcher, err := NewRegexpSearcher(twoDocIndexReader, pattern, "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
patternCo, err := regexp.Compile("co.*")
if err != nil {
t.Fatal(err)
}
regexpSearcherCo, err := NewRegexpSearcher(twoDocIndexReader, patternCo, "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
tests := []struct {
searcher search.Searcher
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion)
2016-07-31 19:46:18 +02:00
results []*search.DocumentMatchInternal
}{
{
searcher: regexpSearcher,
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion)
2016-07-31 19:46:18 +02:00
results: []*search.DocumentMatchInternal{
2016-04-03 03:54:33 +02:00
{
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion)
2016-07-31 19:46:18 +02:00
ID: upside_down.InternalId("1"),
Score: 1.916290731874155,
},
},
},
{
searcher: regexpSearcherCo,
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion)
2016-07-31 19:46:18 +02:00
results: []*search.DocumentMatchInternal{
2016-04-03 03:54:33 +02:00
{
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion)
2016-07-31 19:46:18 +02:00
ID: upside_down.InternalId("2"),
Score: 0.33875554280828685,
},
2016-04-03 03:54:33 +02:00
{
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion)
2016-07-31 19:46:18 +02:00
ID: upside_down.InternalId("3"),
Score: 0.33875554280828685,
},
},
},
}
for testIndex, test := range tests {
defer func() {
err := test.searcher.Close()
if err != nil {
t.Fatal(err)
}
}()
next, err := test.searcher.Next(nil)
i := 0
for err == nil && next != nil {
if i < len(test.results) {
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion)
2016-07-31 19:46:18 +02:00
if !next.ID.Equals(test.results[i].ID) {
t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
}
if next.Score != test.results[i].Score {
t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
t.Logf("scoring explanation: %s", next.Expl)
}
}
next, err = test.searcher.Next(nil)
i++
}
if err != nil {
t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
}
if len(test.results) != i {
t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
}
}
}