0
0
bleve/index/upsidedown/reader_test.go

530 lines
12 KiB
Go
Raw Normal View History

2014-04-17 22:55:53 +02:00
// Copyright (c) 2014 Couchbase, Inc.
2016-10-02 16:13:14 +02:00
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
2014-04-17 22:55:53 +02:00
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store/boltdb"
2014-04-17 22:55:53 +02:00
)
func TestIndexReader(t *testing.T) {
2015-10-19 20:27:03 +02:00
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
2014-04-17 22:55:53 +02:00
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
2014-04-17 22:55:53 +02:00
if err != nil {
t.Errorf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
if err != nil {
t.Fatal(err)
}
}()
2014-04-17 22:55:53 +02:00
2014-09-04 00:47:02 +02:00
var expectedCount uint64
2014-04-17 22:55:53 +02:00
doc := document.NewDocument("1")
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
2014-04-17 22:55:53 +02:00
err = idx.Update(doc)
if err != nil {
t.Errorf("Error updating index: %v", err)
}
2014-09-04 00:47:02 +02:00
expectedCount++
2014-04-17 22:55:53 +02:00
doc = document.NewDocument("2")
doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer))
doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors, testAnalyzer))
2014-04-17 22:55:53 +02:00
err = idx.Update(doc)
if err != nil {
t.Errorf("Error updating index: %v", err)
}
2014-09-04 00:47:02 +02:00
expectedCount++
2014-04-17 22:55:53 +02:00
indexReader, err := idx.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := indexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
2014-12-18 18:43:12 +01:00
// first look for a term that doesn't exist
reader, err := indexReader.TermFieldReader([]byte("nope"), "name", true, true, true)
2014-04-17 22:55:53 +02:00
if err != nil {
t.Errorf("Error accessing term field reader: %v", err)
}
count := reader.Count()
if count != 0 {
t.Errorf("Expected doc count to be: %d got: %d", 0, count)
}
err = reader.Close()
if err != nil {
t.Fatal(err)
}
2014-04-17 22:55:53 +02:00
reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true)
2014-04-17 22:55:53 +02:00
if err != nil {
t.Errorf("Error accessing term field reader: %v", err)
}
expectedCount = 2
count = reader.Count()
if count != expectedCount {
t.Errorf("Exptected doc count to be: %d got: %d", expectedCount, count)
}
var match *index.TermFieldDoc
var actualCount uint64
match, err = reader.Next(nil)
2014-04-17 22:55:53 +02:00
for err == nil && match != nil {
match, err = reader.Next(nil)
2014-04-17 22:55:53 +02:00
if err != nil {
t.Errorf("unexpected error reading next")
}
2014-09-04 00:47:02 +02:00
actualCount++
2014-04-17 22:55:53 +02:00
}
if actualCount != count {
t.Errorf("count was 2, but only saw %d", actualCount)
}
expectedMatch := &index.TermFieldDoc{
ID: index.IndexInternalID("2"),
2014-04-17 22:55:53 +02:00
Freq: 1,
Norm: 0.5773502588272095,
Vectors: []*index.TermFieldVector{
2016-04-03 03:54:33 +02:00
{
2014-04-17 22:55:53 +02:00
Field: "desc",
Pos: 3,
Start: 9,
End: 13,
},
},
}
tfr, err := indexReader.TermFieldReader([]byte("rice"), "desc", true, true, true)
2014-04-17 22:55:53 +02:00
if err != nil {
t.Errorf("unexpected error: %v", err)
}
match, err = tfr.Next(nil)
2014-04-17 22:55:53 +02:00
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(expectedMatch, match) {
t.Errorf("got %#v, expected %#v", match, expectedMatch)
}
err = reader.Close()
if err != nil {
t.Fatal(err)
}
2014-04-20 15:43:02 +02:00
// now test usage of advance
reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true)
2014-04-20 15:43:02 +02:00
if err != nil {
t.Errorf("Error accessing term field reader: %v", err)
}
match, err = reader.Advance(index.IndexInternalID("2"), nil)
2014-04-20 15:43:02 +02:00
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if match == nil {
t.Fatalf("Expected match, got nil")
}
if !match.ID.Equals(index.IndexInternalID("2")) {
2014-04-20 15:43:02 +02:00
t.Errorf("Expected ID '2', got '%s'", match.ID)
}
match, err = reader.Advance(index.IndexInternalID("3"), nil)
2014-04-20 15:43:02 +02:00
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if match != nil {
t.Errorf("expected nil, got %v", match)
}
err = reader.Close()
if err != nil {
t.Fatal(err)
}
2014-04-20 15:43:02 +02:00
2014-04-22 19:57:13 +02:00
// now test creating a reader for a field that doesn't exist
reader, err = indexReader.TermFieldReader([]byte("water"), "doesnotexist", true, true, true)
2014-04-22 19:57:13 +02:00
if err != nil {
t.Errorf("Error accessing term field reader: %v", err)
}
count = reader.Count()
if count != 0 {
2016-10-02 18:11:15 +02:00
t.Errorf("expected count 0 for reader of non-existent field")
2014-04-22 19:57:13 +02:00
}
match, err = reader.Next(nil)
2014-04-22 19:57:13 +02:00
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if match != nil {
t.Errorf("expected nil, got %v", match)
}
match, err = reader.Advance(index.IndexInternalID("anywhere"), nil)
2014-04-22 19:57:13 +02:00
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if match != nil {
t.Errorf("expected nil, got %v", match)
}
2014-04-17 22:55:53 +02:00
}
func TestIndexDocIdReader(t *testing.T) {
2015-10-19 20:27:03 +02:00
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
if err != nil {
t.Fatal(err)
}
}()
2014-09-04 00:47:02 +02:00
var expectedCount uint64
doc := document.NewDocument("1")
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
err = idx.Update(doc)
if err != nil {
t.Errorf("Error updating index: %v", err)
}
2014-09-04 00:47:02 +02:00
expectedCount++
doc = document.NewDocument("2")
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test test test")))
doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors))
err = idx.Update(doc)
if err != nil {
t.Errorf("Error updating index: %v", err)
}
2014-09-04 00:47:02 +02:00
expectedCount++
indexReader, err := idx.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := indexReader.Close()
if err != nil {
t.Error(err)
}
}()
// first get all doc ids
reader, err := indexReader.DocIDReaderAll()
if err != nil {
t.Errorf("Error accessing doc id reader: %v", err)
}
defer func() {
err := reader.Close()
if err != nil {
t.Fatal(err)
}
}()
id, err := reader.Next()
count := uint64(0)
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion)
2016-07-31 19:46:18 +02:00
for id != nil {
count++
id, err = reader.Next()
}
if count != expectedCount {
t.Errorf("expected %d, got %d", expectedCount, count)
}
// try it again, but jump to the second doc this time
reader2, err := indexReader.DocIDReaderAll()
if err != nil {
t.Errorf("Error accessing doc id reader: %v", err)
}
defer func() {
err := reader2.Close()
if err != nil {
t.Error(err)
}
}()
id, err = reader2.Advance(index.IndexInternalID("2"))
if err != nil {
t.Error(err)
}
if !id.Equals(index.IndexInternalID("2")) {
t.Errorf("expected to find id '2', got '%s'", id)
}
id, err = reader2.Advance(index.IndexInternalID("3"))
if err != nil {
t.Error(err)
}
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion)
2016-07-31 19:46:18 +02:00
if id != nil {
t.Errorf("expected to find id '', got '%s'", id)
}
}
2015-07-13 21:22:54 +02:00
// TestCrashBadBackIndexRow builds a back index row from a key ending in
// ByteSeparator and an empty value, and checks that construction neither
// fails nor loses the document id "a" carried by the key.
func TestCrashBadBackIndexRow(t *testing.T) {
	br, err := NewBackIndexRowKV([]byte{byte('b'), byte('a'), ByteSeparator}, []byte{})
	if err != nil {
		t.Fatal(err)
	}
	// err is nil on this path, so report the actual mismatch instead of it.
	if string(br.doc) != "a" {
		t.Fatalf("expected doc 'a', got %q", br.doc)
	}
}
// TestIndexDocIdOnlyReader indexes the documents "1", "3", "5", "7" and "9"
// and exercises DocIDReaderOnly: plain iteration over a requested id set,
// Advance, requested ids that are not in the index, and interleaved
// Next/Advance calls (including advancing backwards).
func TestIndexDocIdOnlyReader(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	// Nothing below is meaningful without an open index, so stop here.
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// index empty documents under odd-numbered ids only
	for _, docID := range []string{"1", "3", "5", "7", "9"} {
		err = idx.Update(document.NewDocument(docID))
		if err != nil {
			t.Errorf("Error updating index: %v", err)
		}
	}

	// The deferred Close dereferences indexReader, so a failure is fatal.
	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	onlyIds := []string{"1", "5", "9"}
	reader, err := indexReader.DocIDReaderOnly(onlyIds)
	if err != nil {
		t.Fatalf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	var count uint64
	id, err := reader.Next()
	for err == nil && id != nil {
		count++
		id, err = reader.Next()
	}
	// An iteration error would otherwise surface only as a wrong count.
	if err != nil {
		t.Fatalf("unexpected error iterating doc ids: %v", err)
	}
	if count != 3 {
		t.Errorf("expected 3, got %d", count)
	}

	// try it again, but jump
	reader2, err := indexReader.DocIDReaderOnly(onlyIds)
	if err != nil {
		t.Fatalf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader2.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	id, err = reader2.Advance(index.IndexInternalID("5"))
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("5")) {
		t.Errorf("expected to find id '5', got '%s'", id)
	}

	id, err = reader2.Advance(index.IndexInternalID("a"))
	if err != nil {
		t.Error(err)
	}
	if id != nil {
		t.Errorf("expected to find id '', got '%s'", id)
	}

	// some keys aren't actually there
	onlyIds = []string{"0", "2", "4", "5", "6", "8", "a"}
	reader3, err := indexReader.DocIDReaderOnly(onlyIds)
	if err != nil {
		t.Fatalf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader3.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	count = 0
	id, err = reader3.Next()
	for err == nil && id != nil {
		count++
		id, err = reader3.Next()
	}
	if err != nil {
		t.Fatalf("unexpected error iterating doc ids: %v", err)
	}
	if count != 1 {
		t.Errorf("expected 1, got %d", count)
	}

	// mix advance and next
	onlyIds = []string{"0", "1", "3", "5", "6", "9"}
	reader4, err := indexReader.DocIDReaderOnly(onlyIds)
	if err != nil {
		t.Fatalf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader4.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	// first key is "1"
	id, err = reader4.Next()
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("1")) {
		t.Errorf("expected to find id '1', got '%s'", id)
	}

	// advancing to key we dont have gives next
	id, err = reader4.Advance(index.IndexInternalID("2"))
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("3")) {
		t.Errorf("expected to find id '3', got '%s'", id)
	}

	// next after advance works
	id, err = reader4.Next()
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("5")) {
		t.Errorf("expected to find id '5', got '%s'", id)
	}

	// advancing to key we do have works
	id, err = reader4.Advance(index.IndexInternalID("9"))
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("9")) {
		t.Errorf("expected to find id '9', got '%s'", id)
	}

	// advance backwards at end
	id, err = reader4.Advance(index.IndexInternalID("4"))
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("5")) {
		t.Errorf("expected to find id '5', got '%s'", id)
	}

	// next after advance works
	id, err = reader4.Next()
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("9")) {
		t.Errorf("expected to find id '9', got '%s'", id)
	}

	// advance backwards to key that exists, but not in only set
	id, err = reader4.Advance(index.IndexInternalID("7"))
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("9")) {
		t.Errorf("expected to find id '9', got '%s'", id)
	}
}