0
0
bleve/index/upside_down/reader_test.go
Marty Schoch 5aa9e95468 major refactor of index/search API
index id's are now opaque (until finally returned to top-level user)
 - the TermFieldDoc's returned by TermFieldReader no longer contain doc id
 - instead they return an opaque IndexInternalID
 - items returned are still in the "natural index order"
 - but that is no longer guaranteed to be "doc id order"
 - correct behavior requires that they all follow the same order
 - but not any particular order

 - new API FinalizeDocID which converts index internal ID's to public string ID

 - APIs used internally which previously took doc id now take IndexInternalID
     - that is DocumentFieldTerms() and DocumentFieldTermsForFields()
 - however, APIs that are used externally do not reflect this change
     - that is Document()

 - DocumentIDReader follows the same changes, but this is less obvious
     - behavior clarified, used to iterate doc ids, BUT NOT in doc id order
     - method STILL available to iterate doc ids in range
     - but again, you won't get them in any meaningful order
     - new method to iterate actual doc ids from list of possible ids
         - this was introduced to make the DocIDSearcher continue working

searchers now work with the new opaque index internal doc ids
 - they return new DocumentMatchInternal (which does not have string ID)
scorers also work with these opaque index internal doc ids
 - they return DocumentMatchInternal (which does not have string ID)
collectors now also perform a final step of converting the final result
 - they STILL return traditional DocumentMatch (with string ID)
 - but they now also require an IndexReader (so that they can do the conversion)
2016-07-31 13:46:18 -04:00

309 lines
7.3 KiB
Go

// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package upside_down
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store/boltdb"
)
// TestIndexReader indexes two documents and exercises the TermFieldReader
// obtained from the index reader. It covers, in order:
//   - a term that is not in the index (count must be 0),
//   - a term present in both documents (count and iteration must agree),
//   - the full contents of a single match, including its term vector,
//   - Advance to an existing id and past the last id,
//   - a reader opened on a field that does not exist.
//
// Every term field reader opened here is closed before the test returns.
func TestIndexReader(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		// nothing below can work against an index that failed to open
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// doc "1": the term "test" once in field "name"
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	// doc "2": "test" three times in "name", plus a "desc" field indexed
	// with term vectors
	doc = document.NewDocument("2")
	doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer))
	doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors, testAnalyzer))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		// indexReader is unusable on error; stop instead of dereferencing it
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// first look for a term that doesn't exist
	reader, err := indexReader.TermFieldReader([]byte("nope"), "name", true, true, true)
	if err != nil {
		t.Fatalf("Error accessing term field reader: %v", err)
	}
	count := reader.Count()
	if count != 0 {
		t.Errorf("Expected doc count to be: %d got: %d", 0, count)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	// now a term that both documents contain in the "name" field
	reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true)
	if err != nil {
		t.Fatalf("Error accessing term field reader: %v", err)
	}
	expectedCount := uint64(2)
	count = reader.Count()
	if count != expectedCount {
		t.Errorf("Exptected doc count to be: %d got: %d", expectedCount, count)
	}

	// iterating the reader must yield exactly as many matches as Count reported
	var match *index.TermFieldDoc
	var actualCount uint64
	match, err = reader.Next(nil)
	if err != nil {
		t.Fatalf("unexpected error reading first match: %v", err)
	}
	for match != nil {
		actualCount++
		match, err = reader.Next(nil)
		if err != nil {
			t.Fatalf("unexpected error reading next: %v", err)
		}
	}
	if actualCount != count {
		t.Errorf("count was %d, but only saw %d", count, actualCount)
	}

	// "rice" is the third token of "eat more rice" and occupies bytes 9-13.
	// NOTE(review): Norm looks like 1/sqrt(3) for a three-term field - confirm.
	expectedMatch := &index.TermFieldDoc{
		ID:   InternalId("2"),
		Freq: 1,
		Norm: 0.5773502588272095,
		Vectors: []*index.TermFieldVector{
			{
				Field: "desc",
				Pos:   3,
				Start: 9,
				End:   13,
			},
		},
	}
	tfr, err := indexReader.TermFieldReader([]byte("rice"), "desc", true, true, true)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	match, err = tfr.Next(nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if !reflect.DeepEqual(expectedMatch, match) {
		t.Errorf("got %#v, expected %#v", match, expectedMatch)
	}
	// close the "rice" reader as well as the "test" reader opened before it
	err = tfr.Close()
	if err != nil {
		t.Fatal(err)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	// now test usage of advance
	reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true)
	if err != nil {
		t.Fatalf("Error accessing term field reader: %v", err)
	}
	match, err = reader.Advance(InternalId("2"), nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match == nil {
		t.Fatalf("Expected match, got nil")
	}
	if !match.ID.Equals(InternalId("2")) {
		t.Errorf("Expected ID '2', got '%s'", match.ID)
	}
	// advancing past the last indexed id must produce no match
	match, err = reader.Advance(InternalId("3"), nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match != nil {
		t.Errorf("expected nil, got %v", match)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	// now test creating a reader for a field that doesn't exist
	reader, err = indexReader.TermFieldReader([]byte("water"), "doesnotexist", true, true, true)
	if err != nil {
		t.Fatalf("Error accessing term field reader: %v", err)
	}
	count = reader.Count()
	if count != 0 {
		t.Errorf("expected count 0 for reader of non-existant field")
	}
	match, err = reader.Next(nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match != nil {
		t.Errorf("expected nil, got %v", match)
	}
	match, err = reader.Advance(InternalId("anywhere"), nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match != nil {
		t.Errorf("expected nil, got %v", match)
	}
	// this last reader was previously left open
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}
}
// TestIndexDocIdReader indexes two documents and exercises the reader
// returned by DocIDReader("", ""): first by iterating every id and comparing
// the total with the number of documents indexed, then by calling Advance to
// an existing id and to an id beyond the last one.
func TestIndexDocIdReader(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		// nothing below can work against an index that failed to open
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// expectedCount tracks how many documents have been indexed
	var expectedCount uint64
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	doc = document.NewDocument("2")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test test test")))
	doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	indexReader, err := idx.Reader()
	if err != nil {
		// indexReader is unusable on error; stop instead of dereferencing it
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	// first get all doc ids
	reader, err := indexReader.DocIDReader("", "")
	if err != nil {
		t.Fatalf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// walk the reader to exhaustion; an iteration error must fail the test
	// rather than silently end the loop with a short count
	var count uint64
	id, err := reader.Next()
	for err == nil && id != nil {
		count++
		id, err = reader.Next()
	}
	if err != nil {
		t.Fatalf("unexpected error iterating doc ids: %v", err)
	}
	if count != expectedCount {
		t.Errorf("expected %d, got %d", expectedCount, count)
	}

	// try it again, but jump to the second doc this time
	reader2, err := indexReader.DocIDReader("", "")
	if err != nil {
		t.Fatalf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader2.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	id, err = reader2.Advance(InternalId("2"))
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(InternalId("2")) {
		t.Errorf("expected to find id '2', got '%s'", id)
	}

	// advancing past the last indexed id must yield no id
	id, err = reader2.Advance(InternalId("3"))
	if err != nil {
		t.Error(err)
	}
	if id != nil {
		t.Errorf("expected to find id '', got '%s'", id)
	}
}
// TestCrashBadBackIndexRow builds a back index row from a key made of the
// bytes 'b', 'a' and ByteSeparator together with an empty value, and checks
// that the row is created without error and that its doc is "a".
// NOTE(review): judging by the name, this guards against a crash on such
// input - confirm against the history of NewBackIndexRowKV.
func TestCrashBadBackIndexRow(t *testing.T) {
	br, err := NewBackIndexRowKV([]byte{byte('b'), byte('a'), ByteSeparator}, []byte{})
	if err != nil {
		t.Fatal(err)
	}
	if string(br.doc) != "a" {
		// err is nil on this path, so report the actual mismatch instead
		t.Fatalf("expected back index row doc %q, got %q", "a", br.doc)
	}
}