0
0
Fork 0

query_docid: add DocIDQuery to filter by document identifiers

This commit is contained in:
Patrick Mezard 2015-10-31 17:23:47 +01:00
parent 74c309a7c2
commit ff7234d893
4 changed files with 285 additions and 0 deletions

56
query_docid.go Normal file
View File

@ -0,0 +1,56 @@
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searchers"
)
// docIDQuery is a query restricted to an explicit set of document
// identifiers. Its fields carry JSON tags so the query can be serialized.
type docIDQuery struct {
	// IDs lists the document identifiers the query may return.
	IDs []string `json:"ids"`
	// BoostVal is the boost attached to the query; a zero value is left out
	// of the JSON form.
	BoostVal float64 `json:"boost,omitempty"`
}

// NewDocIDQuery creates a new Query object returning the indexed documents
// whose identifiers belong to ids. Combine it with ConjunctionQuery to
// restrict the output of other queries to that set.
//
// The ids slice is stored as is (not copied) and the boost starts at 1.0.
func NewDocIDQuery(ids []string) *docIDQuery {
	q := new(docIDQuery)
	q.IDs = ids
	q.BoostVal = 1.0
	return q
}
// Boost reports the boost value currently stored on the query.
func (q *docIDQuery) Boost() float64 {
	boost := q.BoostVal
	return boost
}
// SetBoost records b as the query boost and returns the same query, which
// allows the call to be chained onto the constructor.
func (q *docIDQuery) SetBoost(b float64) Query {
	q.BoostVal = b
	return q
}
// Field always returns the empty string: a docIDQuery stores no field name.
func (q *docIDQuery) Field() string {
	const noField = ""
	return noField
}
// SetField ignores f and returns the query unchanged; a docIDQuery has no
// field to store it in.
func (q *docIDQuery) SetField(f string) Query {
	return q
}
// Searcher builds the searcher that executes this query against index
// reader i, using the query's identifiers and boost. The mapping m is not
// consulted. When explain is set it is forwarded to the searcher.
//
// On failure the returned search.Searcher is a true nil interface value.
func (q *docIDQuery) Searcher(i index.IndexReader, m *IndexMapping, explain bool) (search.Searcher, error) {
	s, err := searchers.NewDocIDSearcher(i, q.IDs, q.BoostVal, explain)
	if err != nil {
		// Do not forward the constructor's results directly: its nil
		// *DocIDSearcher would be wrapped in a non-nil search.Searcher
		// interface, defeating any `searcher != nil` check in callers.
		return nil, err
	}
	return s, nil
}
// Validate accepts every docIDQuery, including one with no identifiers, and
// therefore always returns nil.
func (q *docIDQuery) Validate() error {
	return nil
}

View File

@ -224,6 +224,10 @@ func TestQueryValidate(t *testing.T) {
2.0),
err: ErrorDisjunctionFewerThanMinClauses,
},
{
query: NewDocIDQuery(nil).SetBoost(25),
err: nil,
},
}
for _, test := range tests {

View File

@ -0,0 +1,94 @@
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
// DocIDSearcher returns documents matching a predefined set of identifiers.
type DocIDSearcher struct {
	// ids holds the identifiers to return, in the sorted, de-duplicated
	// order produced by NewDocIDSearcher.
	ids []string
	// current is the position in ids of the next identifier Next will return.
	current int
	// scorer turns an identifier into a scored document match.
	scorer *scorers.ConstantScorer
}
// NewDocIDSearcher builds a DocIDSearcher over the identifiers in ids that
// the index behind indexReader actually contains.
//
// The input slice is copied, sorted and de-duplicated, so the caller's slice
// is left untouched and matches are later returned in sorted order. Each
// remaining identifier is looked up through a DocIDReader; identifiers the
// reader does not report back verbatim are dropped. Matches are scored by a
// constant scorer configured with boost and explain.
//
// An error from opening, advancing or closing the DocIDReader is returned
// together with a nil searcher.
//
// NOTE(review): the greatest identifier is passed as the upper bound of the
// reader range; confirm that DocIDReader treats this bound as inclusive,
// otherwise that identifier could never match.
func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64, explain bool) (searcher *DocIDSearcher, err error) {
	kept := make([]string, len(ids))
	copy(kept, ids)
	sort.Strings(kept)

	if len(kept) > 0 {
		idReader, openErr := indexReader.DocIDReader(kept[0], kept[len(kept)-1])
		if openErr != nil {
			return nil, openErr
		}
		defer func() {
			// Surface a failed Close instead of dropping it, but never
			// mask an earlier error.
			if closeErr := idReader.Close(); closeErr != nil && err == nil {
				searcher, err = nil, closeErr
			}
		}()

		// Compact kept in place. j never runs ahead of the range index, so
		// writing kept[j] cannot clobber an identifier not yet visited.
		j := 0
		for _, id := range kept {
			doc, advErr := idReader.Advance(id)
			if advErr != nil {
				return nil, advErr
			}
			// Keep only identifiers found in the index, skipping
			// duplicates (adjacent after sorting).
			if doc == id && (j == 0 || kept[j-1] != id) {
				kept[j] = id
				j++
			}
		}
		kept = kept[:j]
	}

	return &DocIDSearcher{
		ids:    kept,
		scorer: scorers.NewConstantScorer(1.0, boost, explain),
	}, nil
}
// Count returns how many identifiers the searcher holds, regardless of how
// many have already been consumed through Next or Advance.
func (s *DocIDSearcher) Count() uint64 {
	total := len(s.ids)
	return uint64(total)
}
// Weight returns the weight reported by the underlying constant scorer.
func (s *DocIDSearcher) Weight() float64 {
	weight := s.scorer.Weight()
	return weight
}
// SetQueryNorm forwards the query normalization factor qnorm to the
// underlying constant scorer.
func (s *DocIDSearcher) SetQueryNorm(qnorm float64) {
	s.scorer.SetQueryNorm(qnorm)
}
// Next returns the match for the identifier at the current position and
// moves the position forward by one. Once every identifier has been
// consumed it returns (nil, nil). The error result is always nil.
func (s *DocIDSearcher) Next() (*search.DocumentMatch, error) {
	pos := s.current
	if pos < len(s.ids) {
		s.current = pos + 1
		return s.scorer.Score(s.ids[pos]), nil
	}
	return nil, nil
}
// Advance repositions the searcher on the first identifier that is greater
// than or equal to ID, then returns it exactly as Next would. It returns
// (nil, nil) when no such identifier exists.
//
// The position is found by binary search over the whole sorted identifier
// list, so the searcher may also move backwards.
func (s *DocIDSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	atOrAfter := func(i int) bool {
		return s.ids[i] >= ID
	}
	s.current = sort.Search(len(s.ids), atOrAfter)
	return s.Next()
}
// Close does nothing and always returns nil; the searcher only holds
// in-memory state.
func (s *DocIDSearcher) Close() error {
	return nil
}
// Min always returns 0.
func (s *DocIDSearcher) Min() int {
	const min = 0
	return min
}

View File

@ -0,0 +1,131 @@
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store/gtreap"
"github.com/blevesearch/bleve/index/upside_down"
)
// testDocIDSearcher indexes one document per identifier in indexed, builds a
// DocIDSearcher over searched and checks that it exposes exactly wanted, in
// order, through Count, Next and Advance.
//
// Every identifier in wanted must be exactly two characters long: the seek
// check uses its first character as a target that sorts before it.
func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) {
	analysisQueue := index.NewAnalysisQueue(1)
	i, err := upside_down.NewUpsideDownCouch(gtreap.Name, nil, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = i.Open()
	if err != nil {
		t.Fatal(err)
	}
	// Release the index once the reader (deferred later, so closed first)
	// is done with it.
	defer func() {
		err := i.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	for _, id := range indexed {
		err = i.Update(&document.Document{
			ID: id,
			Fields: []document.Field{
				document.NewTextField("desc", []uint64{}, []byte("beer")),
			},
		})
		if err != nil {
			t.Fatal(err)
		}
	}

	indexReader, err := i.Reader()
	if err != nil {
		// Fatal, not Error: nothing below can run without a reader.
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	searcher, err := NewDocIDSearcher(indexReader, searched, 1.0, false)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := searcher.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	if searcher.Count() != uint64(len(wanted)) {
		t.Fatalf("expected count %v got %v", len(wanted), searcher.Count())
	}

	// Check the sequence
	for pos, id := range wanted {
		m, err := searcher.Next()
		if err != nil {
			t.Fatal(err)
		}
		if m == nil {
			// Report a short sequence instead of panicking on m.ID.
			t.Fatalf("expected %v at position %v, got nil", id, pos)
		}
		if id != m.ID {
			t.Fatalf("expected %v at position %v, got %v", id, pos, m.ID)
		}
	}
	m, err := searcher.Next()
	if err != nil {
		t.Fatal(err)
	}
	if m != nil {
		t.Fatalf("expected nil past the end of the sequence, got %v", m.ID)
	}

	// Check seeking
	for _, id := range wanted {
		if len(id) != 2 {
			t.Fatalf("expected identifier must be 2 characters long, got %v", id)
		}
		before := id[:1]
		// Seeking to a prefix of id and to id itself must both land on id.
		for _, target := range []string{before, id} {
			m, err := searcher.Advance(target)
			if err != nil {
				t.Fatal(err)
			}
			if m == nil || m.ID != id {
				t.Fatalf("advancing to %v returned %v instead of %v", target, m, id)
			}
		}
	}

	// Seek after the end of the sequence
	after := "zzz"
	m, err = searcher.Advance(after)
	if err != nil {
		t.Fatal(err)
	}
	if m != nil {
		t.Fatalf("advancing past the end of the sequence should return nil, got %v", m)
	}
}
// TestDocIDSearcherEmptySearchEmptyIndex searches no identifiers in an empty
// index and expects no match.
func TestDocIDSearcherEmptySearchEmptyIndex(t *testing.T) {
	var none []string
	testDocIDSearcher(t, none, none, none)
}
// TestDocIDSearcherEmptyIndex searches identifiers in an empty index and
// expects no match.
func TestDocIDSearcherEmptyIndex(t *testing.T) {
	searched := []string{"aa", "bb"}
	testDocIDSearcher(t, nil, searched, nil)
}
// TestDocIDSearcherEmptySearch searches no identifiers in a populated index
// and expects no match.
func TestDocIDSearcherEmptySearch(t *testing.T) {
	indexed := []string{"aa", "bb"}
	testDocIDSearcher(t, indexed, nil, nil)
}
// TestDocIDSearcherValid checks that missing, out of order and duplicate
// inputs are reduced to the sorted set of identifiers present in the index.
func TestDocIDSearcherValid(t *testing.T) {
	indexed := []string{"aa", "bb", "cc"}
	// "ee" is not indexed, "bb" is listed twice and the input is unsorted.
	searched := []string{"ee", "bb", "aa", "bb"}
	wanted := []string{"aa", "bb"}
	testDocIDSearcher(t, indexed, searched, wanted)
}