query_docid: add DocIDQuery to filter by document identifiers
This commit is contained in:
parent
74c309a7c2
commit
ff7234d893
|
@ -0,0 +1,56 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/searchers"
|
||||
)
|
||||
|
||||
// docIDQuery is the query type built by NewDocIDQuery. It carries a set of
// document identifiers together with a boost value.
type docIDQuery struct {
	// IDs holds the document identifiers handed to the DocID searcher.
	// It is (de)serialized under the JSON key "ids".
	IDs []string `json:"ids"`

	// BoostVal is the value reported by Boost and replaced by SetBoost.
	// It is (de)serialized under the JSON key "boost" and omitted when zero.
	BoostVal float64 `json:"boost,omitempty"`
}
|
||||
|
||||
// NewDocIDQuery creates a new Query object returning indexed documents among
|
||||
// the specified set. Combine it with ConjunctionQuery to restrict the scope of
|
||||
// other queries output.
|
||||
func NewDocIDQuery(ids []string) *docIDQuery {
|
||||
return &docIDQuery{
|
||||
IDs: ids,
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *docIDQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *docIDQuery) SetBoost(b float64) Query {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *docIDQuery) Field() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (q *docIDQuery) SetField(f string) Query {
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *docIDQuery) Searcher(i index.IndexReader, m *IndexMapping, explain bool) (search.Searcher, error) {
|
||||
return searchers.NewDocIDSearcher(i, q.IDs, q.BoostVal, explain)
|
||||
}
|
||||
|
||||
func (q *docIDQuery) Validate() error {
|
||||
return nil
|
||||
}
|
|
@ -224,6 +224,10 @@ func TestQueryValidate(t *testing.T) {
|
|||
2.0),
|
||||
err: ErrorDisjunctionFewerThanMinClauses,
|
||||
},
|
||||
{
|
||||
query: NewDocIDQuery(nil).SetBoost(25),
|
||||
err: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package searchers
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/scorers"
|
||||
)
|
||||
|
||||
// DocIDSearcher returns documents matching a predefined set of identifiers.
|
||||
type DocIDSearcher struct {
|
||||
ids []string
|
||||
current int
|
||||
scorer *scorers.ConstantScorer
|
||||
}
|
||||
|
||||
func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64, explain bool) (*DocIDSearcher, error) {
|
||||
kept := make([]string, len(ids))
|
||||
copy(kept, ids)
|
||||
sort.Strings(kept)
|
||||
|
||||
if len(ids) > 0 {
|
||||
idReader, err := indexReader.DocIDReader(kept[0], kept[len(kept)-1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer idReader.Close()
|
||||
j := 0
|
||||
for _, id := range kept {
|
||||
doc, err := idReader.Advance(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Non-duplicate match
|
||||
if doc == id && (j == 0 || kept[j-1] != id) {
|
||||
kept[j] = id
|
||||
j++
|
||||
}
|
||||
}
|
||||
kept = kept[:j]
|
||||
}
|
||||
|
||||
scorer := scorers.NewConstantScorer(1.0, boost, explain)
|
||||
return &DocIDSearcher{
|
||||
ids: kept,
|
||||
scorer: scorer,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *DocIDSearcher) Count() uint64 {
|
||||
return uint64(len(s.ids))
|
||||
}
|
||||
|
||||
func (s *DocIDSearcher) Weight() float64 {
|
||||
return s.scorer.Weight()
|
||||
}
|
||||
|
||||
func (s *DocIDSearcher) SetQueryNorm(qnorm float64) {
|
||||
s.scorer.SetQueryNorm(qnorm)
|
||||
}
|
||||
|
||||
func (s *DocIDSearcher) Next() (*search.DocumentMatch, error) {
|
||||
if s.current >= len(s.ids) {
|
||||
return nil, nil
|
||||
}
|
||||
id := s.ids[s.current]
|
||||
s.current++
|
||||
docMatch := s.scorer.Score(id)
|
||||
return docMatch, nil
|
||||
|
||||
}
|
||||
|
||||
func (s *DocIDSearcher) Advance(ID string) (*search.DocumentMatch, error) {
|
||||
s.current = sort.SearchStrings(s.ids, ID)
|
||||
return s.Next()
|
||||
}
|
||||
|
||||
func (s *DocIDSearcher) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *DocIDSearcher) Min() int {
|
||||
return 0
|
||||
}
|
|
@ -0,0 +1,131 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package searchers
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store/gtreap"
|
||||
"github.com/blevesearch/bleve/index/upside_down"
|
||||
)
|
||||
|
||||
func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) {
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
i, err := upside_down.NewUpsideDownCouch(gtreap.Name, nil, analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = i.Open()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, id := range indexed {
|
||||
err = i.Update(&document.Document{
|
||||
ID: id,
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("desc", []uint64{}, []byte("beer")),
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
indexReader, err := i.Reader()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
defer func() {
|
||||
err := indexReader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
searcher, err := NewDocIDSearcher(indexReader, searched, 1.0, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := searcher.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
if searcher.Count() != uint64(len(wanted)) {
|
||||
t.Fatalf("expected count %v got %v", len(wanted), searcher.Count())
|
||||
}
|
||||
|
||||
// Check the sequence
|
||||
for i, id := range wanted {
|
||||
m, err := searcher.Next()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if id != m.ID {
|
||||
t.Fatalf("expected %v at position %v, got %v", id, i, m.ID)
|
||||
}
|
||||
}
|
||||
m, err := searcher.Next()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if m != nil {
|
||||
t.Fatalf("expected nil past the end of the sequence, got %v", m.ID)
|
||||
}
|
||||
|
||||
// Check seeking
|
||||
for _, id := range wanted {
|
||||
if len(id) != 2 {
|
||||
t.Fatalf("expected identifier must be 2 characters long, got %v", id)
|
||||
}
|
||||
before := id[:1]
|
||||
for _, target := range []string{before, id} {
|
||||
m, err := searcher.Advance(target)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if m == nil || m.ID != id {
|
||||
t.Fatalf("advancing to %v returned %v instead of %v", before, m, id)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Seek after the end of the sequence
|
||||
after := "zzz"
|
||||
m, err = searcher.Advance(after)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if m != nil {
|
||||
t.Fatalf("advancing past the end of the sequence should return nil, got %v", m)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDocIDSearcherEmptySearchEmptyIndex(t *testing.T) {
|
||||
testDocIDSearcher(t, nil, nil, nil)
|
||||
}
|
||||
|
||||
func TestDocIDSearcherEmptyIndex(t *testing.T) {
|
||||
testDocIDSearcher(t, nil, []string{"aa", "bb"}, nil)
|
||||
}
|
||||
|
||||
func TestDocIDSearcherEmptySearch(t *testing.T) {
|
||||
testDocIDSearcher(t, []string{"aa", "bb"}, nil, nil)
|
||||
}
|
||||
|
||||
func TestDocIDSearcherValid(t *testing.T) {
|
||||
// Test missing, out of order and duplicate inputs
|
||||
testDocIDSearcher(t, []string{"aa", "bb", "cc"},
|
||||
[]string{"ee", "bb", "aa", "bb"},
|
||||
[]string{"aa", "bb"})
|
||||
}
|
Loading…
Reference in New Issue