diff --git a/query_docid.go b/query_docid.go
new file mode 100644
index 00000000..dce12bd4
--- /dev/null
+++ b/query_docid.go
@@ -0,0 +1,66 @@
+// Copyright (c) 2015 Couchbase, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the
+// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+
+package bleve
+
+import (
+	"github.com/blevesearch/bleve/index"
+	"github.com/blevesearch/bleve/search"
+	"github.com/blevesearch/bleve/search/searchers"
+)
+
+// docIDQuery matches documents by identifier. IDs holds the candidate
+// identifiers and BoostVal the boost handed to the searcher.
+type docIDQuery struct {
+	IDs      []string `json:"ids"`
+	BoostVal float64  `json:"boost,omitempty"`
+}
+
+// NewDocIDQuery creates a new Query object returning indexed documents among
+// the specified set. Combine it with ConjunctionQuery to restrict the scope of
+// the output of other queries. The boost defaults to 1.0.
+func NewDocIDQuery(ids []string) *docIDQuery {
+	return &docIDQuery{
+		IDs:      ids,
+		BoostVal: 1.0,
+	}
+}
+
+// Boost returns the boost currently set on the query.
+func (q *docIDQuery) Boost() float64 {
+	return q.BoostVal
+}
+
+// SetBoost sets the boost of the query and returns the query itself.
+func (q *docIDQuery) SetBoost(b float64) Query {
+	q.BoostVal = b
+	return q
+}
+
+// Field always returns the empty string: the query carries no field.
+func (q *docIDQuery) Field() string {
+	return ""
+}
+
+// SetField ignores f and returns the query unchanged.
+func (q *docIDQuery) SetField(f string) Query {
+	return q
+}
+
+// Searcher builds a DocIDSearcher over i for the query identifiers and boost.
+// The index mapping m is not used.
+func (q *docIDQuery) Searcher(i index.IndexReader, m *IndexMapping, explain bool) (search.Searcher, error) {
+	return searchers.NewDocIDSearcher(i, q.IDs, q.BoostVal, explain)
+}
+
+// Validate always returns nil; any list of identifiers, including an empty
+// one, is accepted.
+func (q *docIDQuery) Validate() error {
+	return nil
+}
diff --git a/query_test.go b/query_test.go
index 82d1f3b8..ecd59ef0 100644
--- a/query_test.go
+++ b/query_test.go
@@ -224,6 +224,10 @@ func TestQueryValidate(t *testing.T) {
 				2.0),
 			err: ErrorDisjunctionFewerThanMinClauses,
 		},
+		{
+			query: NewDocIDQuery(nil).SetBoost(25),
+			err:   nil,
+		},
 	}
 
 	for _, test := range tests {
diff --git a/search/searchers/search_docid.go b/search/searchers/search_docid.go
new file mode 100644
index 00000000..210dc29c
--- /dev/null
+++ b/search/searchers/search_docid.go
@@ -0,0 +1,119 @@
+// Copyright (c) 2015 Couchbase, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the
+// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+
+package searchers
+
+import (
+	"sort"
+
+	"github.com/blevesearch/bleve/index"
+	"github.com/blevesearch/bleve/search"
+	"github.com/blevesearch/bleve/search/scorers"
+)
+
+// DocIDSearcher returns documents matching a predefined set of identifiers.
+type DocIDSearcher struct {
+	ids     []string // sorted identifiers, without duplicates, found in the index
+	current int      // position in ids of the next match to return
+	scorer  *scorers.ConstantScorer
+}
+
+// NewDocIDSearcher creates a searcher returning the documents of ids found in
+// indexReader, in ascending identifier order and without duplicates. The ids
+// slice is copied, not modified. boost and explain are handed to the constant
+// scorer used for every match. An error is returned when reading the
+// identifiers from indexReader, or closing that reader, fails.
+func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64, explain bool) (searcher *DocIDSearcher, err error) {
+	kept := make([]string, len(ids))
+	copy(kept, ids)
+	sort.Strings(kept)
+
+	if len(kept) > 0 {
+		// NOTE(review): the upper bound is widened to the immediate successor
+		// of the largest identifier so that it stays reachable should
+		// DocIDReader treat its end bound as exclusive; confirm against the
+		// index.IndexReader contract. Matches are filtered by equality below,
+		// so the wider range cannot add results.
+		idReader, rerr := indexReader.DocIDReader(kept[0], kept[len(kept)-1]+"\x00")
+		if rerr != nil {
+			return nil, rerr
+		}
+		defer func() {
+			// Report a Close failure unless another error is already returned.
+			if cerr := idReader.Close(); cerr != nil && err == nil {
+				searcher, err = nil, cerr
+			}
+		}()
+		found := 0
+		for _, id := range kept {
+			doc, aerr := idReader.Advance(id)
+			if aerr != nil {
+				return nil, aerr
+			}
+			// Keep id when the index holds it and it was not kept already;
+			// kept is sorted, so duplicates are adjacent.
+			if doc == id && (found == 0 || kept[found-1] != id) {
+				kept[found] = id
+				found++
+			}
+		}
+		kept = kept[:found]
+	}
+
+	return &DocIDSearcher{
+		ids:    kept,
+		scorer: scorers.NewConstantScorer(1.0, boost, explain),
+	}, nil
+}
+
+// Count returns the number of matching documents found when the searcher was
+// created.
+func (s *DocIDSearcher) Count() uint64 {
+	return uint64(len(s.ids))
+}
+
+// Weight returns the weight of the underlying constant scorer.
+func (s *DocIDSearcher) Weight() float64 {
+	return s.scorer.Weight()
+}
+
+// SetQueryNorm forwards qnorm to the underlying constant scorer.
+func (s *DocIDSearcher) SetQueryNorm(qnorm float64) {
+	s.scorer.SetQueryNorm(qnorm)
+}
+
+// Next returns the match for the next identifier in ascending order, or nil
+// once every identifier has been returned.
+func (s *DocIDSearcher) Next() (*search.DocumentMatch, error) {
+	if s.current >= len(s.ids) {
+		return nil, nil
+	}
+	id := s.ids[s.current]
+	s.current++
+	return s.scorer.Score(id), nil
+}
+
+// Advance moves to the first identifier greater than or equal to ID and
+// returns its match, or nil if there is none. The position comes from a
+// binary search over the whole set, so a smaller ID moves the searcher back.
+func (s *DocIDSearcher) Advance(ID string) (*search.DocumentMatch, error) {
+	s.current = sort.SearchStrings(s.ids, ID)
+	return s.Next()
+}
+
+// Close does nothing and always returns nil; the identifier reader is closed
+// before NewDocIDSearcher returns.
+func (s *DocIDSearcher) Close() error {
+	return nil
+}
+
+// Min always returns 0.
+func (s *DocIDSearcher) Min() int {
+	return 0
+}
diff --git a/search/searchers/search_docid_test.go b/search/searchers/search_docid_test.go
new file mode 100644
index 00000000..67b878a5
--- /dev/null
+++ b/search/searchers/search_docid_test.go
@@ -0,0 +1,157 @@
+// Copyright (c) 2015 Couchbase, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the
+// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+
+package searchers
+
+import (
+	"testing"
+
+	"github.com/blevesearch/bleve/document"
+	"github.com/blevesearch/bleve/index"
+	"github.com/blevesearch/bleve/index/store/gtreap"
+	"github.com/blevesearch/bleve/index/upside_down"
+)
+
+// testDocIDSearcher indexes one document per identifier of indexed in a new
+// gtreap-backed index, builds a DocIDSearcher for searched and checks that
+// Count, Next and Advance expose exactly the wanted identifiers, in order.
+// Every wanted identifier must be two characters long: its one-character
+// prefix is used as a seek target sorting before it.
+func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) {
+	analysisQueue := index.NewAnalysisQueue(1)
+	i, err := upside_down.NewUpsideDownCouch(gtreap.Name, nil, analysisQueue)
+	if err != nil {
+		t.Fatal(err)
+	}
+	err = i.Open()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer func() {
+		err := i.Close()
+		if err != nil {
+			t.Fatal(err)
+		}
+	}()
+	for _, id := range indexed {
+		err = i.Update(&document.Document{
+			ID: id,
+			Fields: []document.Field{
+				document.NewTextField("desc", []uint64{}, []byte("beer")),
+			},
+		})
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	indexReader, err := i.Reader()
+	if err != nil {
+		// Fatal, not Error: nothing below can run without a reader.
+		t.Fatal(err)
+	}
+	defer func() {
+		err := indexReader.Close()
+		if err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	searcher, err := NewDocIDSearcher(indexReader, searched, 1.0, false)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer func() {
+		err := searcher.Close()
+		if err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	if searcher.Count() != uint64(len(wanted)) {
+		t.Fatalf("expected count %v got %v", len(wanted), searcher.Count())
+	}
+
+	// Check the sequence
+	for pos, id := range wanted {
+		m, err := searcher.Next()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if m == nil {
+			t.Fatalf("expected %v at position %v, got nil", id, pos)
+		}
+		if id != m.ID {
+			t.Fatalf("expected %v at position %v, got %v", id, pos, m.ID)
+		}
+	}
+	m, err := searcher.Next()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if m != nil {
+		t.Fatalf("expected nil past the end of the sequence, got %v", m.ID)
+	}
+
+	// Check seeking
+	for _, id := range wanted {
+		if len(id) != 2 {
+			t.Fatalf("expected identifier must be 2 characters long, got %v", id)
+		}
+		before := id[:1]
+		for _, target := range []string{before, id} {
+			m, err := searcher.Advance(target)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if m == nil || m.ID != id {
+				t.Fatalf("advancing to %v returned %v instead of %v", target, m, id)
+			}
+		}
+	}
+	// Seek after the end of the sequence
+	after := "zzz"
+	m, err = searcher.Advance(after)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if m != nil {
+		t.Fatalf("advancing past the end of the sequence should return nil, got %v", m)
+	}
+}
+
+func TestDocIDSearcherEmptySearchEmptyIndex(t *testing.T) {
+	testDocIDSearcher(t, nil, nil, nil)
+}
+
+func TestDocIDSearcherEmptyIndex(t *testing.T) {
+	testDocIDSearcher(t, nil, []string{"aa", "bb"}, nil)
+}
+
+func TestDocIDSearcherEmptySearch(t *testing.T) {
+	testDocIDSearcher(t, []string{"aa", "bb"}, nil, nil)
+}
+
+func TestDocIDSearcherValid(t *testing.T) {
+	// Test missing, out of order and duplicate inputs
+	testDocIDSearcher(t, []string{"aa", "bb", "cc"},
+		[]string{"ee", "bb", "aa", "bb"},
+		[]string{"aa", "bb"})
+}
+
+func TestDocIDSearcherLargestIDIndexed(t *testing.T) {
+	// The largest searched identifier is present in the index
+	testDocIDSearcher(t, []string{"aa", "bb", "cc"},
+		[]string{"bb", "aa"},
+		[]string{"aa", "bb"})
+	// Single identifier: both bounds of the scanned range coincide
+	testDocIDSearcher(t, []string{"aa", "bb", "cc"},
+		[]string{"bb"},
+		[]string{"bb"})
+}