added phrase search
This commit is contained in:
parent
ef690e0ead
commit
cc77b074fe
|
@ -28,32 +28,33 @@ func init() {
|
|||
}
|
||||
|
||||
// sets up some mock data used in many tests in this package

// Indexing options for the "desc" field: term vectors are required so that
// phrase/position-based tests can inspect term locations.
var twoDocIndexDescIndexingOptions = document.DEFAULT_TEXT_INDEXING_OPTIONS | document.INCLUDE_TERM_VECTORS

// Five small documents with varying densities of the token "beer" in the
// "desc" field; the leading comments record the expected term frequency.
var twoDocIndexDocs = []*document.Document{
	// must have 4/4 beer
	document.NewDocument("1").
		AddField(document.NewTextField("name", []byte("marty"))).
		AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions)).
		AddField(document.NewTextField("street", []byte("couchbase way"))),
	// must have 1/4 beer
	document.NewDocument("2").
		AddField(document.NewTextField("name", []byte("steve"))).
		AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("angst beer couch database"), twoDocIndexDescIndexingOptions)).
		AddField(document.NewTextField("street", []byte("couchbase way"))).
		AddField(document.NewTextField("title", []byte("mister"))),
	// must have 1/4 beer
	document.NewDocument("3").
		AddField(document.NewTextField("name", []byte("dustin"))).
		AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("apple beer column dank"), twoDocIndexDescIndexingOptions)).
		AddField(document.NewTextField("title", []byte("mister"))),
	// must have 65/65 beer
	document.NewDocument("4").
		AddField(document.NewTextField("name", []byte("ravi"))).
		AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer"), twoDocIndexDescIndexingOptions)),
	// must have 0/x beer
	document.NewDocument("5").
		AddField(document.NewTextField("name", []byte("bobert"))).
		AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("water"), twoDocIndexDescIndexingOptions)).
		AddField(document.NewTextField("title", []byte("mister"))),
}
|
||||
|
||||
|
|
|
@ -9,6 +9,9 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
|
@ -17,3 +20,41 @@ type Query interface {
|
|||
Searcher(index index.Index) (Searcher, error)
|
||||
Validate() error
|
||||
}
|
||||
|
||||
func ParseQuery(input []byte) (Query, error) {
|
||||
var tmp map[string]interface{}
|
||||
err := json.Unmarshal(input, &tmp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
_, isTermQuery := tmp["term"]
|
||||
if isTermQuery {
|
||||
var rv *TermQuery
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
_, hasMust := tmp["must"]
|
||||
_, hasShould := tmp["should"]
|
||||
_, hasMustNot := tmp["must_not"]
|
||||
if hasMust || hasShould || hasMustNot {
|
||||
var rv *TermBooleanQuery
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
_, hasTerms := tmp["terms"]
|
||||
if hasTerms {
|
||||
var rv *PhraseQuery
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
return nil, fmt.Errorf("Unrecognized query")
|
||||
}
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
// PhraseQuery matches documents containing the given terms at consecutive
// positions within a single field.
type PhraseQuery struct {
	// Terms, in order, make up the phrase to match.
	Terms []*TermQuery `json:"terms,omitempty"`
	// PhrasePositions is not referenced by any visible searcher code —
	// NOTE(review): confirm intended use.
	PhrasePositions map[string]float64 `json:"phrase_positions,omitempty"`
	// BoostVal scales this query's contribution to the score.
	BoostVal float64 `json:"boost,omitempty"`
	// Explain requests score explanations on returned matches.
	Explain bool `json:"explain,omitempty"`
}
|
||||
|
||||
// Boost returns the boost value configured for this query.
func (q *PhraseQuery) Boost() float64 {
	return q.BoostVal
}
|
||||
|
||||
// Searcher constructs a PhraseSearcher that executes this query against the
// given index.
func (q *PhraseQuery) Searcher(index index.Index) (Searcher, error) {
	return NewPhraseSearcher(index, q)
}
|
||||
|
||||
func (q *PhraseQuery) Validate() error {
|
||||
if q.Terms == nil {
|
||||
return fmt.Errorf("Phrase query must contain at least one term")
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,178 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
// PhraseSearcher finds documents where the query's terms occur at consecutive
// positions, by first intersecting all terms with a conjunction searcher and
// then checking term locations on each candidate.
type PhraseSearcher struct {
	query        *PhraseQuery
	index        index.Index
	mustSearcher *TermConjunctionSearcher // all phrase terms must match before positions are checked
	queryNorm    float64                  // normalization factor pushed to downstream searchers
	currMust     *DocumentMatch           // current candidate from mustSearcher (nil when exhausted)
	slop         int                      // NOTE(review): not referenced by any visible method — confirm intended use
}
|
||||
|
||||
func NewPhraseSearcher(index index.Index, query *PhraseQuery) (*PhraseSearcher, error) {
|
||||
// build the downstream searchres
|
||||
var err error
|
||||
var mustSearcher *TermConjunctionSearcher
|
||||
if query.Terms != nil {
|
||||
qterms := make([]Query, len(query.Terms))
|
||||
for i, qt := range query.Terms {
|
||||
qterms[i] = qt
|
||||
}
|
||||
tcq := TermConjunctionQuery{
|
||||
Terms: qterms,
|
||||
BoostVal: 1.0,
|
||||
Explain: query.Explain,
|
||||
}
|
||||
|
||||
mustSearcher, err = NewTermConjunctionSearcher(index, &tcq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// build our searcher
|
||||
rv := PhraseSearcher{
|
||||
index: index,
|
||||
query: query,
|
||||
mustSearcher: mustSearcher,
|
||||
}
|
||||
rv.computeQueryNorm()
|
||||
err = rv.initSearchers()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func (s *PhraseSearcher) computeQueryNorm() {
|
||||
// first calculate sum of squared weights
|
||||
sumOfSquaredWeights := 0.0
|
||||
if s.mustSearcher != nil {
|
||||
sumOfSquaredWeights += s.mustSearcher.Weight()
|
||||
}
|
||||
|
||||
// now compute query norm from this
|
||||
s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
|
||||
// finally tell all the downsteam searchers the norm
|
||||
if s.mustSearcher != nil {
|
||||
s.mustSearcher.SetQueryNorm(s.queryNorm)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *PhraseSearcher) initSearchers() error {
|
||||
var err error
|
||||
// get all searchers pointing at their first match
|
||||
if s.mustSearcher != nil {
|
||||
s.currMust, err = s.mustSearcher.Next()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PhraseSearcher) advanceNextMust() error {
|
||||
var err error
|
||||
|
||||
if s.mustSearcher != nil {
|
||||
s.currMust, err = s.mustSearcher.Next()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PhraseSearcher) Weight() float64 {
|
||||
var rv float64
|
||||
rv += s.mustSearcher.Weight()
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// SetQueryNorm propagates the query normalization factor to the downstream
// conjunction searcher.
func (s *PhraseSearcher) SetQueryNorm(qnorm float64) {
	s.mustSearcher.SetQueryNorm(qnorm)
}
|
||||
|
||||
func (s *PhraseSearcher) Next() (*DocumentMatch, error) {
|
||||
var rv *DocumentMatch
|
||||
for s.currMust != nil {
|
||||
rvtlm := make(TermLocationMap, 0)
|
||||
freq := 0
|
||||
firstTerm := s.query.Terms[0]
|
||||
termLocMap, ok := s.currMust.Locations[firstTerm.Field]
|
||||
if ok {
|
||||
locations, ok := termLocMap[firstTerm.Term]
|
||||
if ok {
|
||||
OUTER:
|
||||
for _, location := range locations {
|
||||
crvtlm := make(TermLocationMap, 0)
|
||||
INNER:
|
||||
for i := 0; i < len(s.query.Terms); i++ {
|
||||
nextTerm := s.query.Terms[i]
|
||||
if nextTerm != nil {
|
||||
// look through all this terms locations
|
||||
// to try and find the correct offsets
|
||||
nextLocations, ok := termLocMap[nextTerm.Term]
|
||||
if ok {
|
||||
for _, nextLocation := range nextLocations {
|
||||
if nextLocation.Pos == location.Pos+float64(i) {
|
||||
// found a location match for this term
|
||||
crvtlm.AddLocation(nextTerm.Term, nextLocation)
|
||||
continue INNER
|
||||
}
|
||||
}
|
||||
// if we got here we didnt find location match for this term
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
}
|
||||
// if we got here all the terms matched
|
||||
freq += 1
|
||||
mergeTermLocationMaps(rvtlm, crvtlm)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if freq > 0 {
|
||||
// return match
|
||||
rv = s.currMust
|
||||
rv.Locations = FieldTermLocationMap{
|
||||
firstTerm.Field: rvtlm,
|
||||
}
|
||||
s.advanceNextMust()
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
s.advanceNextMust()
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (s *PhraseSearcher) Advance(ID string) (*DocumentMatch, error) {
|
||||
s.mustSearcher.Advance(ID)
|
||||
return s.Next()
|
||||
}
|
||||
|
||||
func (s *PhraseSearcher) Count() uint64 {
|
||||
// for now return a worst case
|
||||
var sum uint64 = 0
|
||||
sum += s.mustSearcher.Count()
|
||||
return sum
|
||||
}
|
||||
|
||||
func (s *PhraseSearcher) Close() {
|
||||
if s.mustSearcher != nil {
|
||||
s.mustSearcher.Close()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
// Copyright (c) 2013 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
func TestPhraseSearch(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
index index.Index
|
||||
query Query
|
||||
results []*DocumentMatch
|
||||
}{
|
||||
{
|
||||
index: twoDocIndex,
|
||||
query: &PhraseQuery{
|
||||
Terms: []*TermQuery{
|
||||
&TermQuery{
|
||||
Term: "angst",
|
||||
Field: "desc",
|
||||
BoostVal: 1.0,
|
||||
Explain: true,
|
||||
},
|
||||
&TermQuery{
|
||||
Term: "beer",
|
||||
Field: "desc",
|
||||
BoostVal: 1.0,
|
||||
Explain: true,
|
||||
},
|
||||
},
|
||||
Explain: true,
|
||||
},
|
||||
results: []*DocumentMatch{
|
||||
&DocumentMatch{
|
||||
ID: "2",
|
||||
Score: 1.0807601687084403,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for testIndex, test := range tests {
|
||||
searcher, err := test.query.Searcher(test.index)
|
||||
defer searcher.Close()
|
||||
|
||||
next, err := searcher.Next()
|
||||
i := 0
|
||||
for err == nil && next != nil {
|
||||
if i < len(test.results) {
|
||||
if next.ID != test.results[i].ID {
|
||||
t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
|
||||
}
|
||||
if next.Score != test.results[i].Score {
|
||||
t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
|
||||
t.Logf("scoring explanation: %s", next.Expl)
|
||||
}
|
||||
}
|
||||
next, err = searcher.Next()
|
||||
i++
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
|
||||
}
|
||||
if len(test.results) != i {
|
||||
t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue