parent
183fcd4b14
commit
a41f229b14
18
query.go
18
query.go
|
@ -189,5 +189,23 @@ func ParseQuery(input []byte) (Query, error) {
|
|||
}
|
||||
return &rv, nil
|
||||
}
|
||||
_, hasRegexp := tmp["regexp"]
|
||||
if hasRegexp {
|
||||
var rv regexpQuery
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
_, hasWildcard := tmp["wildcard"]
|
||||
if hasWildcard {
|
||||
var rv wildcardQuery
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
return nil, ErrorUnknownQueryType
|
||||
}
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/searchers"
|
||||
)
|
||||
|
||||
type regexpQuery struct {
|
||||
Regexp string `json:"regexp"`
|
||||
FieldVal string `json:"field,omitempty"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
compiled *regexp.Regexp
|
||||
}
|
||||
|
||||
// NewRegexpQuery creates a new Query which finds
|
||||
// documents containing terms that match the
|
||||
// specified regular expression.
|
||||
func NewRegexpQuery(regexp string) *regexpQuery {
|
||||
return ®expQuery{
|
||||
Regexp: regexp,
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *regexpQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *regexpQuery) SetBoost(b float64) Query {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *regexpQuery) Field() string {
|
||||
return q.FieldVal
|
||||
}
|
||||
|
||||
func (q *regexpQuery) SetField(f string) Query {
|
||||
q.FieldVal = f
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *regexpQuery) Searcher(i index.IndexReader, m *IndexMapping, explain bool) (search.Searcher, error) {
|
||||
field := q.FieldVal
|
||||
if q.FieldVal == "" {
|
||||
field = m.DefaultField
|
||||
}
|
||||
if q.compiled == nil {
|
||||
var err error
|
||||
q.compiled, err = regexp.Compile(q.Regexp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return searchers.NewRegexpSearcher(i, q.compiled, field, q.BoostVal, explain)
|
||||
}
|
||||
|
||||
func (q *regexpQuery) Validate() error {
|
||||
var err error
|
||||
q.compiled, err = regexp.Compile(q.Regexp)
|
||||
return err
|
||||
}
|
|
@ -0,0 +1,102 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/searchers"
|
||||
)
|
||||
|
||||
var wildcardRegexpReplacer = strings.NewReplacer(
|
||||
// characters in the wildcard that must
|
||||
// be escaped in the regexp
|
||||
"+", `\+`,
|
||||
"(", `\(`,
|
||||
")", `\)`,
|
||||
"^", `\^`,
|
||||
"$", `\$`,
|
||||
".", `\.`,
|
||||
"{", `\{`,
|
||||
"}", `\}`,
|
||||
"[", `\[`,
|
||||
"]", `\]`,
|
||||
`|`, `\|`,
|
||||
`\`, `\\`,
|
||||
// wildcard characters
|
||||
"*", ".*",
|
||||
"?", ".")
|
||||
|
||||
type wildcardQuery struct {
|
||||
Wildcard string `json:"wildcard"`
|
||||
FieldVal string `json:"field,omitempty"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
compiled *regexp.Regexp
|
||||
}
|
||||
|
||||
// NewWildcardQuery creates a new Query which finds
|
||||
// documents containing terms that match the
|
||||
// specified wildcard. In the wildcard pattern '*'
|
||||
// will match any sequence of 0 or more characters,
|
||||
// and '?' will match any single character.
|
||||
func NewWildcardQuery(wildcard string) *wildcardQuery {
|
||||
return &wildcardQuery{
|
||||
Wildcard: wildcard,
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *wildcardQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *wildcardQuery) SetBoost(b float64) Query {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *wildcardQuery) Field() string {
|
||||
return q.FieldVal
|
||||
}
|
||||
|
||||
func (q *wildcardQuery) SetField(f string) Query {
|
||||
q.FieldVal = f
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *wildcardQuery) Searcher(i index.IndexReader, m *IndexMapping, explain bool) (search.Searcher, error) {
|
||||
field := q.FieldVal
|
||||
if q.FieldVal == "" {
|
||||
field = m.DefaultField
|
||||
}
|
||||
if q.compiled == nil {
|
||||
var err error
|
||||
q.compiled, err = q.convertToRegexp()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return searchers.NewRegexpSearcher(i, q.compiled, field, q.BoostVal, explain)
|
||||
}
|
||||
|
||||
func (q *wildcardQuery) Validate() error {
|
||||
var err error
|
||||
q.compiled, err = q.convertToRegexp()
|
||||
return err
|
||||
}
|
||||
|
||||
func (q *wildcardQuery) convertToRegexp() (*regexp.Regexp, error) {
|
||||
regexpString := "^" + wildcardRegexpReplacer.Replace(q.Wildcard) + "$"
|
||||
return regexp.Compile(regexpString)
|
||||
}
|
|
@ -0,0 +1,108 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package searchers
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
)
|
||||
|
||||
type RegexpSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
pattern *regexp.Regexp
|
||||
field string
|
||||
explain bool
|
||||
searcher *DisjunctionSearcher
|
||||
}
|
||||
|
||||
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (*RegexpSearcher, error) {
|
||||
|
||||
prefixTerm, complete := pattern.LiteralPrefix()
|
||||
candidateTerms := make([]string, 0)
|
||||
if complete {
|
||||
// there is no pattern
|
||||
candidateTerms = append(candidateTerms, prefixTerm)
|
||||
} else {
|
||||
var fieldDict index.FieldDict
|
||||
var err error
|
||||
if len(prefixTerm) > 0 {
|
||||
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
|
||||
} else {
|
||||
fieldDict, err = indexReader.FieldDict(field)
|
||||
}
|
||||
|
||||
// enumerate the terms and check against regexp
|
||||
tfd, err := fieldDict.Next()
|
||||
for err == nil && tfd != nil {
|
||||
if pattern.MatchString(tfd.Term) {
|
||||
candidateTerms = append(candidateTerms, tfd.Term)
|
||||
}
|
||||
tfd, err = fieldDict.Next()
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// enumerate all the terms in the range
|
||||
qsearchers := make([]search.Searcher, 0, 25)
|
||||
|
||||
for _, cterm := range candidateTerms {
|
||||
qsearcher, err := NewTermSearcher(indexReader, cterm, field, 1.0, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qsearchers = append(qsearchers, qsearcher)
|
||||
}
|
||||
|
||||
// build disjunction searcher of these ranges
|
||||
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &RegexpSearcher{
|
||||
indexReader: indexReader,
|
||||
pattern: pattern,
|
||||
field: field,
|
||||
explain: explain,
|
||||
searcher: searcher,
|
||||
}, nil
|
||||
}
|
||||
func (s *RegexpSearcher) Count() uint64 {
|
||||
return s.searcher.Count()
|
||||
}
|
||||
|
||||
func (s *RegexpSearcher) Weight() float64 {
|
||||
return s.searcher.Weight()
|
||||
}
|
||||
|
||||
func (s *RegexpSearcher) SetQueryNorm(qnorm float64) {
|
||||
s.searcher.SetQueryNorm(qnorm)
|
||||
}
|
||||
|
||||
func (s *RegexpSearcher) Next() (*search.DocumentMatch, error) {
|
||||
return s.searcher.Next()
|
||||
|
||||
}
|
||||
|
||||
func (s *RegexpSearcher) Advance(ID string) (*search.DocumentMatch, error) {
|
||||
return s.searcher.Next()
|
||||
}
|
||||
|
||||
func (s *RegexpSearcher) Close() error {
|
||||
return s.searcher.Close()
|
||||
}
|
||||
|
||||
func (s *RegexpSearcher) Min() int {
|
||||
return 0
|
||||
}
|
|
@ -0,0 +1,100 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package searchers
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/search"
|
||||
)
|
||||
|
||||
func TestRegexpSearch(t *testing.T) {
|
||||
|
||||
twoDocIndexReader, err := twoDocIndex.Reader()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
defer twoDocIndexReader.Close()
|
||||
|
||||
pattern, err := regexp.Compile("ma.*")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
regexpSearcher, err := NewRegexpSearcher(twoDocIndexReader, pattern, "name", 1.0, true)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
patternCo, err := regexp.Compile("co.*")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
regexpSearcherCo, err := NewRegexpSearcher(twoDocIndexReader, patternCo, "desc", 1.0, true)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
searcher search.Searcher
|
||||
results []*search.DocumentMatch
|
||||
}{
|
||||
{
|
||||
searcher: regexpSearcher,
|
||||
results: []*search.DocumentMatch{
|
||||
&search.DocumentMatch{
|
||||
ID: "1",
|
||||
Score: 1.916290731874155,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
searcher: regexpSearcherCo,
|
||||
results: []*search.DocumentMatch{
|
||||
&search.DocumentMatch{
|
||||
ID: "2",
|
||||
Score: 0.33875554280828685,
|
||||
},
|
||||
&search.DocumentMatch{
|
||||
ID: "3",
|
||||
Score: 0.33875554280828685,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for testIndex, test := range tests {
|
||||
defer test.searcher.Close()
|
||||
|
||||
next, err := test.searcher.Next()
|
||||
i := 0
|
||||
for err == nil && next != nil {
|
||||
if i < len(test.results) {
|
||||
if next.ID != test.results[i].ID {
|
||||
t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
|
||||
}
|
||||
if next.Score != test.results[i].Score {
|
||||
t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
|
||||
t.Logf("scoring explanation: %s", next.Expl)
|
||||
}
|
||||
}
|
||||
next, err = test.searcher.Next()
|
||||
i++
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
|
||||
}
|
||||
if len(test.results) != i {
|
||||
t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue