0
0
Fork 0

overhauled search

many bugs fixed in existing search
phrase query updated to support gaps in term sequence
new query types
all,none,match,phrase match
and new query syntax search, like google search:
+/-(optional field qualifier:)<term or quoted phrase>
This commit is contained in:
Marty Schoch 2014-07-11 14:49:59 -04:00
parent 5a33825091
commit f225d484b3
26 changed files with 2877 additions and 17 deletions

8
search/build.sh Executable file
View File

@ -0,0 +1,8 @@
#! /bin/sh
# Regenerates the query-syntax lexer (nex) and parser (goyacc), then builds
# the package.
#
# Abort on the first failing step: without this a failed generator run would
# fall through to `go build`, which would then compile stale generated files.
set -e

echo Running nex...
nex query_syntax.nex
echo Running goyacc...
go tool yacc query_syntax.y
echo Running go build...
go build

View File

@ -11,7 +11,9 @@ package search
import (
"encoding/json"
"fmt"
"log"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
)
@ -21,7 +23,7 @@ type Query interface {
Validate() error
}
func ParseQuery(input []byte) (Query, error) {
func ParseQuery(input []byte, mapping document.Mapping) (Query, error) {
var tmp map[string]interface{}
err := json.Unmarshal(input, &tmp)
if err != nil {
@ -29,32 +31,65 @@ func ParseQuery(input []byte) (Query, error) {
}
_, isTermQuery := tmp["term"]
if isTermQuery {
var rv *TermQuery
var rv TermQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return rv, nil
return &rv, nil
}
_, isMatchQuery := tmp["match"]
if isMatchQuery {
log.Printf("detected match query")
var rv MatchQuery
rv.mapping = mapping
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
_, isMatchPhraseQuery := tmp["match_phrase"]
if isMatchPhraseQuery {
log.Printf("detected match phrase query")
var rv MatchPhraseQuery
rv.mapping = mapping
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
_, hasMust := tmp["must"]
_, hasShould := tmp["should"]
_, hasMustNot := tmp["must_not"]
if hasMust || hasShould || hasMustNot {
var rv *TermBooleanQuery
var rv TermBooleanQuery
rv.mapping = mapping
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return rv, nil
return &rv, nil
}
_, hasTerms := tmp["terms"]
if hasTerms {
var rv *PhraseQuery
var rv PhraseQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return rv, nil
return &rv, nil
}
_, hasSyntaxQuery := tmp["query"]
if hasSyntaxQuery {
var rv SyntaxQuery
rv.mapping = mapping
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
return nil, fmt.Errorf("Unrecognized query")
}

View File

@ -9,8 +9,10 @@
package search
import (
"encoding/json"
"fmt"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
)
@ -20,6 +22,7 @@ type TermBooleanQuery struct {
Should *TermDisjunctionQuery `json:"should,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
Explain bool `json:"explain,omitempty"`
mapping document.Mapping
}
func (q *TermBooleanQuery) Boost() float64 {
@ -39,3 +42,49 @@ func (q *TermBooleanQuery) Validate() error {
}
return nil
}
// UnmarshalJSON decodes a boolean query from JSON.
//
// The must, must_not and should clauses are first captured as raw JSON so
// that q.mapping can be handed to each sub-query before that sub-query is
// decoded. A clause that is absent from the input leaves the corresponding
// field of q untouched. q.mapping must be set by the caller beforehand.
func (q *TermBooleanQuery) UnmarshalJSON(data []byte) error {
	tmp := struct {
		Must     json.RawMessage `json:"must,omitempty"`
		MustNot  json.RawMessage `json:"must_not,omitempty"`
		Should   json.RawMessage `json:"should,omitempty"`
		BoostVal float64         `json:"boost,omitempty"`
		Explain  bool            `json:"explain,omitempty"`
	}{}
	if err := json.Unmarshal(data, &tmp); err != nil {
		return err
	}
	q.BoostVal = tmp.BoostVal
	q.Explain = tmp.Explain

	if len(tmp.Must) > 0 {
		var must TermConjunctionQuery
		must.mapping = q.mapping
		if err := json.Unmarshal(tmp.Must, &must); err != nil {
			return err
		}
		q.Must = &must
	}

	if len(tmp.MustNot) > 0 {
		var mustNot TermDisjunctionQuery
		mustNot.mapping = q.mapping
		if err := json.Unmarshal(tmp.MustNot, &mustNot); err != nil {
			return err
		}
		q.MustNot = &mustNot
	}

	if len(tmp.Should) > 0 {
		var should TermDisjunctionQuery
		// The mapping has to be set on the should clause itself; it was
		// previously assigned to the must clause by mistake, leaving the
		// should clause to decode without any mapping.
		should.mapping = q.mapping
		if err := json.Unmarshal(tmp.Should, &should); err != nil {
			return err
		}
		q.Should = &should
	}

	return nil
}

82
search/query_match.go Normal file
View File

@ -0,0 +1,82 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"encoding/json"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
)
// MatchQuery runs its Match text through Analyzer and searches Field for
// documents containing at least one of the resulting terms (see Searcher).
type MatchQuery struct {
Match string `json:"match"`
Field string `json:"field,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
Explain bool `json:"explain,omitempty"`
// Analyzer is used by Searcher to tokenize Match; UnmarshalJSON fills it
// in from mapping.
Analyzer *analysis.Analyzer
// mapping is consulted by UnmarshalJSON to look up the analyzer for Field.
mapping document.Mapping
}
// Boost returns the boost value configured on this query.
func (q *MatchQuery) Boost() float64 {
return q.BoostVal
}
// Searcher analyzes the match text and builds a searcher from the result.
//
// When analysis yields no tokens, a match-none searcher is returned.
// Otherwise every token becomes a TermQuery on q.Field and the terms are
// combined in a "should" disjunction that requires at least one of them.
// The query's boost and explain settings are copied onto every generated
// sub-query.
func (q *MatchQuery) Searcher(index index.Index) (Searcher, error) {
	analyzed := q.Analyzer.Analyze([]byte(q.Match))
	if len(analyzed) == 0 {
		// nothing survived analysis, so there is nothing to match on
		empty := &MatchNoneQuery{BoostVal: q.BoostVal, Explain: q.Explain}
		return NewMatchNoneSearcher(index, empty)
	}

	// one term query per analyzed token
	termQueries := make([]Query, 0, len(analyzed))
	for _, tok := range analyzed {
		termQueries = append(termQueries, &TermQuery{
			Term:     string(tok.Term),
			Field:    q.Field,
			BoostVal: q.BoostVal,
			Explain:  q.Explain,
		})
	}

	anyTerm := &TermDisjunctionQuery{
		Terms:    termQueries,
		BoostVal: q.BoostVal,
		Explain:  q.Explain,
		Min:      1,
	}
	wrapper := &TermBooleanQuery{
		Should:   anyTerm,
		BoostVal: q.BoostVal,
		Explain:  q.Explain,
	}
	return NewTermBooleanSearcher(index, wrapper)
}
// Validate always returns nil; no checks are performed on a MatchQuery.
func (q *MatchQuery) Validate() error {
return nil
}
// UnmarshalJSON decodes the match text, field, boost and explain settings
// from JSON and then resolves the analyzer for the field from q.mapping.
//
// q.mapping must be set by the caller before decoding. If the mapping has no
// entry for the field, q.Analyzer is left unchanged rather than
// dereferencing a nil entry; this mirrors the nil check the query-syntax
// grammar performs on the same lookup.
func (q *MatchQuery) UnmarshalJSON(data []byte) error {
	tmp := struct {
		Match    string  `json:"match"`
		Field    string  `json:"field,omitempty"`
		BoostVal float64 `json:"boost,omitempty"`
		Explain  bool    `json:"explain,omitempty"`
	}{}
	if err := json.Unmarshal(data, &tmp); err != nil {
		return err
	}
	q.Match = tmp.Match
	q.Field = tmp.Field
	q.BoostVal = tmp.BoostVal
	q.Explain = tmp.Explain
	// Indexing a nil or incomplete mapping yields a nil entry, so guard the
	// dereference instead of panicking on unmapped fields.
	if fieldMapping := q.mapping[q.Field]; fieldMapping != nil {
		q.Analyzer = fieldMapping.Analyzer
	}
	return nil
}

30
search/query_match_all.go Normal file
View File

@ -0,0 +1,30 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"github.com/couchbaselabs/bleve/index"
)
// MatchAllQuery is the query whose searcher is built by NewMatchAllSearcher.
// It carries only boost and explain settings.
type MatchAllQuery struct {
BoostVal float64 `json:"boost,omitempty"`
Explain bool `json:"explain,omitempty"`
}
// Boost returns the boost value configured on this query.
func (q *MatchAllQuery) Boost() float64 {
return q.BoostVal
}
// Searcher builds the searcher for this query by delegating to
// NewMatchAllSearcher.
func (q *MatchAllQuery) Searcher(index index.Index) (Searcher, error) {
return NewMatchAllSearcher(index, q)
}
// Validate always returns nil; no checks are performed on a MatchAllQuery.
func (q *MatchAllQuery) Validate() error {
return nil
}

View File

@ -0,0 +1,30 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"github.com/couchbaselabs/bleve/index"
)
// MatchNoneQuery is the query whose searcher is built by NewMatchNoneSearcher.
// MatchQuery and MatchPhraseQuery fall back to it when analysis of their
// text produces no tokens.
type MatchNoneQuery struct {
BoostVal float64 `json:"boost,omitempty"`
Explain bool `json:"explain,omitempty"`
}
// Boost returns the boost value configured on this query.
func (q *MatchNoneQuery) Boost() float64 {
return q.BoostVal
}
// Searcher builds the searcher for this query by delegating to
// NewMatchNoneSearcher.
func (q *MatchNoneQuery) Searcher(index index.Index) (Searcher, error) {
return NewMatchNoneSearcher(index, q)
}
// Validate always returns nil; no checks are performed on a MatchNoneQuery.
func (q *MatchNoneQuery) Validate() error {
return nil
}

View File

@ -0,0 +1,85 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"encoding/json"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
)
// MatchPhraseQuery runs its MatchPhrase text through Analyzer and searches
// Field for the resulting terms as a phrase, placing each term at the
// position reported by the analyzer (see Searcher).
type MatchPhraseQuery struct {
MatchPhrase string `json:"match_phrase"`
Field string `json:"field,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
Explain bool `json:"explain,omitempty"`
// Analyzer is used by Searcher to tokenize MatchPhrase; UnmarshalJSON
// fills it in from mapping.
Analyzer *analysis.Analyzer
// mapping is consulted by UnmarshalJSON to look up the analyzer for Field.
mapping document.Mapping
}
// Boost returns the boost value configured on this query.
func (q *MatchPhraseQuery) Boost() float64 {
return q.BoostVal
}
// Searcher analyzes the phrase text and builds a phrase searcher from it.
//
// When analysis yields no tokens, a match-none searcher is returned.
// Otherwise a term list is sized to the highest token position and each
// token is stored at index Position-1 (positions are treated as 1-based).
// Positions that no token occupies are left as nil entries in the list.
func (q *MatchPhraseQuery) Searcher(index index.Index) (Searcher, error) {
	analyzed := q.Analyzer.Analyze([]byte(q.MatchPhrase))
	if len(analyzed) == 0 {
		// nothing survived analysis, so there is nothing to match on
		empty := &MatchNoneQuery{BoostVal: q.BoostVal, Explain: q.Explain}
		return NewMatchNoneSearcher(index, empty)
	}

	// the term list needs one slot per position up to the highest one seen
	highest := 0
	for _, tok := range analyzed {
		if pos := int(tok.Position); pos > highest {
			highest = pos
		}
	}

	// drop every term into the slot matching its position
	slots := make([]*TermQuery, highest)
	for _, tok := range analyzed {
		slots[int(tok.Position)-1] = &TermQuery{
			Term:     string(tok.Term),
			Field:    q.Field,
			BoostVal: q.BoostVal,
			Explain:  q.Explain,
		}
	}

	phrase := &PhraseQuery{
		Terms:    slots,
		BoostVal: q.BoostVal,
		Explain:  q.Explain,
	}
	return NewPhraseSearcher(index, phrase)
}
// Validate always returns nil; no checks are performed on a MatchPhraseQuery.
func (q *MatchPhraseQuery) Validate() error {
return nil
}
// UnmarshalJSON decodes the phrase text, field, boost and explain settings
// from JSON and then resolves the analyzer for the field from q.mapping.
//
// q.mapping must be set by the caller before decoding. If the mapping has no
// entry for the field, q.Analyzer is left unchanged rather than
// dereferencing a nil entry; this mirrors the nil check the query-syntax
// grammar performs on the same lookup.
func (q *MatchPhraseQuery) UnmarshalJSON(data []byte) error {
	tmp := struct {
		MatchPhrase string  `json:"match_phrase"`
		Field       string  `json:"field,omitempty"`
		BoostVal    float64 `json:"boost,omitempty"`
		Explain     bool    `json:"explain,omitempty"`
	}{}
	if err := json.Unmarshal(data, &tmp); err != nil {
		return err
	}
	q.MatchPhrase = tmp.MatchPhrase
	q.Field = tmp.Field
	q.BoostVal = tmp.BoostVal
	q.Explain = tmp.Explain
	// Indexing a nil or incomplete mapping yields a nil entry, so guard the
	// dereference instead of panicking on unmapped fields.
	if fieldMapping := q.mapping[q.Field]; fieldMapping != nil {
		q.Analyzer = fieldMapping.Analyzer
	}
	return nil
}

View File

@ -7,10 +7,9 @@ import (
)
type PhraseQuery struct {
Terms []*TermQuery `json:"terms,omitempty"`
PhrasePositions map[string]float64 `json:"phrase_positions,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
Explain bool `json:"explain,omitempty"`
Terms []*TermQuery `json:"terms,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
Explain bool `json:"explain,omitempty"`
}
func (q *PhraseQuery) Boost() float64 {

37
search/query_syntax.go Normal file
View File

@ -0,0 +1,37 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
)
// SyntaxQuery holds a query written in the textual query syntax. The text is
// not parsed until Searcher is called, at which point it is handed to
// ParseQuerySyntax together with mapping.
type SyntaxQuery struct {
Query string `json:"query"`
BoostVal float64 `json:"boost,omitempty"`
Explain bool `json:"explain,omitempty"`
// mapping is passed through to ParseQuerySyntax by Searcher.
mapping document.Mapping
}
// Boost returns the boost value configured on this query.
func (q *SyntaxQuery) Boost() float64 {
return q.BoostVal
}
// Searcher parses the query text with ParseQuerySyntax and returns the
// searcher of the query that parsing produced. A parse failure is returned
// as the error, with a nil searcher.
func (q *SyntaxQuery) Searcher(index index.Index) (Searcher, error) {
	parsed, parseErr := ParseQuerySyntax(q.Query, q.mapping)
	if parseErr == nil {
		return parsed.Searcher(index)
	}
	return nil, parseErr
}
// Validate always returns nil; the query text is not checked here and is
// only parsed when Searcher is called.
func (q *SyntaxQuery) Validate() error {
return nil
}

34
search/query_syntax.nex Normal file
View File

@ -0,0 +1,34 @@
/\"((\\\")|(\\\\)|(\\\/)|(\\b)|(\\f)|(\\n)|(\\r)|(\\t)|(\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])|[^\"])*\"/ {
lval.s = yylex.Text()[1:len(yylex.Text())-1]
logDebugTokens("PHRASE - %s", lval.s);
return PHRASE
}
/\+/ { logDebugTokens("PLUS"); return PLUS }
/-/ { logDebugTokens("MINUS"); return MINUS }
/:/ { logDebugTokens("COLON"); return COLON }
/^/ { logDebugTokens("BOOST"); return BOOST }
/\(/ { logDebugTokens("LPAREN"); return LPAREN }
/\)/ { logDebugTokens("RPAREN"); return RPAREN }
/-?(0|[1-9][0-9]*)/
{
	// The optional sign is grouped with the whole number. Without the
	// parentheses the alternation split the pattern into "-?[0-9]" and
	// "[1-9][0-9]*", so a negative multi-digit value such as -12 was cut
	// into two INT tokens.
	// The pattern only admits digits, so Atoi can fail only on an
	// out-of-range value; that error is ignored here.
	lval.n,_ = strconv.Atoi(yylex.Text());
	logDebugTokens("INT - %d", lval.n);
	return INT
}
/[ \t\n]+/ { logDebugTokens("WHITESPACE (count=%d)", len(yylex.Text())) /* eat up whitespace */ }
/[^\t\n\f\r :^\+\-]+/ {
lval.s = yylex.Text()
logDebugTokens("STRING - %s", lval.s);
return STRING
}
//
package search
import("log")
import("strconv")
func logDebugTokens(format string, v ...interface{}) {
if debugLexer {
log.Printf(format, v...)
}
}

867
search/query_syntax.nn.go Normal file
View File

@ -0,0 +1,867 @@
package search
import (
"log"
)
import (
"strconv"
)
import ("bufio";"io";"strings")
type dfa struct {
acc []bool
f []func(rune) int
id int
}
type family struct {
a []dfa
endcase int
}
var a0 [10]dfa
var a []family
func init() {
a = make([]family, 1)
{
var acc [18]bool
var fun [18]func(rune) int
fun[1] = func(r rune) int {
switch(r) {
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 3
case 47: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[14] = func(r rune) int {
switch(r) {
case 117: return 3
case 102: return 15
case 47: return 3
case 34: return 2
case 98: return 15
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
default:
switch {
case 48 <= r && r <= 57: return 15
case 65 <= r && r <= 70: return 15
case 97 <= r && r <= 102: return 15
default: return 3
}
}
panic("unreachable")
}
fun[10] = func(r rune) int {
switch(r) {
case 117: return 3
case 102: return 3
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[3] = func(r rune) int {
switch(r) {
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 3
case 47: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[4] = func(r rune) int {
switch(r) {
case 117: return 5
case 102: return 6
case 47: return 7
case 34: return 8
case 98: return 9
case 110: return 10
case 92: return 11
case 116: return 12
case 114: return 13
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[16] = func(r rune) int {
switch(r) {
case 34: return 2
case 98: return 17
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 17
case 47: return 3
default:
switch {
case 48 <= r && r <= 57: return 17
case 65 <= r && r <= 70: return 17
case 97 <= r && r <= 102: return 17
default: return 3
}
}
panic("unreachable")
}
fun[15] = func(r rune) int {
switch(r) {
case 114: return 3
case 117: return 3
case 102: return 16
case 47: return 3
case 34: return 2
case 98: return 16
case 110: return 3
case 92: return 4
case 116: return 3
default:
switch {
case 48 <= r && r <= 57: return 16
case 65 <= r && r <= 70: return 16
case 97 <= r && r <= 102: return 16
default: return 3
}
}
panic("unreachable")
}
fun[17] = func(r rune) int {
switch(r) {
case 117: return 3
case 102: return 3
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[11] = func(r rune) int {
switch(r) {
case 117: return 5
case 102: return 6
case 47: return 7
case 34: return 8
case 98: return 9
case 110: return 10
case 92: return 11
case 116: return 12
case 114: return 13
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[12] = func(r rune) int {
switch(r) {
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 3
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
acc[8] = true
fun[8] = func(r rune) int {
switch(r) {
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[6] = func(r rune) int {
switch(r) {
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[9] = func(r rune) int {
switch(r) {
case 117: return 3
case 102: return 3
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[7] = func(r rune) int {
switch(r) {
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
acc[2] = true
fun[2] = func(r rune) int {
switch(r) {
case 110: return -1
case 92: return -1
case 116: return -1
case 114: return -1
case 117: return -1
case 102: return -1
case 47: return -1
case 34: return -1
case 98: return -1
default:
switch {
case 48 <= r && r <= 57: return -1
case 65 <= r && r <= 70: return -1
case 97 <= r && r <= 102: return -1
default: return -1
}
}
panic("unreachable")
}
fun[13] = func(r rune) int {
switch(r) {
case 117: return 3
case 102: return 3
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[0] = func(r rune) int {
switch(r) {
case 117: return -1
case 102: return -1
case 47: return -1
case 34: return 1
case 98: return -1
case 110: return -1
case 92: return -1
case 116: return -1
case 114: return -1
default:
switch {
case 48 <= r && r <= 57: return -1
case 65 <= r && r <= 70: return -1
case 97 <= r && r <= 102: return -1
default: return -1
}
}
panic("unreachable")
}
fun[5] = func(r rune) int {
switch(r) {
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 14
case 47: return 3
case 34: return 2
case 98: return 14
default:
switch {
case 48 <= r && r <= 57: return 14
case 65 <= r && r <= 70: return 14
case 97 <= r && r <= 102: return 14
default: return 3
}
}
panic("unreachable")
}
a0[0].acc = acc[:]
a0[0].f = fun[:]
a0[0].id = 0
}
{
var acc [2]bool
var fun [2]func(rune) int
fun[0] = func(r rune) int {
switch(r) {
case 43: return 1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
acc[1] = true
fun[1] = func(r rune) int {
switch(r) {
case 43: return -1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
a0[1].acc = acc[:]
a0[1].f = fun[:]
a0[1].id = 1
}
{
var acc [2]bool
var fun [2]func(rune) int
acc[1] = true
fun[1] = func(r rune) int {
switch(r) {
case 45: return -1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
fun[0] = func(r rune) int {
switch(r) {
case 45: return 1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
a0[2].acc = acc[:]
a0[2].f = fun[:]
a0[2].id = 2
}
{
var acc [2]bool
var fun [2]func(rune) int
fun[0] = func(r rune) int {
switch(r) {
case 58: return 1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
acc[1] = true
fun[1] = func(r rune) int {
switch(r) {
case 58: return -1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
a0[3].acc = acc[:]
a0[3].f = fun[:]
a0[3].id = 3
}
{
var acc [2]bool
var fun [2]func(rune) int
fun[0] = func(r rune) int {
switch(r) {
case 94: return 1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
acc[1] = true
fun[1] = func(r rune) int {
switch(r) {
case 94: return -1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
a0[4].acc = acc[:]
a0[4].f = fun[:]
a0[4].id = 4
}
{
var acc [2]bool
var fun [2]func(rune) int
fun[0] = func(r rune) int {
switch(r) {
case 40: return 1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
acc[1] = true
fun[1] = func(r rune) int {
switch(r) {
case 40: return -1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
a0[5].acc = acc[:]
a0[5].f = fun[:]
a0[5].id = 5
}
{
var acc [2]bool
var fun [2]func(rune) int
fun[0] = func(r rune) int {
switch(r) {
case 41: return 1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
acc[1] = true
fun[1] = func(r rune) int {
switch(r) {
case 41: return -1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
a0[6].acc = acc[:]
a0[6].f = fun[:]
a0[6].id = 6
}
{
var acc [5]bool
var fun [5]func(rune) int
fun[1] = func(r rune) int {
switch(r) {
case 45: return -1
default:
switch {
case 48 <= r && r <= 48: return 2
case 49 <= r && r <= 57: return 2
default: return -1
}
}
panic("unreachable")
}
acc[2] = true
fun[2] = func(r rune) int {
switch(r) {
case 45: return -1
default:
switch {
case 48 <= r && r <= 48: return -1
case 49 <= r && r <= 57: return -1
default: return -1
}
}
panic("unreachable")
}
acc[4] = true
fun[4] = func(r rune) int {
switch(r) {
case 45: return -1
default:
switch {
case 48 <= r && r <= 48: return 4
case 49 <= r && r <= 57: return 4
default: return -1
}
}
panic("unreachable")
}
fun[0] = func(r rune) int {
switch(r) {
case 45: return 1
default:
switch {
case 48 <= r && r <= 48: return 2
case 49 <= r && r <= 57: return 3
default: return -1
}
}
panic("unreachable")
}
acc[3] = true
fun[3] = func(r rune) int {
switch(r) {
case 45: return -1
default:
switch {
case 48 <= r && r <= 48: return 4
case 49 <= r && r <= 57: return 4
default: return -1
}
}
panic("unreachable")
}
a0[7].acc = acc[:]
a0[7].f = fun[:]
a0[7].id = 7
}
{
var acc [2]bool
var fun [2]func(rune) int
acc[1] = true
fun[1] = func(r rune) int {
switch(r) {
case 9: return 1
case 10: return 1
case 32: return 1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
fun[0] = func(r rune) int {
switch(r) {
case 9: return 1
case 10: return 1
case 32: return 1
default:
switch {
default: return -1
}
}
panic("unreachable")
}
a0[8].acc = acc[:]
a0[8].f = fun[:]
a0[8].id = 8
}
{
var acc [2]bool
var fun [2]func(rune) int
acc[1] = true
fun[1] = func(r rune) int {
switch(r) {
case 10: return -1
case 43: return -1
case 9: return -1
case 32: return -1
case 12: return -1
case 13: return -1
case 58: return -1
case 94: return -1
case 45: return -1
default:
switch {
default: return 1
}
}
panic("unreachable")
}
fun[0] = func(r rune) int {
switch(r) {
case 9: return -1
case 32: return -1
case 12: return -1
case 13: return -1
case 58: return -1
case 94: return -1
case 45: return -1
case 10: return -1
case 43: return -1
default:
switch {
default: return 1
}
}
panic("unreachable")
}
a0[9].acc = acc[:]
a0[9].f = fun[:]
a0[9].id = 9
}
a[0].endcase = 10
a[0].a = a0[:]
}
func getAction(c *frame) int {
if -1 == c.match { return -1 }
c.action = c.fam.a[c.match].id
c.match = -1
return c.action
}
type frame struct {
atEOF bool
action, match, matchn, n int
buf []rune
text string
in *bufio.Reader
state []int
fam family
}
func newFrame(in *bufio.Reader, index int) *frame {
f := new(frame)
f.buf = make([]rune, 0, 128)
f.in = in
f.match = -1
f.fam = a[index]
f.state = make([]int, len(f.fam.a))
return f
}
type Lexer []*frame
func NewLexer(in io.Reader) Lexer {
stack := make([]*frame, 0, 4)
stack = append(stack, newFrame(bufio.NewReader(in), 0))
return stack
}
func (stack Lexer) isDone() bool {
return 1 == len(stack) && stack[0].atEOF
}
func (stack Lexer) nextAction() int {
c := stack[len(stack) - 1]
for {
if c.atEOF { return c.fam.endcase }
if c.n == len(c.buf) {
r,_,er := c.in.ReadRune()
switch er {
case nil: c.buf = append(c.buf, r)
case io.EOF:
c.atEOF = true
if c.n > 0 {
c.text = string(c.buf)
return getAction(c)
}
return c.fam.endcase
default: panic(er.Error())
}
}
jammed := true
r := c.buf[c.n]
for i, x := range c.fam.a {
if -1 == c.state[i] { continue }
c.state[i] = x.f[c.state[i]](r)
if -1 == c.state[i] { continue }
jammed = false
if x.acc[c.state[i]] {
if -1 == c.match || c.matchn < c.n+1 || c.match > i {
c.match = i
c.matchn = c.n+1
}
}
}
if jammed {
a := getAction(c)
if -1 == a { c.matchn = c.n + 1 }
c.n = 0
for i, _ := range c.state { c.state[i] = 0 }
c.text = string(c.buf[:c.matchn])
copy(c.buf, c.buf[c.matchn:])
c.buf = c.buf[:len(c.buf) - c.matchn]
return a
}
c.n++
}
panic("unreachable")
}
func (stack Lexer) push(index int) Lexer {
c := stack[len(stack) - 1]
return append(stack,
newFrame(bufio.NewReader(strings.NewReader(c.text)), index))
}
func (stack Lexer) pop() Lexer {
return stack[:len(stack) - 1]
}
func (stack Lexer) Text() string {
c := stack[len(stack) - 1]
return c.text
}
func (yylex Lexer) Error(e string) {
panic(e)
}
func (yylex Lexer) Lex(lval *yySymType) int {
for !yylex.isDone() {
switch yylex.nextAction() {
case -1:
case 0: //\"((\\\")|(\\\\)|(\\\/)|(\\b)|(\\f)|(\\n)|(\\r)|(\\t)|(\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])|[^\"])*\"/
{
lval.s = yylex.Text()[1:len(yylex.Text())-1]
logDebugTokens("PHRASE - %s", lval.s);
return PHRASE
}
case 1: //\+/
{ logDebugTokens("PLUS"); return PLUS }
case 2: //-/
{ logDebugTokens("MINUS"); return MINUS }
case 3: //:/
{ logDebugTokens("COLON"); return COLON }
case 4: //^/
{ logDebugTokens("BOOST"); return BOOST }
case 5: //\(/
{ logDebugTokens("LPAREN"); return LPAREN }
case 6: //\)/
{ logDebugTokens("RPAREN"); return RPAREN }
case 7: //-?[0-9]|[1-9][0-9]*/
{
lval.n,_ = strconv.Atoi(yylex.Text());
logDebugTokens("INT - %d", lval.n);
return INT
}
case 8: //[ \t\n]+/
{ logDebugTokens("WHITESPACE (count=%d)", len(yylex.Text())) /* eat up whitespace */ }
case 9: //[^\t\n\f\r :^\+\-]+/
{
lval.s = yylex.Text()
logDebugTokens("STRING - %s", lval.s);
return STRING
}
case 10: ///
// [END]
}
}
return 0
}
func logDebugTokens(format string, v ...interface{}) {
if debugLexer {
log.Printf(format, v...)
}
}

170
search/query_syntax.y Normal file
View File

@ -0,0 +1,170 @@
%{
package search
import "log"
func logDebugGrammar(format string, v ...interface{}) {
if debugParser {
log.Printf(format, v...)
}
}
%}
%union {
s string
n int
f float64}
%token STRING PHRASE PLUS MINUS COLON BOOST LPAREN RPAREN INT
%%
input:
searchParts {
logDebugGrammar("INPUT")
};
searchParts:
searchPart searchParts {
logDebugGrammar("SEARCH PARTS")
}
|
searchPart {
logDebugGrammar("SEARCH PART")
};
searchPart:
searchPrefix searchBase searchSuffix {
};
searchPrefix:
/* empty */ {
}
|
searchMustMustNot {
}
;
searchMustMustNot:
PLUS {
logDebugGrammar("PLUS")
parsingMust = true
}
|
MINUS {
logDebugGrammar("MINUS")
parsingMustNot = true
};
searchBase:
STRING {
str := $1.s
logDebugGrammar("STRING - %s", str)
q := &MatchQuery{
Match: str,
Field: parsingDefaultField,
BoostVal: 1.0,
Explain: true,
}
if parsingMapping[parsingDefaultField] != nil {
q.Analyzer = parsingMapping[parsingDefaultField].Analyzer
}
if parsingMust {
parsingMustList.Terms = append(parsingMustList.Terms, q)
parsingMust = false
} else if parsingMustNot {
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
parsingMustNot = false
} else {
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
}
}
|
PHRASE {
phrase := $1.s
logDebugGrammar("PHRASE - %s", phrase)
q := &MatchPhraseQuery{
MatchPhrase: phrase,
Field: parsingDefaultField,
BoostVal: 1.0,
Explain: true,
}
if parsingMapping[parsingDefaultField] != nil {
q.Analyzer = parsingMapping[parsingDefaultField].Analyzer
}
if parsingMust {
parsingMustList.Terms = append(parsingMustList.Terms, q)
parsingMust = false
} else if parsingMustNot {
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
parsingMustNot = false
} else {
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
}
}
|
STRING COLON STRING {
field := $1.s
str := $3.s
logDebugGrammar("FIELD - %s STRING - %s", field, str)
q := &MatchQuery{
Match: str,
Field: field,
BoostVal: 1.0,
Explain: true,
}
if parsingMapping[field] != nil {
q.Analyzer = parsingMapping[field].Analyzer
}
if parsingMust {
parsingMustList.Terms = append(parsingMustList.Terms, q)
parsingMust = false
} else if parsingMustNot {
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
parsingMustNot = false
} else {
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
}
}
|
STRING COLON PHRASE {
field := $1.s
phrase := $3.s
logDebugGrammar("FIELD - %s PHRASE - %s", field, phrase)
q := &MatchPhraseQuery{
MatchPhrase: phrase,
Field: field,
BoostVal: 1.0,
Explain: true,
}
if parsingMapping[field] != nil {
q.Analyzer = parsingMapping[field].Analyzer
}
if parsingMust {
parsingMustList.Terms = append(parsingMustList.Terms, q)
parsingMust = false
} else if parsingMustNot {
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
parsingMustNot = false
} else {
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
}
};
searchBoost:
BOOST INT {
	// The first symbol is the BOOST token itself and carries no number;
	// the boost value is the INT that follows it.
	// NOTE(review): the value is only logged here and is not yet applied
	// to any query.
	boost := $2.n
	logDebugGrammar("BOOST %d", boost)
};
searchSuffix:
/* empty */ {
}
|
searchBoost {
};

View File

@ -0,0 +1,80 @@
package search
import (
"fmt"
"strings"
"sync"
"github.com/couchbaselabs/bleve/document"
)
// crashHard, when true, makes ParseQuerySyntax re-panic on any recovered
// panic other than "syntax error" instead of converting it into an error.
var crashHard = false
// parserMutex serializes calls to ParseQuerySyntax, which communicates with
// the generated parser through the package-level variables below.
var parserMutex sync.Mutex
// parsingDefaultField is the field the grammar assigns to terms and phrases
// that carry no explicit "field:" qualifier.
var parsingDefaultField string
// parsingMust and parsingMustNot are set by the grammar when a '+' or '-'
// prefix is seen, and cleared again once the following term or phrase has
// been added to the matching list.
var parsingMust bool
var parsingMustNot bool
// debugParser and debugLexer enable the grammar's and the lexer's debug
// logging respectively.
var debugParser bool
var debugLexer bool
// The three lists collect the sub-queries produced while parsing; they are
// re-created by ParseQuerySyntax at the start of every parse.
var parsingMustList *TermConjunctionQuery
var parsingMustNotList *TermDisjunctionQuery
var parsingShouldList *TermDisjunctionQuery
// parsingMapping is the mapping the grammar uses to look up the analyzer
// for each field.
var parsingMapping document.Mapping
// ParseQuerySyntax parses a query written in the textual query syntax and
// returns the equivalent boolean query.
//
// Terms prefixed with '+' are collected in the Must clause, terms prefixed
// with '-' in the MustNot clause, and all other terms in the Should clause
// (which requires at least one match). Clauses that collected no terms are
// left nil on the result. mapping is used by the grammar to look up the
// analyzer for each field.
//
// The generated parser reports problems by panicking. Those panics are
// recovered here and returned as an error with a nil query, unless crashHard
// is set, in which case anything other than a syntax error is re-panicked.
//
// The parser keeps its state in package-level variables, so calls are
// serialized by parserMutex.
func ParseQuerySyntax(query string, mapping document.Mapping) (rq Query, err error) {
	parserMutex.Lock()
	defer parserMutex.Unlock()

	parsingMapping = mapping
	// Reset the prefix flags as well: a previous parse that failed right
	// after a '+' or '-' would otherwise leave its flag set and misfile the
	// first term of this parse.
	parsingMust = false
	parsingMustNot = false
	parsingMustList = &TermConjunctionQuery{
		Terms:    make([]Query, 0),
		BoostVal: 1.0,
		Explain:  true,
	}
	parsingMustNotList = &TermDisjunctionQuery{
		Terms:    make([]Query, 0),
		BoostVal: 1.0,
		Explain:  true,
	}
	parsingShouldList = &TermDisjunctionQuery{
		Terms:    make([]Query, 0),
		BoostVal: 1.0,
		Explain:  true,
		Min:      1,
	}

	// Registered after the unlock above, so it runs first and the mutex is
	// still released when the parser panics.
	defer func() {
		r := recover()
		if r != nil && r == "syntax error" {
			// if we're panicking over a syntax error, chill
			err = fmt.Errorf("Parse Error - %v", r)
		} else if r != nil {
			// otherwise continue to panic
			if crashHard {
				panic(r)
			} else {
				err = fmt.Errorf("Other Error - %v", r)
			}
		}
	}()

	yyParse(NewLexer(strings.NewReader(query)))

	parsingQuery := &TermBooleanQuery{
		BoostVal: 1.0,
		Explain:  true,
	}
	if len(parsingMustList.Terms) > 0 {
		parsingQuery.Must = parsingMustList
	}
	if len(parsingMustNotList.Terms) > 0 {
		parsingQuery.MustNot = parsingMustNotList
	}
	if len(parsingShouldList.Terms) > 0 {
		parsingQuery.Should = parsingShouldList
	}
	rq = parsingQuery
	return rq, err
}

View File

@ -0,0 +1,235 @@
package search
import (
"reflect"
"testing"
"github.com/couchbaselabs/bleve/document"
)
// TestQuerySyntaxParserValid checks that well-formed query strings are
// parsed into the expected TermBooleanQuery tree. Unqualified terms are
// expected on the "_all" field, which the test installs as the default.
func TestQuerySyntaxParserValid(t *testing.T) {
	tests := []struct {
		input   string
		result  Query
		mapping document.Mapping
	}{
		{
			input:   "test",
			mapping: document.Mapping{},
			result: &TermBooleanQuery{
				Should: &TermDisjunctionQuery{
					Terms: []Query{
						&MatchQuery{
							Match:    "test",
							Field:    "_all",
							BoostVal: 1.0,
							Explain:  true,
						},
					},
					BoostVal: 1.0,
					Explain:  true,
					Min:      1.0,
				},
				BoostVal: 1.0,
				Explain:  true,
			},
		},
		{
			input:   "field:test",
			mapping: document.Mapping{},
			result: &TermBooleanQuery{
				Should: &TermDisjunctionQuery{
					Terms: []Query{
						&MatchQuery{
							Match:    "test",
							Field:    "field",
							BoostVal: 1.0,
							Explain:  true,
						},
					},
					BoostVal: 1.0,
					Explain:  true,
					Min:      1.0,
				},
				BoostVal: 1.0,
				Explain:  true,
			},
		},
		{
			input:   "+field1:test1",
			mapping: document.Mapping{},
			result: &TermBooleanQuery{
				Must: &TermConjunctionQuery{
					Terms: []Query{
						&MatchQuery{
							Match:    "test1",
							Field:    "field1",
							BoostVal: 1.0,
							Explain:  true,
						},
					},
					BoostVal: 1.0,
					Explain:  true,
				},
				BoostVal: 1.0,
				Explain:  true,
			},
		},
		{
			input:   "-field2:test2",
			mapping: document.Mapping{},
			result: &TermBooleanQuery{
				MustNot: &TermDisjunctionQuery{
					Terms: []Query{
						&MatchQuery{
							Match:    "test2",
							Field:    "field2",
							BoostVal: 1.0,
							Explain:  true,
						},
					},
					BoostVal: 1.0,
					Explain:  true,
				},
				BoostVal: 1.0,
				Explain:  true,
			},
		},
		{
			input:   `field3:"test phrase 2"`,
			mapping: document.Mapping{},
			result: &TermBooleanQuery{
				Should: &TermDisjunctionQuery{
					Terms: []Query{
						&MatchPhraseQuery{
							MatchPhrase: "test phrase 2",
							Field:       "field3",
							BoostVal:    1.0,
							Explain:     true,
						},
					},
					BoostVal: 1.0,
					Explain:  true,
					Min:      1.0,
				},
				BoostVal: 1.0,
				Explain:  true,
			},
		},
		{
			input:   `+field4:"test phrase 1"`,
			mapping: document.Mapping{},
			result: &TermBooleanQuery{
				Must: &TermConjunctionQuery{
					Terms: []Query{
						&MatchPhraseQuery{
							MatchPhrase: "test phrase 1",
							Field:       "field4",
							BoostVal:    1.0,
							Explain:     true,
						},
					},
					BoostVal: 1.0,
					Explain:  true,
				},
				BoostVal: 1.0,
				Explain:  true,
			},
		},
		{
			input:   `-field5:"test phrase 2"`,
			mapping: document.Mapping{},
			result: &TermBooleanQuery{
				MustNot: &TermDisjunctionQuery{
					Terms: []Query{
						&MatchPhraseQuery{
							MatchPhrase: "test phrase 2",
							Field:       "field5",
							BoostVal:    1.0,
							Explain:     true,
						},
					},
					BoostVal: 1.0,
					Explain:  true,
				},
				BoostVal: 1.0,
				Explain:  true,
			},
		},
		{
			input:   `+field6:test3 -field7:test4 field8:test5`,
			mapping: document.Mapping{},
			result: &TermBooleanQuery{
				Must: &TermConjunctionQuery{
					Terms: []Query{
						&MatchQuery{
							Match:    "test3",
							Field:    "field6",
							BoostVal: 1.0,
							Explain:  true,
						},
					},
					BoostVal: 1.0,
					Explain:  true,
				},
				MustNot: &TermDisjunctionQuery{
					Terms: []Query{
						&MatchQuery{
							Match:    "test4",
							Field:    "field7",
							BoostVal: 1.0,
							Explain:  true,
						},
					},
					BoostVal: 1.0,
					Explain:  true,
				},
				Should: &TermDisjunctionQuery{
					Terms: []Query{
						&MatchQuery{
							Match:    "test5",
							Field:    "field8",
							BoostVal: 1.0,
							Explain:  true,
						},
					},
					BoostVal: 1.0,
					Explain:  true,
					Min:      1.0,
				},
				BoostVal: 1.0,
				Explain:  true,
			},
		},
	}

	parsingDefaultField = "_all"
	for _, test := range tests {
		q, err := ParseQuerySyntax(test.input, test.mapping)
		if err != nil {
			// nothing useful to compare once parsing failed
			t.Errorf("parsing `%s`: %v", test.input, err)
			continue
		}
		if !reflect.DeepEqual(q, test.result) {
			t.Errorf("Expected %#v, got %#v: for %s", test.result, q, test.input)
			// Dump the should-clauses as extra diagnostics, but only when
			// they exist: Must-only and MustNot-only queries have a nil
			// Should, and an unchecked dereference would panic here and
			// hide the real failure.
			if bq, ok := q.(*TermBooleanQuery); ok && bq != nil && bq.Should != nil {
				for _, x := range bq.Should.Terms {
					t.Logf("term: %#v", x)
				}
			}
		}
	}
}
// TestQuerySyntaxParserInvalid feeds the parser inputs it must reject and
// fails if any of them parses without an error.
func TestQuerySyntaxParserInvalid(t *testing.T) {
	invalidInputs := []string{
		"^",
		"^5",
	}

	for _, input := range invalidInputs {
		if _, err := ParseQuerySyntax(input, document.Mapping{}); err == nil {
			t.Errorf("expected error, got nil for `%s`", input)
		}
	}
}

View File

@ -9,6 +9,9 @@
package search
import (
"encoding/json"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
)
@ -16,6 +19,7 @@ type TermConjunctionQuery struct {
Terms []Query `json:"terms"`
BoostVal float64 `json:"boost"`
Explain bool `json:"explain"`
mapping document.Mapping
}
func (q *TermConjunctionQuery) Boost() float64 {
@ -29,3 +33,26 @@ func (q *TermConjunctionQuery) Searcher(index index.Index) (Searcher, error) {
func (q *TermConjunctionQuery) Validate() error {
return nil
}
// UnmarshalJSON decodes a conjunction query from JSON. The entries of
// "terms" are first kept as raw JSON and then each one is turned into a
// Query by ParseQuery, which receives q.mapping. The first sub-query that
// fails to parse aborts decoding and its error is returned.
func (q *TermConjunctionQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Terms    []json.RawMessage `json:"terms"`
		BoostVal float64           `json:"boost"`
		Explain  bool              `json:"explain"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	q.Terms = make([]Query, len(raw.Terms))
	for i := range raw.Terms {
		sub, err := ParseQuery(raw.Terms[i], q.mapping)
		if err != nil {
			return err
		}
		q.Terms[i] = sub
	}

	q.BoostVal, q.Explain = raw.BoostVal, raw.Explain
	return nil
}

View File

@ -9,8 +9,10 @@
package search
import (
"encoding/json"
"fmt"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
)
@ -19,6 +21,7 @@ type TermDisjunctionQuery struct {
BoostVal float64 `json:"boost"`
Explain bool `json:"explain"`
Min float64 `json:"min"`
mapping document.Mapping
}
func (q *TermDisjunctionQuery) Boost() float64 {
@ -35,3 +38,28 @@ func (q *TermDisjunctionQuery) Validate() error {
}
return nil
}
// UnmarshalJSON decodes a disjunction query from JSON. The entries of
// "terms" are first kept as raw JSON and then each one is turned into a
// Query by ParseQuery, which receives q.mapping. The first sub-query that
// fails to parse aborts decoding and its error is returned. "boost",
// "explain" and "min" are copied onto the query as decoded.
func (q *TermDisjunctionQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Terms    []json.RawMessage `json:"terms"`
		BoostVal float64           `json:"boost"`
		Explain  bool              `json:"explain"`
		Min      float64           `json:"min"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	q.Terms = make([]Query, len(raw.Terms))
	for i := range raw.Terms {
		sub, err := ParseQuery(raw.Terms[i], q.mapping)
		if err != nil {
			return err
		}
		q.Terms[i] = sub
	}

	q.BoostVal, q.Explain, q.Min = raw.BoostVal, raw.Explain, raw.Min
	return nil
}

100
search/scorer_contsant.go Normal file
View File

@ -0,0 +1,100 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"fmt"
)
// ConstantScorer gives every document the same base score, multiplied by the
// query weight when that weight is not exactly 1.
type ConstantScorer struct {
// constant is the base score assigned to every document.
constant float64
// query supplies the boost used for weighting and in explanations.
query Query
// explain turns on building of Explanation trees.
explain bool
// queryNorm is the last value passed to SetQueryNorm.
queryNorm float64
// queryWeight is boost * queryNorm; it starts at 1.0 in NewConstantScorer.
queryWeight float64
// queryWeightExplanation is built by SetQueryNorm when explain is set.
queryWeightExplanation *Explanation
}
// NewConstantScorer returns a scorer for query that assigns constant to every
// document. The query weight starts at 1.0, so scores stay unscaled until
// SetQueryNorm changes it. explain controls whether explanations are built.
func NewConstantScorer(query Query, constant float64, explain bool) *ConstantScorer {
	return &ConstantScorer{
		constant:    constant,
		query:       query,
		explain:     explain,
		queryWeight: 1.0,
	}
}
// Weight returns the square of the query's boost.
func (s *ConstantScorer) Weight() float64 {
	boost := s.query.Boost()
	return boost * boost
}
// SetQueryNorm records qnorm and recomputes the query weight as
// boost * qnorm. When explanations are enabled it also rebuilds the
// explanation of that weight from its two factors.
func (s *ConstantScorer) SetQueryNorm(qnorm float64) {
	s.queryNorm = qnorm
	s.queryWeight = s.query.Boost() * s.queryNorm

	if !s.explain {
		return
	}

	factors := []*Explanation{
		{Value: s.query.Boost(), Message: "boost"},
		{Value: s.queryNorm, Message: "queryNorm"},
	}
	s.queryWeightExplanation = &Explanation{
		Value:    s.queryWeight,
		Message:  fmt.Sprintf("ConstantScore()^%f, product of:", s.query.Boost()),
		Children: factors,
	}
}
// Score builds the DocumentMatch for the document id. The score is the
// scorer's constant, multiplied by the query weight whenever that weight is
// not exactly 1.0. With explanations enabled, the match carries either the
// plain constant explanation or, when weighted, a product explanation whose
// children are the query-weight explanation and the constant explanation.
func (s *ConstantScorer) Score(id string) *DocumentMatch {
	match := &DocumentMatch{
		ID:    id,
		Score: s.constant,
	}

	// if the query weight isn't 1, multiply
	weighted := s.queryWeight != 1.0
	if weighted {
		match.Score *= s.queryWeight
	}

	if !s.explain {
		return match
	}

	expl := &Explanation{
		Value:   s.constant,
		Message: "ConstantScore()",
	}
	if weighted {
		expl = &Explanation{
			Value:    match.Score,
			Message:  fmt.Sprintf("weight(^%f), product of:", s.query.Boost()),
			Children: []*Explanation{s.queryWeightExplanation, expl},
		}
	}
	match.Expl = expl
	return match
}

View File

@ -74,7 +74,7 @@ func TestTermBooleanSearch(t *testing.T) {
},
&DocumentMatch{
ID: "3",
Score: 0.8506018914159408,
Score: 0.808709699395535,
},
&DocumentMatch{
ID: "4",
@ -367,7 +367,7 @@ func TestTermBooleanSearch(t *testing.T) {
results: []*DocumentMatch{
&DocumentMatch{
ID: "3",
Score: 2.6853243330509997,
Score: 2.0681575785068107,
},
},
},

View File

@ -0,0 +1,84 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"github.com/couchbaselabs/bleve/index"
)
// MatchAllSearcher walks the document ids of an index and scores each one
// with a ConstantScorer.
type MatchAllSearcher struct {
// index is the index being searched; Count reads its document count.
index index.Index
// query is the match-all query this searcher was built for.
query *MatchAllQuery
// reader yields the document ids returned by Next and Advance.
reader index.DocIdReader
// scorer produces the DocumentMatch for each id.
scorer *ConstantScorer
}
// NewMatchAllSearcher opens a document id reader on index and pairs it with
// a ConstantScorer whose constant is 1.0. An error from opening the reader
// is returned as is.
func NewMatchAllSearcher(index index.Index, query *MatchAllQuery) (*MatchAllSearcher, error) {
	// presumably empty start/end bounds select every document id -- confirm
	// against the index.DocIdReader contract
	docIDs, err := index.DocIdReader("", "")
	if err != nil {
		return nil, err
	}

	searcher := &MatchAllSearcher{
		index:  index,
		query:  query,
		reader: docIDs,
		scorer: NewConstantScorer(query, 1.0, query.Explain),
	}
	return searcher, nil
}
// Count returns the document count reported by the index.
func (s *MatchAllSearcher) Count() uint64 {
return s.index.DocCount()
}
// Weight returns the weight computed by the underlying ConstantScorer.
func (s *MatchAllSearcher) Weight() float64 {
return s.scorer.Weight()
}
// SetQueryNorm forwards qnorm to the underlying ConstantScorer.
func (s *MatchAllSearcher) SetQueryNorm(qnorm float64) {
s.scorer.SetQueryNorm(qnorm)
}
// Next returns the scored match for the next document id from the reader.
// It returns (nil, nil) once the reader yields an empty id, and passes any
// reader error through.
func (s *MatchAllSearcher) Next() (*DocumentMatch, error) {
	id, err := s.reader.Next()
	switch {
	case err != nil:
		return nil, err
	case id == "":
		// the reader has no more ids
		return nil, nil
	}
	return s.scorer.Score(id), nil
}
// Advance asks the reader to advance to ID and returns the scored match for
// the id the reader reports. It returns (nil, nil) when the reader yields an
// empty id, and passes any reader error through.
func (s *MatchAllSearcher) Advance(ID string) (*DocumentMatch, error) {
	id, err := s.reader.Advance(ID)
	switch {
	case err != nil:
		return nil, err
	case id == "":
		// the reader has no more ids
		return nil, nil
	}
	return s.scorer.Score(id), nil
}
// Close closes the underlying document id reader.
func (s *MatchAllSearcher) Close() {
s.reader.Close()
}

View File

@ -0,0 +1,116 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"testing"
"github.com/couchbaselabs/bleve/index"
)
// TestMatchAllSearch runs MatchAllQuery against twoDocIndex and checks that
// every document comes back, in id order, with the expected score, both with
// the default query norm and with an explicit one.
func TestMatchAllSearch(t *testing.T) {
	tests := []struct {
		index     index.Index
		query     Query
		queryNorm float64
		results   []*DocumentMatch
	}{
		{
			index: twoDocIndex,
			query: &MatchAllQuery{
				BoostVal: 1.0,
				Explain:  true,
			},
			queryNorm: 1.0,
			results: []*DocumentMatch{
				&DocumentMatch{
					ID:    "1",
					Score: 1.0,
				},
				&DocumentMatch{
					ID:    "2",
					Score: 1.0,
				},
				&DocumentMatch{
					ID:    "3",
					Score: 1.0,
				},
				&DocumentMatch{
					ID:    "4",
					Score: 1.0,
				},
				&DocumentMatch{
					ID:    "5",
					Score: 1.0,
				},
			},
		},
		{
			index: twoDocIndex,
			query: &MatchAllQuery{
				BoostVal: 1.2,
				Explain:  true,
			},
			queryNorm: 0.8333333,
			results: []*DocumentMatch{
				&DocumentMatch{
					ID:    "1",
					Score: 1.0,
				},
				&DocumentMatch{
					ID:    "2",
					Score: 1.0,
				},
				&DocumentMatch{
					ID:    "3",
					Score: 1.0,
				},
				&DocumentMatch{
					ID:    "4",
					Score: 1.0,
				},
				&DocumentMatch{
					ID:    "5",
					Score: 1.0,
				},
			},
		},
	}

	for testIndex, test := range tests {
		// Each case runs in its own function so the deferred Close fires at
		// the end of the case rather than at the end of the whole test.
		func() {
			searcher, err := test.query.Searcher(test.index)
			if err != nil {
				// using the searcher after a failed construction would panic
				t.Fatalf("error creating searcher: %v for test %d", err, testIndex)
			}
			defer searcher.Close()

			if test.queryNorm != 1.0 {
				searcher.SetQueryNorm(test.queryNorm)
			}

			next, err := searcher.Next()
			i := 0
			for err == nil && next != nil {
				if i < len(test.results) {
					if next.ID != test.results[i].ID {
						t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
					}
					if !scoresCloseEnough(next.Score, test.results[i].Score) {
						t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
						t.Logf("scoring explanation: %s", next.Expl)
					}
				}
				next, err = searcher.Next()
				i++
			}
			if err != nil {
				t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
			}
			if len(test.results) != i {
				t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
			}
		}()
	}
}

View File

@ -0,0 +1,48 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"github.com/couchbaselabs/bleve/index"
)
// MatchNoneSearcher is a searcher that never produces a match: it reports a
// count and weight of zero and Next/Advance always return (nil, nil).
type MatchNoneSearcher struct {
	index index.Index
	query *MatchNoneQuery
}

// NewMatchNoneSearcher returns a MatchNoneSearcher holding index and query.
// It never returns an error.
func NewMatchNoneSearcher(index index.Index, query *MatchNoneQuery) (*MatchNoneSearcher, error) {
	searcher := MatchNoneSearcher{
		index: index,
		query: query,
	}
	return &searcher, nil
}

// Count always returns 0.
func (s *MatchNoneSearcher) Count() uint64 { return 0 }

// Weight always returns 0.
func (s *MatchNoneSearcher) Weight() float64 { return 0 }

// SetQueryNorm ignores qnorm; there is nothing to score.
func (s *MatchNoneSearcher) SetQueryNorm(qnorm float64) {}

// Next always reports that there are no more matches.
func (s *MatchNoneSearcher) Next() (*DocumentMatch, error) { return nil, nil }

// Advance always reports that there are no more matches, whatever ID is.
func (s *MatchNoneSearcher) Advance(ID string) (*DocumentMatch, error) { return nil, nil }

// Close does nothing; the searcher holds no resources of its own.
func (s *MatchNoneSearcher) Close() {}

View File

@ -0,0 +1,59 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"testing"
"github.com/couchbaselabs/bleve/index"
)
// TestMatchNoneSearch runs MatchNoneQuery against twoDocIndex and checks
// that the searcher yields no results at all.
func TestMatchNoneSearch(t *testing.T) {
	tests := []struct {
		index   index.Index
		query   Query
		results []*DocumentMatch
	}{
		{
			index: twoDocIndex,
			query: &MatchNoneQuery{
				Explain: true,
			},
			results: []*DocumentMatch{},
		},
	}

	for testIndex, test := range tests {
		// Each case runs in its own function so the deferred Close fires at
		// the end of the case rather than at the end of the whole test.
		func() {
			searcher, err := test.query.Searcher(test.index)
			if err != nil {
				// using the searcher after a failed construction would panic
				t.Fatalf("error creating searcher: %v for test %d", err, testIndex)
			}
			defer searcher.Close()

			next, err := searcher.Next()
			i := 0
			for err == nil && next != nil {
				if i < len(test.results) {
					if next.ID != test.results[i].ID {
						t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
					}
					if !scoresCloseEnough(next.Score, test.results[i].Score) {
						t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
						t.Logf("scoring explanation: %s", next.Expl)
					}
				}
				next, err = searcher.Next()
				i++
			}
			if err != nil {
				t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
			}
			if len(test.results) != i {
				t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
			}
		}()
	}
}

View File

@ -19,10 +19,13 @@ func NewPhraseSearcher(index index.Index, query *PhraseQuery) (*PhraseSearcher,
// build the downstream searchers
var err error
var mustSearcher *TermConjunctionSearcher
if query.Terms != nil {
qterms := make([]Query, len(query.Terms))
for i, qt := range query.Terms {
qterms[i] = qt
qterms := make([]Query, 0, len(query.Terms))
for _, qt := range query.Terms {
if qt != nil {
qterms = append(qterms, qt)
}
}
tcq := TermConjunctionQuery{
Terms: qterms,

View File

@ -183,7 +183,7 @@ func TestTermConjunctionSearch(t *testing.T) {
results: []*DocumentMatch{
&DocumentMatch{
ID: "2",
Score: 1.754501824953384,
Score: 1.441614953806971,
},
},
},

495
search/y.go Normal file
View File

@ -0,0 +1,495 @@
//line query_syntax.y:2
package search
import __yyfmt__ "fmt"
//line query_syntax.y:2
import "log"
// logDebugGrammar logs a grammar trace message through the standard logger,
// but only while debugParser is set; otherwise it does nothing.
func logDebugGrammar(format string, v ...interface{}) {
	if !debugParser {
		return
	}
	log.Printf(format, v...)
}
//line query_syntax.y:12
// yySymType is the value carried by every entry on the parser stack and
// filled in by the lexer for each token.
type yySymType struct {
// yys is the parser state stored alongside the value by yyParse.
yys int
// s is the string value read by the STRING / PHRASE grammar actions.
s string
// n is the integer value read by the boost grammar action.
n int
// f is a float value; no visible grammar action reads it.
f float64}
const STRING = 57346
const PHRASE = 57347
const PLUS = 57348
const MINUS = 57349
const COLON = 57350
const BOOST = 57351
const LPAREN = 57352
const RPAREN = 57353
const INT = 57354
var yyToknames = []string{
"STRING",
"PHRASE",
"PLUS",
"MINUS",
"COLON",
"BOOST",
"LPAREN",
"RPAREN",
"INT",
}
var yyStatenames = []string{}
const yyEofCode = 1
const yyErrCode = 2
const yyMaxDepth = 200
//line yacctab:1
var yyExca = []int{
-1, 1,
1, -1,
-2, 0,
-1, 3,
1, 3,
-2, 5,
}
const yyNprod = 16
const yyPrivate = 57344
var yyTokenNames []string
var yyStates []string
const yyLast = 18
var yyAct = []int{
16, 14, 15, 6, 7, 17, 18, 10, 11, 2,
13, 5, 12, 8, 9, 4, 3, 1,
}
var yyPact = []int{
-3, -1000, -1000, -3, 3, -1000, -1000, -1000, -1000, -8,
-6, -1000, -1000, -1000, -12, 1, -1000, -1000, -1000,
}
var yyPgo = []int{
0, 17, 9, 16, 15, 14, 12, 11, 10,
}
var yyR1 = []int{
0, 1, 2, 2, 3, 4, 4, 7, 7, 5,
5, 5, 5, 8, 6, 6,
}
var yyR2 = []int{
0, 1, 2, 1, 3, 0, 1, 1, 1, 1,
1, 3, 3, 2, 0, 1,
}
var yyChk = []int{
-1000, -1, -2, -3, -4, -7, 6, 7, -2, -5,
4, 5, -6, -8, 9, 8, 12, 4, 5,
}
var yyDef = []int{
5, -2, 1, -2, 0, 6, 7, 8, 2, 14,
9, 10, 4, 15, 0, 0, 13, 11, 12,
}
var yyTok1 = []int{
1,
}
var yyTok2 = []int{
2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12,
}
var yyTok3 = []int{
0,
}
//line yaccpar:1
/* parser for yacc output */
var yyDebug = 0
// yyLexer is what yyParse needs from a lexer.
type yyLexer interface {
// Lex stores the next token's value in lval and returns its token code;
// yylex1 treats any code <= 0 as end of input.
Lex(lval *yySymType) int
// Error is called by yyParse with "syntax error" when a new parse error is
// detected.
Error(s string)
}
const yyFlag = -1000
// yyTokname returns the display name of token number c. Names live in
// yyToknames starting at token number 4; numbers outside the table, or with
// an empty name, are rendered as "tok-<c>".
func yyTokname(c int) string {
	// 4 is TOKSTART above
	const tokStart = 4
	if idx := c - tokStart; idx >= 0 && idx < len(yyToknames) && yyToknames[idx] != "" {
		return yyToknames[idx]
	}
	return __yyfmt__.Sprintf("tok-%v", c)
}
// yyStatname returns the display name of parser state s. States outside
// yyStatenames, or with an empty name, are rendered as "state-<s>".
func yyStatname(s int) string {
	if s >= 0 && s < len(yyStatenames) && yyStatenames[s] != "" {
		return yyStatenames[s]
	}
	return __yyfmt__.Sprintf("state-%v", s)
}
// yylex1 fetches the next token from lex and translates the lexer's token
// code into the parser's internal token number. Codes <= 0 map to
// yyTok1[0]; codes below len(yyTok1) are looked up in yyTok1; codes starting
// at yyPrivate are looked up in yyTok2; anything else is searched for in the
// (code, number) pairs of yyTok3. A result of 0 is replaced by yyTok2[1].
func yylex1(lex yyLexer, lval *yySymType) int {
c := 0
char := lex.Lex(lval)
if char <= 0 {
c = yyTok1[0]
goto out
}
if char < len(yyTok1) {
c = yyTok1[char]
goto out
}
if char >= yyPrivate {
if char < yyPrivate+len(yyTok2) {
c = yyTok2[char-yyPrivate]
goto out
}
}
// yyTok3 is scanned as pairs: lexer code, then parser token number.
for i := 0; i < len(yyTok3); i += 2 {
c = yyTok3[i+0]
if c == char {
c = yyTok3[i+1]
goto out
}
}
out:
if c == 0 {
c = yyTok2[1] /* unknown char */
}
if yyDebug >= 3 {
__yyfmt__.Printf("lex %s(%d)\n", yyTokname(c), uint(char))
}
return c
}
// yyParse runs the table-driven parser over the tokens produced by yylex.
// It returns 0 (label ret0) when the exception-table lookup signals accept,
// and 1 (label ret1) when error recovery finds no state to resume from or
// reaches end of input. New errors are reported through yylex.Error. The
// grammar actions in the switch at the bottom build the query by mutating
// the package-level parsing* variables.
func yyParse(yylex yyLexer) int {
var yyn int
var yylval yySymType
var yyVAL yySymType
yyS := make([]yySymType, yyMaxDepth)
Nerrs := 0 /* number of errors */
Errflag := 0 /* error recovery flag */
yystate := 0
yychar := -1
yyp := -1
goto yystack
ret0:
return 0
ret1:
return 1
yystack:
/* put a state and value onto the stack */
if yyDebug >= 4 {
__yyfmt__.Printf("char %v in %v\n", yyTokname(yychar), yyStatname(yystate))
}
yyp++
// grow the stack by doubling when it is full
if yyp >= len(yyS) {
nyys := make([]yySymType, len(yyS)*2)
copy(nyys, yyS)
yyS = nyys
}
yyS[yyp] = yyVAL
yyS[yyp].yys = yystate
yynewstate:
yyn = yyPact[yystate]
if yyn <= yyFlag {
goto yydefault /* simple state */
}
if yychar < 0 {
yychar = yylex1(yylex, &yylval)
}
yyn += yychar
if yyn < 0 || yyn >= yyLast {
goto yydefault
}
yyn = yyAct[yyn]
if yyChk[yyn] == yychar { /* valid shift */
yychar = -1
yyVAL = yylval
yystate = yyn
if Errflag > 0 {
Errflag--
}
goto yystack
}
yydefault:
/* default state action */
yyn = yyDef[yystate]
if yyn == -2 {
if yychar < 0 {
yychar = yylex1(yylex, &yylval)
}
/* look through exception table */
xi := 0
for {
if yyExca[xi+0] == -1 && yyExca[xi+1] == yystate {
break
}
xi += 2
}
for xi += 2; ; xi += 2 {
yyn = yyExca[xi+0]
if yyn < 0 || yyn == yychar {
break
}
}
yyn = yyExca[xi+1]
if yyn < 0 {
goto ret0
}
}
if yyn == 0 {
/* error ... attempt to resume parsing */
switch Errflag {
case 0: /* brand new error */
yylex.Error("syntax error")
Nerrs++
if yyDebug >= 1 {
__yyfmt__.Printf("%s", yyStatname(yystate))
__yyfmt__.Printf(" saw %s\n", yyTokname(yychar))
}
fallthrough
case 1, 2: /* incompletely recovered error ... try again */
Errflag = 3
/* find a state where "error" is a legal shift action */
for yyp >= 0 {
yyn = yyPact[yyS[yyp].yys] + yyErrCode
if yyn >= 0 && yyn < yyLast {
yystate = yyAct[yyn] /* simulate a shift of "error" */
if yyChk[yystate] == yyErrCode {
goto yystack
}
}
/* the current p has no shift on "error", pop stack */
if yyDebug >= 2 {
__yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys)
}
yyp--
}
/* there is no state on the stack with an error shift ... abort */
goto ret1
case 3: /* no shift yet; clobber input char */
if yyDebug >= 2 {
__yyfmt__.Printf("error recovery discards %s\n", yyTokname(yychar))
}
if yychar == yyEofCode {
goto ret1
}
yychar = -1
goto yynewstate /* try again in the same state */
}
}
/* reduction by production yyn */
if yyDebug >= 2 {
__yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate))
}
yynt := yyn
yypt := yyp
_ = yypt // guard against "declared and not used"
yyp -= yyR2[yyn]
yyVAL = yyS[yyp+1]
/* consult goto table to find next state */
yyn = yyR1[yyn]
yyg := yyPgo[yyn]
yyj := yyg + yyS[yyp].yys + 1
if yyj >= yyLast {
yystate = yyAct[yyg]
} else {
yystate = yyAct[yyj]
if yyChk[yystate] != -yyn {
yystate = yyAct[yyg]
}
}
// dummy call; replaced with literal code
switch yynt {
case 1:
//line query_syntax.y:22
{
logDebugGrammar("INPUT")
}
case 2:
//line query_syntax.y:27
{
logDebugGrammar("SEARCH PARTS")
}
case 3:
//line query_syntax.y:31
{
logDebugGrammar("SEARCH PART")
}
case 4:
//line query_syntax.y:36
{
}
case 5:
//line query_syntax.y:42
{
}
case 6:
//line query_syntax.y:45
{
}
case 7:
//line query_syntax.y:51
{
logDebugGrammar("PLUS")
parsingMust = true
}
case 8:
//line query_syntax.y:56
{
logDebugGrammar("MINUS")
parsingMustNot = true
}
case 9:
//line query_syntax.y:62
{
str := yyS[yypt-0].s
logDebugGrammar("STRING - %s", str)
q := &MatchQuery{
Match: str,
Field: parsingDefaultField,
BoostVal: 1.0,
Explain: true,
}
if parsingMapping[parsingDefaultField] != nil {
q.Analyzer = parsingMapping[parsingDefaultField].Analyzer
}
if parsingMust {
parsingMustList.Terms = append(parsingMustList.Terms, q)
parsingMust = false
} else if parsingMustNot {
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
parsingMustNot = false
} else {
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
}
}
case 10:
//line query_syntax.y:85
{
phrase := yyS[yypt-0].s
logDebugGrammar("PHRASE - %s", phrase)
q := &MatchPhraseQuery{
MatchPhrase: phrase,
Field: parsingDefaultField,
BoostVal: 1.0,
Explain: true,
}
if parsingMapping[parsingDefaultField] != nil {
q.Analyzer = parsingMapping[parsingDefaultField].Analyzer
}
if parsingMust {
parsingMustList.Terms = append(parsingMustList.Terms, q)
parsingMust = false
} else if parsingMustNot {
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
parsingMustNot = false
} else {
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
}
}
case 11:
//line query_syntax.y:108
{
field := yyS[yypt-2].s
str := yyS[yypt-0].s
logDebugGrammar("FIELD - %s STRING - %s", field, str)
q := &MatchQuery{
Match: str,
Field: field,
BoostVal: 1.0,
Explain: true,
}
if parsingMapping[field] != nil {
q.Analyzer = parsingMapping[field].Analyzer
}
if parsingMust {
parsingMustList.Terms = append(parsingMustList.Terms, q)
parsingMust = false
} else if parsingMustNot {
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
parsingMustNot = false
} else {
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
}
}
case 12:
//line query_syntax.y:132
{
field := yyS[yypt-2].s
phrase := yyS[yypt-0].s
logDebugGrammar("FIELD - %s PHRASE - %s", field, phrase)
q := &MatchPhraseQuery{
MatchPhrase: phrase,
Field: field,
BoostVal: 1.0,
Explain: true,
}
if parsingMapping[field] != nil {
q.Analyzer = parsingMapping[field].Analyzer
}
if parsingMust {
parsingMustList.Terms = append(parsingMustList.Terms, q)
parsingMust = false
} else if parsingMustNot {
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
parsingMustNot = false
} else {
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
}
}
case 13:
//line query_syntax.y:158
{
// NOTE(review): this rule has two symbols, so yypt-1 is the first one; the
// value read here is the first symbol's, not the last one's. The boost is
// only logged and never applied to a query -- confirm this is intended.
boost := yyS[yypt-1].n
logDebugGrammar("BOOST %d", boost)
}
case 14:
//line query_syntax.y:164
{
}
case 15:
//line query_syntax.y:168
{
}
}
goto yystack /* stack new state and value */
}

159
search/y.output Normal file
View File

@ -0,0 +1,159 @@
state 0
$accept: .input $end
searchPrefix: . (5)
PLUS shift 6
MINUS shift 7
. reduce 5 (src line 41)
input goto 1
searchParts goto 2
searchPart goto 3
searchPrefix goto 4
searchMustMustNot goto 5
state 1
$accept: input.$end
$end accept
. error
state 2
input: searchParts. (1)
. reduce 1 (src line 21)
state 3
searchParts: searchPart.searchParts
searchParts: searchPart. (3)
searchPrefix: . (5)
$end reduce 3 (src line 30)
PLUS shift 6
MINUS shift 7
. reduce 5 (src line 41)
searchParts goto 8
searchPart goto 3
searchPrefix goto 4
searchMustMustNot goto 5
state 4
searchPart: searchPrefix.searchBase searchSuffix
STRING shift 10
PHRASE shift 11
. error
searchBase goto 9
state 5
searchPrefix: searchMustMustNot. (6)
. reduce 6 (src line 44)
state 6
searchMustMustNot: PLUS. (7)
. reduce 7 (src line 50)
state 7
searchMustMustNot: MINUS. (8)
. reduce 8 (src line 55)
state 8
searchParts: searchPart searchParts. (2)
. reduce 2 (src line 26)
state 9
searchPart: searchPrefix searchBase.searchSuffix
searchSuffix: . (14)
BOOST shift 14
. reduce 14 (src line 163)
searchSuffix goto 12
searchBoost goto 13
state 10
searchBase: STRING. (9)
searchBase: STRING.COLON STRING
searchBase: STRING.COLON PHRASE
COLON shift 15
. reduce 9 (src line 61)
state 11
searchBase: PHRASE. (10)
. reduce 10 (src line 84)
state 12
searchPart: searchPrefix searchBase searchSuffix. (4)
. reduce 4 (src line 35)
state 13
searchSuffix: searchBoost. (15)
. reduce 15 (src line 167)
state 14
searchBoost: BOOST.INT
INT shift 16
. error
state 15
searchBase: STRING COLON.STRING
searchBase: STRING COLON.PHRASE
STRING shift 17
PHRASE shift 18
. error
state 16
searchBoost: BOOST INT. (13)
. reduce 13 (src line 157)
state 17
searchBase: STRING COLON STRING. (11)
. reduce 11 (src line 107)
state 18
searchBase: STRING COLON PHRASE. (12)
. reduce 12 (src line 131)
12 terminals, 9 nonterminals
16 grammar rules, 19/2000 states
0 shift/reduce, 0 reduce/reduce conflicts reported
58 working sets used
memory: parser 13/30000
0 extra closures
11 shift entries, 2 exceptions
9 goto entries
3 entries saved by goto default
Optimizer space used: output 18/30000
18 table entries, 0 zero
maximum spread: 12, maximum offset: 12