diff --git a/.gitignore b/.gitignore index d66b5df8..013f2a6e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,8 +5,9 @@ .project .settings .DS_Store -/analysis/tokenizers/cld2/cld2-read-only +/analysis/token_filters/cld2/cld2-read-only /examples/bleve_index_json/bleve_index_json /examples/bleve_index_json/index/ /examples/bleve_query/bleve_query /utils/bleve_dump/bleve_dump +/y.output diff --git a/analysis/analyzers/standard_analyzer/standard.go b/analysis/analyzers/standard_analyzer/standard.go deleted file mode 100644 index 95e809f9..00000000 --- a/analysis/analyzers/standard_analyzer/standard.go +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-package standard_analyzer - -import ( - "github.com/couchbaselabs/bleve/analysis" - "github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter" - "github.com/couchbaselabs/bleve/analysis/token_filters/stop_words_filter" - "github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary" -) - -func NewStandardAnalyzer() (*analysis.Analyzer, error) { - lower_case_filter, err := lower_case_filter.NewLowerCaseFilter() - if err != nil { - return nil, err - } - - stop_words_filter, err := stop_words_filter.NewStopWordsFilter() - if err != nil { - return nil, err - } - - standard := analysis.Analyzer{ - CharFilters: []analysis.CharFilter{}, - Tokenizer: unicode_word_boundary.NewUnicodeWordBoundaryTokenizer(), - TokenFilters: []analysis.TokenFilter{ - lower_case_filter, - stop_words_filter, - }, - } - - return &standard, nil -} diff --git a/analysis/char_filters/html_char_filter/html_char_filter.go b/analysis/char_filters/html_char_filter/html_char_filter.go deleted file mode 100644 index a564c064..00000000 --- a/analysis/char_filters/html_char_filter/html_char_filter.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-package html_char_filter - -import ( - "regexp" - - "github.com/couchbaselabs/bleve/analysis/char_filters/regexp_char_filter" -) - -// the origin of this regex is here: -// http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/ -// slightly modified by me to also match the DOCTYPE -const htmlTagPattern = `\s]+))?)+\s*|\s*)/?>` - -var htmlRegex = regexp.MustCompile(htmlTagPattern) - -type HtmlCharFilter struct { - *regexp_char_filter.RegexpCharFilter -} - -func NewHtmlCharFilter() *HtmlCharFilter { - return &HtmlCharFilter{ - regexp_char_filter.NewRegexpCharFilter(htmlRegex, []byte{' '}), - } -} diff --git a/analysis/char_filters/html_char_filter/html_char_filter_test.go b/analysis/char_filters/regexp_char_filter/regexp_char_filter_test.go similarity index 81% rename from analysis/char_filters/html_char_filter/html_char_filter_test.go rename to analysis/char_filters/regexp_char_filter/regexp_char_filter_test.go index eb01f8c1..4671342e 100644 --- a/analysis/char_filters/html_char_filter/html_char_filter_test.go +++ b/analysis/char_filters/regexp_char_filter/regexp_char_filter_test.go @@ -6,14 +6,19 @@ // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either express or implied. See the License for the specific language governing permissions // and limitations under the License. 
-package html_char_filter +package regexp_char_filter import ( "reflect" + "regexp" "testing" ) -func TestHtmlCharFilter(t *testing.T) { +func TestRegexpCharFilter(t *testing.T) { + + htmlTagPattern := `\s]+))?)+\s*|\s*)/?>` + htmlRegex := regexp.MustCompile(htmlTagPattern) + tests := []struct { input []byte output []byte @@ -43,7 +48,7 @@ func TestHtmlCharFilter(t *testing.T) { } for _, test := range tests { - filter := NewHtmlCharFilter() + filter := NewRegexpCharFilter(htmlRegex, []byte{' '}) output := filter.Filter(test.input) if !reflect.DeepEqual(output, test.output) { t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input)) diff --git a/analysis/freq_test.go b/analysis/freq_test.go index 1f267085..d9f3f805 100644 --- a/analysis/freq_test.go +++ b/analysis/freq_test.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package analysis import ( diff --git a/analysis/token_filters/cld2/README.md b/analysis/token_filters/cld2/README.md new file mode 100644 index 00000000..3533448c --- /dev/null +++ b/analysis/token_filters/cld2/README.md @@ -0,0 +1,30 @@ +# cld2 token filter + +A bleve token filter which passes the text of each token to the cld2 library. The library determines what it thinks the language most likely is. The ISO-639 language code replaces the token term. 
+ +In normal usage, you use this with the "single" tokenizer, so there is only one input token. Further, you should precede it with the "to_lower" filter so that the input term is in all lower-case unicode characters. + +# Building + +1. Acquire the source to cld2 in this directory. + + $ svn checkout http://cld2.googlecode.com/svn/trunk/ cld2-read-only + +2. Build cld2 + + $ cd cld2-read-only/internal/ + $ ./compile_libs.sh + + +3. Put the resulting libraries somewhere your dynamic linker can find. + + $ cp *.so /usr/local/lib + +4. Run the unit tests + + $ cd ../.. + $ go test -v + === RUN TestCld2Filter + --- PASS: TestCld2Filter (0.00 seconds) + PASS + ok github.com/couchbaselabs/bleve/analysis/token_filters/cld2 0.033s diff --git a/analysis/tokenizers/cld2/cld2_tokenizer.cc b/analysis/token_filters/cld2/cld2_filter.cc similarity index 58% rename from analysis/tokenizers/cld2/cld2_tokenizer.cc rename to analysis/token_filters/cld2/cld2_filter.cc index 9faf176e..cb116715 100644 --- a/analysis/tokenizers/cld2/cld2_tokenizer.cc +++ b/analysis/token_filters/cld2/cld2_filter.cc @@ -1,9 +1,16 @@ - +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
#include #include #include #include -#include "cld2_tokenizer.h" +#include "cld2_filter.h" #include "cld2-read-only/public/compact_lang_det.h" const char* DetectLang(const char *buffer) { diff --git a/analysis/token_filters/cld2/cld2_filter.go b/analysis/token_filters/cld2/cld2_filter.go new file mode 100644 index 00000000..b037592f --- /dev/null +++ b/analysis/token_filters/cld2/cld2_filter.go @@ -0,0 +1,52 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +package cld2 + +// #cgo LDFLAGS: -lcld2_full +// #include "cld2_filter.h" +// #include +import "C" + +import ( + "unsafe" + + "github.com/couchbaselabs/bleve/analysis" +) + +type Cld2Filter struct { +} + +func NewCld2Filter() *Cld2Filter { + return &Cld2Filter{} +} + +func (f *Cld2Filter) Filter(input analysis.TokenStream) analysis.TokenStream { + rv := make(analysis.TokenStream, 0) + + offset := 0 + for _, token := range input { + var err error + token.Term, err = f.detectLanguage(token.Term) + if err != nil { + token.Term = []byte("error") + } + token.Start = offset + token.End = token.Start + len(token.Term) + rv = append(rv, token) + offset = token.End + 1 + } + + return rv +} + +func (f *Cld2Filter) detectLanguage(input []byte) ([]byte, error) { + cstr := C.CString(string(input)) + res := C.DetectLang(cstr) + return C.GoBytes(unsafe.Pointer(res), C.int(C.strlen(res))), nil +} diff --git a/shredder/shredder.go b/analysis/token_filters/cld2/cld2_filter.h similarity index 78% 
rename from shredder/shredder.go rename to analysis/token_filters/cld2/cld2_filter.h index 49320f26..8f692986 100644 --- a/shredder/shredder.go +++ b/analysis/token_filters/cld2/cld2_filter.h @@ -6,12 +6,13 @@ // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -package shredder +#ifdef __cplusplus +extern "C" { +#endif -import ( - "github.com/couchbaselabs/bleve/document" -) +const char* DetectLang(const char *buffer); + +#ifdef __cplusplus +} /* extern "C" */ +#endif -type Shredder interface { - Shred(id string, body []byte) (document.Document, error) -} diff --git a/analysis/token_filters/cld2/cld2_filter_test.go b/analysis/token_filters/cld2/cld2_filter_test.go new file mode 100644 index 00000000..a0c0c749 --- /dev/null +++ b/analysis/token_filters/cld2/cld2_filter_test.go @@ -0,0 +1,112 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+package cld2 + +import ( + "reflect" + "testing" + + "github.com/couchbaselabs/bleve/analysis" +) + +func TestCld2Filter(t *testing.T) { + tests := []struct { + input analysis.TokenStream + output analysis.TokenStream + }{ + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("the quick brown fox"), + Start: 0, + End: 19, + Position: 1, + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("en"), + Start: 0, + End: 2, + Position: 1, + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("こんにちは世界"), + Start: 0, + End: 21, + Position: 1, + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("ja"), + Start: 0, + End: 2, + Position: 1, + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"), + Start: 0, + End: 72, + Position: 1, + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("th"), + Start: 0, + End: 2, + Position: 1, + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("مرحبا، العالم!"), + Start: 0, + End: 26, + Position: 1, + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("ar"), + Start: 0, + End: 2, + Position: 1, + }, + }, + }, + } + + filter := NewCld2Filter() + for _, test := range tests { + res := filter.Filter(test.input) + if !reflect.DeepEqual(res, test.output) { + t.Errorf("expected:") + for _, token := range test.output { + t.Errorf("%#v - %s", token, token.Term) + } + t.Errorf("got:") + for _, token := range res { + t.Errorf("%#v - %s", token, token.Term) + } + } + } + +} diff --git a/analysis/token_filters/length_filter/length_filter.go b/analysis/token_filters/length_filter/length_filter.go index b2ccdef7..5f296bc5 100644 --- a/analysis/token_filters/length_filter/length_filter.go +++ b/analysis/token_filters/length_filter/length_filter.go @@ -19,11 +19,11 @@ type LengthFilter struct { max int } -func NewLengthFilter(min, max 
int) (*LengthFilter, error) { +func NewLengthFilter(min, max int) *LengthFilter { return &LengthFilter{ min: min, max: max, - }, nil + } } func (f *LengthFilter) Filter(input analysis.TokenStream) analysis.TokenStream { diff --git a/analysis/token_filters/length_filter/length_filter_test.go b/analysis/token_filters/length_filter/length_filter_test.go index 3f4d3923..1f4eac24 100644 --- a/analysis/token_filters/length_filter/length_filter_test.go +++ b/analysis/token_filters/length_filter/length_filter_test.go @@ -28,10 +28,7 @@ func TestLengthFilter(t *testing.T) { }, } - lengthFilter, err := NewLengthFilter(3, 4) - if err != nil { - t.Fatal(err) - } + lengthFilter := NewLengthFilter(3, 4) ouputTokenStream := lengthFilter.Filter(inputTokenStream) if len(ouputTokenStream) != 1 { t.Fatalf("expected 1 output token") @@ -55,10 +52,7 @@ func TestLengthFilterNoMax(t *testing.T) { }, } - lengthFilter, err := NewLengthFilter(3, -1) - if err != nil { - t.Fatal(err) - } + lengthFilter := NewLengthFilter(3, -1) ouputTokenStream := lengthFilter.Filter(inputTokenStream) if len(ouputTokenStream) != 2 { t.Fatalf("expected 2 output token") @@ -85,10 +79,7 @@ func TestLengthFilterNoMin(t *testing.T) { }, } - lengthFilter, err := NewLengthFilter(-1, 4) - if err != nil { - t.Fatal(err) - } + lengthFilter := NewLengthFilter(-1, 4) ouputTokenStream := lengthFilter.Filter(inputTokenStream) if len(ouputTokenStream) != 2 { t.Fatalf("expected 2 output token") diff --git a/analysis/token_filters/lower_case_filter/lower_case_filter.go b/analysis/token_filters/lower_case_filter/lower_case_filter.go index 572a6d5a..bc0e7c8e 100644 --- a/analysis/token_filters/lower_case_filter/lower_case_filter.go +++ b/analysis/token_filters/lower_case_filter/lower_case_filter.go @@ -17,8 +17,8 @@ import ( type LowerCaseFilter struct { } -func NewLowerCaseFilter() (*LowerCaseFilter, error) { - return &LowerCaseFilter{}, nil +func NewLowerCaseFilter() *LowerCaseFilter { + return &LowerCaseFilter{} } func (f 
*LowerCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream { diff --git a/analysis/token_filters/lower_case_filter/lower_case_filter_test.go b/analysis/token_filters/lower_case_filter/lower_case_filter_test.go index 3c4cbf12..030c6c02 100644 --- a/analysis/token_filters/lower_case_filter/lower_case_filter_test.go +++ b/analysis/token_filters/lower_case_filter/lower_case_filter_test.go @@ -41,10 +41,7 @@ func TestLowerCaseFilter(t *testing.T) { }, } - filter, err := NewLowerCaseFilter() - if err != nil { - t.Fatal(err) - } + filter := NewLowerCaseFilter() ouputTokenStream := filter.Filter(inputTokenStream) if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) { t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream) diff --git a/analysis/token_filters/stemmer_filter/README.md b/analysis/token_filters/stemmer_filter/README.md new file mode 100644 index 00000000..56b0e863 --- /dev/null +++ b/analysis/token_filters/stemmer_filter/README.md @@ -0,0 +1,18 @@ +## Languages supported + +"danish", +"dutch", +"english", +"finnish", +"french", +"german", +"hungarian", +"italian", +"norwegian", +"porter", +"portuguese", +"romanian", +"russian", +"spanish", +"swedish", +"turkish" \ No newline at end of file diff --git a/analysis/token_filters/stemmer_filter/stemmer_filter.go b/analysis/token_filters/stemmer_filter/stemmer_filter.go index f9470b58..fd825c88 100644 --- a/analysis/token_filters/stemmer_filter/stemmer_filter.go +++ b/analysis/token_filters/stemmer_filter/stemmer_filter.go @@ -29,6 +29,14 @@ func NewStemmerFilter(lang string) (*StemmerFilter, error) { }, nil } +func MustNewStemmerFilter(lang string) *StemmerFilter { + sf, err := NewStemmerFilter(lang) + if err != nil { + panic(err) + } + return sf +} + func (s *StemmerFilter) List() []string { return snowball.LangList() } diff --git a/analysis/token_filters/stop_words_filter/stop_words_filter.go b/analysis/token_filters/stop_words_filter/stop_words_filter.go index 
8d3d2873..293d5501 100644 --- a/analysis/token_filters/stop_words_filter/stop_words_filter.go +++ b/analysis/token_filters/stop_words_filter/stop_words_filter.go @@ -24,10 +24,10 @@ type StopWordsFilter struct { stopWords map[string]bool } -func NewStopWordsFilter() (*StopWordsFilter, error) { +func NewStopWordsFilter() *StopWordsFilter { return &StopWordsFilter{ stopWords: buildStopWordMap(DEFAULT_STOP_WORDS), - }, nil + } } func (f *StopWordsFilter) Filter(input analysis.TokenStream) analysis.TokenStream { diff --git a/analysis/token_filters/stop_words_filter/stop_words_filter_test.go b/analysis/token_filters/stop_words_filter/stop_words_filter_test.go index fb120fa6..3e16bee0 100644 --- a/analysis/token_filters/stop_words_filter/stop_words_filter_test.go +++ b/analysis/token_filters/stop_words_filter/stop_words_filter_test.go @@ -44,10 +44,7 @@ func TestStopWordsFilter(t *testing.T) { }, } - filter, err := NewStopWordsFilter() - if err != nil { - t.Fatal(err) - } + filter := NewStopWordsFilter() ouputTokenStream := filter.Filter(inputTokenStream) if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) { t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream) diff --git a/analysis/tokenizers/cld2/README.md b/analysis/tokenizers/cld2/README.md deleted file mode 100644 index 5298a057..00000000 --- a/analysis/tokenizers/cld2/README.md +++ /dev/null @@ -1,28 +0,0 @@ -# cld2 tokenizer - -A bleve tokenizer which passes the input text to the cld2 library. The library determines what it thinks the language most likely is. The ISO-639 language code is returned as the single token resulting from the analysis. - -# Building - -1. Acquire the source to cld2 in this directory. - - $ svn checkout http://cld2.googlecode.com/svn/trunk/ cld2-read-only - -2. Build cld2 - - $ cd cld2-read-only/internal/ - $ ./compile_libs.sh - - -3. Put the resulting libraries somewhere your dynamic linker can find. - - $ cp *.so /usr/local/lib - -4. 
Run the unit tests - - $ cd ../.. - $ go test -v - === RUN TestCld2Tokenizer - --- PASS: TestCld2Tokenizer (0.03 seconds) - PASS - ok github.com/couchbaselabs/bleve/analysis/tokenizers/cld2 0.067s \ No newline at end of file diff --git a/analysis/tokenizers/cld2/cld2_tokenizer.go b/analysis/tokenizers/cld2/cld2_tokenizer.go deleted file mode 100644 index 4431fc4a..00000000 --- a/analysis/tokenizers/cld2/cld2_tokenizer.go +++ /dev/null @@ -1,41 +0,0 @@ -package cld2 - -// #cgo LDFLAGS: -Lcld2-read-only/internal/ -lcld2_full -// #include "cld2_tokenizer.h" -// #include -import "C" - -import ( - "unsafe" - - "github.com/couchbaselabs/bleve/analysis" -) - -type Cld2Tokenizer struct { -} - -func NewCld2Tokenizer() *Cld2Tokenizer { - return &Cld2Tokenizer{} -} - -func (rt *Cld2Tokenizer) Tokenize(input []byte) analysis.TokenStream { - rv := make(analysis.TokenStream, 0) - lang, err := rt.detectLanguage(input) - if err != nil { - return rv - } - token := analysis.Token{ - Term: lang, - Start: 0, - End: len(lang), - Position: 1, - } - rv = append(rv, &token) - return rv -} - -func (rt *Cld2Tokenizer) detectLanguage(input []byte) ([]byte, error) { - cstr := C.CString(string(input)) - res := C.DetectLang(cstr) - return C.GoBytes(unsafe.Pointer(res), C.int(C.strlen(res))), nil -} diff --git a/analysis/tokenizers/cld2/cld2_tokenizer.h b/analysis/tokenizers/cld2/cld2_tokenizer.h deleted file mode 100644 index 77bfa322..00000000 --- a/analysis/tokenizers/cld2/cld2_tokenizer.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -const char* DetectLang(const char *buffer); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - diff --git a/analysis/tokenizers/cld2/cld2_tokenizer_test.go b/analysis/tokenizers/cld2/cld2_tokenizer_test.go deleted file mode 100644 index 16209fe1..00000000 --- a/analysis/tokenizers/cld2/cld2_tokenizer_test.go +++ /dev/null @@ -1,76 +0,0 @@ -package cld2 - -import ( - "reflect" - "testing" - - 
"github.com/couchbaselabs/bleve/analysis" -) - -func TestCld2Tokenizer(t *testing.T) { - tests := []struct { - input []byte - output analysis.TokenStream - }{ - { - input: []byte("the quick brown fox"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("en"), - Start: 0, - End: 2, - Position: 1, - }, - }, - }, - { - input: []byte("こんにちは世界"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("ja"), - Start: 0, - End: 2, - Position: 1, - }, - }, - }, - { - input: []byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("th"), - Start: 0, - End: 2, - Position: 1, - }, - }, - }, - { - input: []byte("مرحبا، العالم!"), - output: analysis.TokenStream{ - &analysis.Token{ - Term: []byte("ar"), - Start: 0, - End: 2, - Position: 1, - }, - }, - }, - } - - tokenizer := NewCld2Tokenizer() - for _, test := range tests { - res := tokenizer.Tokenize(test.input) - if !reflect.DeepEqual(res, test.output) { - t.Errorf("expected:") - for _, token := range test.output { - t.Errorf("%#v - %s", token, token.Term) - } - t.Errorf("got:") - for _, token := range res { - t.Errorf("%#v - %s", token, token.Term) - } - } - } - -} diff --git a/config.go b/config.go new file mode 100644 index 00000000..addb7033 --- /dev/null +++ b/config.go @@ -0,0 +1,172 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+package bleve + +import ( + "fmt" + "regexp" + + "github.com/couchbaselabs/bleve/analysis" + + "github.com/couchbaselabs/bleve/analysis/char_filters/regexp_char_filter" + + "github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer" + "github.com/couchbaselabs/bleve/analysis/tokenizers/single_token" + "github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary" + + "github.com/couchbaselabs/bleve/analysis/token_filters/cld2" + "github.com/couchbaselabs/bleve/analysis/token_filters/length_filter" + "github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter" + "github.com/couchbaselabs/bleve/analysis/token_filters/stemmer_filter" + "github.com/couchbaselabs/bleve/analysis/token_filters/stop_words_filter" + + "github.com/couchbaselabs/bleve/search" +) + +type AnalysisConfig struct { + CharFilters map[string]analysis.CharFilter + Tokenizers map[string]analysis.Tokenizer + TokenFilters map[string]analysis.TokenFilter + Analyzers map[string]*analysis.Analyzer +} + +type HighlightConfig struct { + Highlighters map[string]search.Highlighter +} + +type Config struct { + Analysis *AnalysisConfig + DefaultAnalyzer *string + Highlight *HighlightConfig + DefaultHighlighter *string +} + +func (c *Config) BuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) (*analysis.Analyzer, error) { + rv := analysis.Analyzer{} + if len(charFilterNames) > 0 { + rv.CharFilters = make([]analysis.CharFilter, len(charFilterNames)) + for i, charFilterName := range charFilterNames { + charFilter := c.Analysis.CharFilters[charFilterName] + if charFilter == nil { + return nil, fmt.Errorf("no character filter named `%s` registered", charFilterName) + } + rv.CharFilters[i] = charFilter + } + } + rv.Tokenizer = c.Analysis.Tokenizers[tokenizerName] + if rv.Tokenizer == nil { + return nil, fmt.Errorf("no tokenizer named `%s` registered", tokenizerName) + } + if len(tokenFilterNames) > 0 { + rv.TokenFilters = 
make([]analysis.TokenFilter, len(tokenFilterNames)) + for i, tokenFilterName := range tokenFilterNames { + tokenFilter := c.Analysis.TokenFilters[tokenFilterName] + if tokenFilter == nil { + return nil, fmt.Errorf("no token filter named `%s` registered", tokenFilterName) + } + rv.TokenFilters[i] = tokenFilter + } + } + return &rv, nil +} + +func (c *Config) MustBuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) *analysis.Analyzer { + analyzer, err := c.BuildNewAnalyzer(charFilterNames, tokenizerName, tokenFilterNames) + if err != nil { + panic(err) + } + return analyzer +} + +func NewConfig() *Config { + return &Config{ + Analysis: &AnalysisConfig{ + CharFilters: make(map[string]analysis.CharFilter), + Tokenizers: make(map[string]analysis.Tokenizer), + TokenFilters: make(map[string]analysis.TokenFilter), + Analyzers: make(map[string]*analysis.Analyzer), + }, + Highlight: &HighlightConfig{ + Highlighters: make(map[string]search.Highlighter), + }, + } +} + +var config *Config + +func init() { + + // build the default configuration + config = NewConfig() + + // register char filters + htmlCharFilterRegexp := regexp.MustCompile(`\s]+))?)+\s*|\s*)/?>`) + htmlCharFilter := regexp_char_filter.NewRegexpCharFilter(htmlCharFilterRegexp, []byte{' '}) + config.Analysis.CharFilters["html"] = htmlCharFilter + + // register tokenizers + whitespaceTokenizerRegexp := regexp.MustCompile(`\w+`) + config.Analysis.Tokenizers["single"] = single_token.NewSingleTokenTokenizer() + config.Analysis.Tokenizers["unicode"] = unicode_word_boundary.NewUnicodeWordBoundaryTokenizer() + config.Analysis.Tokenizers["unicode_th"] = unicode_word_boundary.NewUnicodeWordBoundaryCustomLocaleTokenizer("th_TH") + config.Analysis.Tokenizers["whitespace"] = regexp_tokenizer.NewRegexpTokenizer(whitespaceTokenizerRegexp) + + // register token filters + config.Analysis.TokenFilters["detect_lang"] = cld2.NewCld2Filter() + config.Analysis.TokenFilters["short"] = 
length_filter.NewLengthFilter(3, -1) + config.Analysis.TokenFilters["long"] = length_filter.NewLengthFilter(-1, 255) + config.Analysis.TokenFilters["to_lower"] = lower_case_filter.NewLowerCaseFilter() + config.Analysis.TokenFilters["stemmer_da"] = stemmer_filter.MustNewStemmerFilter("danish") + config.Analysis.TokenFilters["stemmer_nl"] = stemmer_filter.MustNewStemmerFilter("dutch") + config.Analysis.TokenFilters["stemmer_en"] = stemmer_filter.MustNewStemmerFilter("english") + config.Analysis.TokenFilters["stemmer_fi"] = stemmer_filter.MustNewStemmerFilter("finnish") + config.Analysis.TokenFilters["stemmer_fr"] = stemmer_filter.MustNewStemmerFilter("french") + config.Analysis.TokenFilters["stemmer_de"] = stemmer_filter.MustNewStemmerFilter("german") + config.Analysis.TokenFilters["stemmer_hu"] = stemmer_filter.MustNewStemmerFilter("hungarian") + config.Analysis.TokenFilters["stemmer_it"] = stemmer_filter.MustNewStemmerFilter("italian") + config.Analysis.TokenFilters["stemmer_no"] = stemmer_filter.MustNewStemmerFilter("norwegian") + config.Analysis.TokenFilters["stemmer_porter"] = stemmer_filter.MustNewStemmerFilter("porter") + config.Analysis.TokenFilters["stemmer_pt"] = stemmer_filter.MustNewStemmerFilter("portuguese") + config.Analysis.TokenFilters["stemmer_ro"] = stemmer_filter.MustNewStemmerFilter("romanian") + config.Analysis.TokenFilters["stemmer_ru"] = stemmer_filter.MustNewStemmerFilter("russian") + config.Analysis.TokenFilters["stemmer_es"] = stemmer_filter.MustNewStemmerFilter("spanish") + config.Analysis.TokenFilters["stemmer_sv"] = stemmer_filter.MustNewStemmerFilter("swedish") + config.Analysis.TokenFilters["stemmer_tr"] = stemmer_filter.MustNewStemmerFilter("turkish") + config.Analysis.TokenFilters["stop_token"] = stop_words_filter.NewStopWordsFilter() + + // register analyzers + keywordAnalyzer := config.MustBuildNewAnalyzer([]string{}, "single", []string{}) + config.Analysis.Analyzers["keyword"] = keywordAnalyzer + simpleAnalyzer := 
config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower"}) + config.Analysis.Analyzers["simple"] = simpleAnalyzer + standardAnalyzer := config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower", "stop_token"}) + config.Analysis.Analyzers["standard"] = standardAnalyzer + englishAnalyzer := config.MustBuildNewAnalyzer([]string{}, "unicode", []string{"to_lower", "stemmer_en", "stop_token"}) + config.Analysis.Analyzers["english"] = englishAnalyzer + detectLangAnalyzer := config.MustBuildNewAnalyzer([]string{}, "single", []string{"to_lower", "detect_lang"}) + config.Analysis.Analyzers["detect_lang"] = detectLangAnalyzer + + // register ansi highlighter + config.Highlight.Highlighters["ansi"] = search.NewSimpleHighlighter() + + // register html highlighter + htmlFormatter := search.NewHTMLFragmentFormatterCustom(``, ``) + htmlHighlighter := search.NewSimpleHighlighter() + htmlHighlighter.SetFragmentFormatter(htmlFormatter) + config.Highlight.Highlighters["html"] = htmlHighlighter + + // set the default analyzer + simpleAnalyzerName := "simple" + config.DefaultAnalyzer = &simpleAnalyzerName + + // set the default highlighter + htmlHighlighterName := "html" + config.DefaultHighlighter = &htmlHighlighterName + +} diff --git a/document/document.go b/document/document.go index e304185b..cc67a1f3 100644 --- a/document/document.go +++ b/document/document.go @@ -9,7 +9,8 @@ package document import ( - "encoding/json" + "fmt" + "log" ) type Document struct { @@ -36,7 +37,21 @@ func (d *Document) AddField(f Field) *Document { return d } -func (d *Document) String() string { - bytes, _ := json.MarshalIndent(d, "", " ") - return string(bytes) +func (d *Document) GoString() string { + fields := "" + for i, field := range d.Fields { + if i != 0 { + fields += ", " + } + fields += fmt.Sprintf("%#v", field) + } + compositeFields := "" + for i, field := range d.CompositeFields { + log.Printf("see composite field") + if i != 0 { + compositeFields += ", " + 
} + compositeFields += fmt.Sprintf("%#v", field) + } + return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID, fields, compositeFields) } diff --git a/document/field_composite.go b/document/field_composite.go index 2d60e3b8..d7db5bb9 100644 --- a/document/field_composite.go +++ b/document/field_composite.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package document import ( diff --git a/document/field_text.go b/document/field_text.go index f62edfd3..e883962c 100644 --- a/document/field_text.go +++ b/document/field_text.go @@ -9,22 +9,11 @@ package document import ( - "log" + "fmt" "github.com/couchbaselabs/bleve/analysis" - "github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer" ) -var standardAnalyzer *analysis.Analyzer - -func init() { - var err error - standardAnalyzer, err = standard_analyzer.NewStandardAnalyzer() - if err != nil { - log.Fatal(err) - } -} - const DEFAULT_TEXT_INDEXING_OPTIONS = INDEX_FIELD type TextField struct { @@ -43,7 +32,19 @@ func (t *TextField) Options() IndexingOptions { } func (t *TextField) Analyze() (int, analysis.TokenFrequencies) { - tokens := t.analyzer.Analyze(t.Value()) + var tokens analysis.TokenStream + if t.analyzer != nil { + tokens = t.analyzer.Analyze(t.Value()) + } else { + tokens = analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: len(t.value), + Term: t.value, + Position: 1, + }, + } + } fieldLength := len(tokens) // 
number of tokens in this doc field tokenFreqs := analysis.TokenFrequency(tokens) return fieldLength, tokenFreqs @@ -53,15 +54,27 @@ func (t *TextField) Value() []byte { return t.value } +func (t *TextField) GoString() string { + return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %s, Value: %s}", t.name, t.options, t.analyzer, t.value) +} + func NewTextField(name string, value []byte) *TextField { return NewTextFieldWithIndexingOptions(name, value, DEFAULT_TEXT_INDEXING_OPTIONS) } func NewTextFieldWithIndexingOptions(name string, value []byte, options IndexingOptions) *TextField { + return &TextField{ + name: name, + options: options, + value: value, + } +} + +func NewTextFieldWithAnalyzer(name string, value []byte, analyzer *analysis.Analyzer) *TextField { return &TextField{ name: name, - options: options, - analyzer: standardAnalyzer, + options: DEFAULT_TEXT_INDEXING_OPTIONS, + analyzer: analyzer, value: value, } } diff --git a/document/indexing_options.go b/document/indexing_options.go index ca83d04b..2507a4c7 100644 --- a/document/indexing_options.go +++ b/document/indexing_options.go @@ -27,3 +27,23 @@ func (o IndexingOptions) IsStored() bool { func (o IndexingOptions) IncludeTermVectors() bool { return o&INCLUDE_TERM_VECTORS != 0 } + +func (o IndexingOptions) String() string { + rv := "" + if o.IsIndexed() { + rv += "INDEXED" + } + if o.IsStored() { + if rv != "" { + rv += ", " + } + rv += "STORE" + } + if o.IncludeTermVectors() { + if rv != "" { + rv += ", " + } + rv += "TV" + } + return rv +} diff --git a/document/mapping.go b/document/mapping.go deleted file mode 100644 index fe178d61..00000000 --- a/document/mapping.go +++ /dev/null @@ -1,13 +0,0 @@ -package document - -import ( - "github.com/couchbaselabs/bleve/analysis" -) - -type FieldMapping struct { - Name string - Options IndexingOptions - Analyzer *analysis.Analyzer -} - -type Mapping map[string]*FieldMapping diff --git a/analysis/analyzers/keyword_analyzer/keyword.go b/error.go 
similarity index 60% rename from analysis/analyzers/keyword_analyzer/keyword.go rename to error.go index 0927fe3b..bbad9448 100644 --- a/analysis/analyzers/keyword_analyzer/keyword.go +++ b/error.go @@ -6,19 +6,19 @@ // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -package keyword_analyzer +package bleve -import ( - "github.com/couchbaselabs/bleve/analysis" - "github.com/couchbaselabs/bleve/analysis/tokenizers/single_token" +const ( + ERROR_NO_ID Error = iota + ERROR_NO_TYPE ) -func NewKeywordAnalyzer() (*analysis.Analyzer, error) { - keyword := analysis.Analyzer{ - CharFilters: []analysis.CharFilter{}, - Tokenizer: single_token.NewSingleTokenTokenizer(), - TokenFilters: []analysis.TokenFilter{}, - } +type Error int - return &keyword, nil +func (e Error) Error() string { + return errorMessages[int(e)] +} + +var errorMessages = map[int]string{ + 0: "unable to determine document id", } diff --git a/examples/bleve_index_json/main.go b/examples/bleve_index_json/main.go index ba41f6bc..206734e1 100644 --- a/examples/bleve_index_json/main.go +++ b/examples/bleve_index_json/main.go @@ -13,67 +13,64 @@ import ( "io/ioutil" "log" - "github.com/couchbaselabs/bleve/document" - "github.com/couchbaselabs/bleve/index/store/leveldb" - "github.com/couchbaselabs/bleve/index/upside_down" - "github.com/couchbaselabs/bleve/shredder" + "github.com/couchbaselabs/bleve" ) var jsonDir = flag.String("jsonDir", "json", "json directory") var indexDir = flag.String("indexDir", "index", "index directory") -var storeFields = flag.Bool("storeFields", false, "store field data") -var includeTermVectors = flag.Bool("includeTermVectors", false, "include term vectors") func main() { flag.Parse() - indexOptions := document.INDEX_FIELD - if *storeFields { - indexOptions |= document.STORE_FIELD - } - if *includeTermVectors { - 
indexOptions |= document.INCLUDE_TERM_VECTORS - } + // create a new default mapping + mapping := bleve.NewIndexMapping() - // create a automatic JSON document shredder - jsonShredder := shredder.NewAutoJsonShredderWithOptions(indexOptions) - - // create a new index - store, err := leveldb.Open(*indexDir) - if err != nil { - log.Fatal(err) - } - index := upside_down.NewUpsideDownCouch(store) - err = index.Open() + // open the index + index, err := bleve.Open(*indexDir, mapping) if err != nil { log.Fatal(err) } defer index.Close() - // open the directory - dirEntries, err := ioutil.ReadDir(*jsonDir) - if err != nil { - log.Fatal(err) - } - - // walk the directory entries - for _, dirEntry := range dirEntries { - // read the bytes - jsonBytes, err := ioutil.ReadFile(*jsonDir + "/" + dirEntry.Name()) - if err != nil { - log.Fatal(err) - } - // shred them into a document - doc, err := jsonShredder.Shred(dirEntry.Name(), jsonBytes) - if err != nil { - log.Fatal(err) - } - //log.Printf("%+v", doc) - // update the index - err = index.Update(doc) + for jsonFile := range walkDirectory(*jsonDir) { + // index the json files + err = index.IndexJSONID(jsonFile.filename, jsonFile.contents) if err != nil { log.Fatal(err) } } } + +type jsonFile struct { + filename string + contents []byte +} + +func walkDirectory(dir string) chan jsonFile { + rv := make(chan jsonFile) + go func() { + defer close(rv) + + // open the directory + dirEntries, err := ioutil.ReadDir(*jsonDir) + if err != nil { + log.Fatal(err) + } + + // walk the directory entries + for _, dirEntry := range dirEntries { + // read the bytes + jsonBytes, err := ioutil.ReadFile(*jsonDir + "/" + dirEntry.Name()) + if err != nil { + log.Fatal(err) + } + + rv <- jsonFile{ + filename: dirEntry.Name(), + contents: jsonBytes, + } + } + }() + return rv +} diff --git a/examples/bleve_query/main.go b/examples/bleve_query/main.go index ee3710f7..ab2321fb 100644 --- a/examples/bleve_query/main.go +++ b/examples/bleve_query/main.go @@ 
-12,85 +12,50 @@ import ( "flag" "fmt" "log" + "strings" - "github.com/couchbaselabs/bleve/index/store/leveldb" - "github.com/couchbaselabs/bleve/index/upside_down" - "github.com/couchbaselabs/bleve/search" + "github.com/couchbaselabs/bleve" ) -var field = flag.String("field", "description", "field to query") +var field = flag.String("field", "_all", "field to query") var indexDir = flag.String("indexDir", "index", "index directory") var limit = flag.Int("limit", 10, "limit to first N results") -var includeHighlights = flag.Bool("highlight", false, "highlight matches") +var skip = flag.Int("skip", 0, "skip the first N results") +var explain = flag.Bool("explain", false, "explain scores") +var includeHighlights = flag.Bool("highlight", true, "highlight matches") func main() { flag.Parse() if flag.NArg() < 1 { - log.Fatal("Specify search term") + log.Fatal("Specify search query") } + // create a new default mapping + mapping := bleve.NewIndexMapping() + // open index - store, err := leveldb.Open(*indexDir) - if err != nil { - log.Fatal(err) - } - index := upside_down.NewUpsideDownCouch(store) - err = index.Open() + index, err := bleve.Open(*indexDir, mapping) if err != nil { log.Fatal(err) } defer index.Close() - tq := search.TermQuery{ - Term: flag.Arg(0), - Field: *field, - BoostVal: 1.0, - Explain: true, + // build a search with the provided parameters + queryString := strings.Join(flag.Args(), " ") + query := bleve.NewSyntaxQuery(queryString) + searchRequest := bleve.NewSearchRequest(query, *limit, *skip, *explain) + + // enable highlights if requested + if *includeHighlights { + searchRequest.Highlight = bleve.NewHighlightWithStyle("ansi") } - collector := search.NewTopScorerCollector(*limit) - searcher, err := tq.Searcher(index) - if err != nil { - log.Fatalf("searcher error: %v", err) - return - } - err = collector.Collect(searcher) + + // execute the search + searchResult, err := index.Search(searchRequest) if err != nil { log.Fatalf("search error: %v", err) 
- return - } - results := collector.Results() - if len(results) == 0 { - fmt.Printf("No matches\n") - } else { - last := uint64(*limit) - if searcher.Count() < last { - last = searcher.Count() - } - fmt.Printf("%d matches, showing %d through %d\n", searcher.Count(), 1, last) - for i, result := range results { - fmt.Printf("%2d. %s (%f)\n", i+1, result.ID, result.Score) - if *includeHighlights { - highlighter := search.NewSimpleHighlighter() - - doc, err := index.Document(result.ID) - if err != nil { - fmt.Print(err) - return - } - - fragments := highlighter.BestFragmentsInField(result, doc, *field, 5) - for _, fragment := range fragments { - fmt.Printf("\t%s\n", fragment) - } - if len(fragments) == 0 { - for _, f := range doc.Fields { - fmt.Printf("\tfield: %s\n", f) - } - } - - } - } } + fmt.Println(searchResult) } diff --git a/search/build.sh b/genparser.sh similarity index 75% rename from search/build.sh rename to genparser.sh index 04232e9e..91f8d8ad 100755 --- a/search/build.sh +++ b/genparser.sh @@ -4,5 +4,3 @@ echo Running nex... nex query_syntax.nex echo Running goyacc... go tool yacc query_syntax.y -echo Running go build... -go build diff --git a/http/debug.go b/http/debug.go new file mode 100644 index 00000000..17b44e3e --- /dev/null +++ b/http/debug.go @@ -0,0 +1,67 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+package http + +import ( + "fmt" + "net/http" + + "github.com/couchbaselabs/bleve/index/upside_down" + "github.com/gorilla/mux" +) + +// DebugDocumentHandler allows you to debug the index content +// for a given document ID. The document ID should be mapped +// to the mux router URL with name "docID" +type DebugDocumentHandler struct { + defaultIndexName string +} + +func NewDebugDocumentHandler(defaultIndexName string) *DebugDocumentHandler { + return &DebugDocumentHandler{ + defaultIndexName: defaultIndexName, + } +} + +func (h *DebugDocumentHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { + + // find the index to operate on + indexName := mux.Vars(req)["indexName"] + if indexName == "" { + indexName = h.defaultIndexName + } + index := IndexByName(indexName) + if index == nil { + showError(w, req, fmt.Sprintf("no such index '%s'", indexName), 404) + return + } + + // find the docID + docID := mux.Vars(req)["docID"] + rows, err := index.DumpDoc(docID) + if err != nil { + showError(w, req, fmt.Sprintf("error debugging document: %v", err), 500) + return + } + rv := make([]interface{}, 0) + for _, row := range rows { + udcRow, ok := row.(upside_down.UpsideDownCouchRow) + if ok { + tmp := struct { + Key []byte `json:"key"` + Val []byte `json:"val"` + }{ + Key: udcRow.Key(), + Val: udcRow.Value(), + } + rv = append(rv, tmp) + } + } + mustEncode(w, rv) +} diff --git a/http/registry.go b/http/registry.go new file mode 100644 index 00000000..734b55a0 --- /dev/null +++ b/http/registry.go @@ -0,0 +1,35 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +package http + +import ( + "sync" + + "github.com/couchbaselabs/bleve" +) + +var indexNameMapping map[string]bleve.Index +var indexNameMappingLock sync.RWMutex + +func RegisterIndexName(name string, index bleve.Index) { + indexNameMappingLock.Lock() + defer indexNameMappingLock.Unlock() + + if indexNameMapping == nil { + indexNameMapping = make(map[string]bleve.Index) + } + indexNameMapping[name] = index +} + +func IndexByName(name string) bleve.Index { + indexNameMappingLock.RLock() + defer indexNameMappingLock.RUnlock() + + return indexNameMapping[name] +} diff --git a/http/search.go b/http/search.go new file mode 100644 index 00000000..3588fd5e --- /dev/null +++ b/http/search.go @@ -0,0 +1,85 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+package http + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "log" + "net/http" + + "github.com/gorilla/mux" + + "github.com/couchbaselabs/bleve" +) + +// SearchHandler can handle search requests sent over HTTP. +// The index name can be selected in the URL by mapping a +// gorilla mux var, or it can be set manually by +// setting the defaultIndexName value +type SearchHandler struct { + defaultIndexName string +} + +func NewSearchHandler(defaultIndexName string) *SearchHandler { + return &SearchHandler{ + defaultIndexName: defaultIndexName, + } +} + +func (h *SearchHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { + + // find the index to operate on + indexName := mux.Vars(req)["indexName"] + if indexName == "" { + indexName = h.defaultIndexName + } + index := IndexByName(indexName) + if index == nil { + showError(w, req, fmt.Sprintf("no such index '%s'", indexName), 404) + return + } + + // read the request body + requestBody, err := ioutil.ReadAll(req.Body) + if err != nil { + showError(w, req, fmt.Sprintf("error reading request body: %v", err), 400) + return + } + + log.Printf("request body: %s", requestBody) + + // parse the request + var searchRequest bleve.SearchRequest + err = json.Unmarshal(requestBody, &searchRequest) + if err != nil { + showError(w, req, fmt.Sprintf("error parsing query: %v", err), 400) + return + } + + log.Printf("parsed request %#v", searchRequest) + + // validate the query + err = searchRequest.Query.Validate() + if err != nil { + showError(w, req, fmt.Sprintf("error validating query: %v", err), 400) + return + } + + // execute the query + searchResponse, err := index.Search(&searchRequest) + if err != nil { + showError(w, req, fmt.Sprintf("error executing query: %v", err), 500) + return + } + + // encode the response + mustEncode(w, searchResponse) +} diff --git a/http/util.go b/http/util.go new file mode 100644 index 00000000..e2a701ca --- /dev/null +++ b/http/util.go @@ -0,0 +1,34 @@ +// Copyright (c) 2014 
Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +package http + +import ( + "encoding/json" + "io" + "log" + "net/http" +) + +func showError(w http.ResponseWriter, r *http.Request, + msg string, code int) { + log.Printf("Reporting error %v/%v", code, msg) + http.Error(w, msg, code) +} + +func mustEncode(w io.Writer, i interface{}) { + if headered, ok := w.(http.ResponseWriter); ok { + headered.Header().Set("Cache-Control", "no-cache") + headered.Header().Set("Content-type", "application/json") + } + + e := json.NewEncoder(w) + if err := e.Encode(i); err != nil { + panic(err) + } +} diff --git a/index.go b/index.go new file mode 100644 index 00000000..a454d1e2 --- /dev/null +++ b/index.go @@ -0,0 +1,47 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+package bleve + +import ( + "github.com/couchbaselabs/bleve/document" +) + +type Identifier interface { + ID() string +} + +type Classifier interface { + Type() string +} + +type Index interface { + Index(data interface{}) error + IndexID(id string, data interface{}) error + + IndexJSON(data []byte) error + IndexJSONID(id string, data []byte) error + + Delete(data interface{}) error + DeleteID(id string) error + + Document(id string) (*document.Document, error) + DocCount() uint64 + + Search(req *SearchRequest) (*SearchResult, error) + + DumpDoc(id string) ([]interface{}, error) + + Close() +} + +// Open the index at the specified path, and create it if it does not exist. +// The provided mapping will be used for all Index/Search operations. +func Open(path string, mapping *IndexMapping) (Index, error) { + return newIndex(path, mapping) +} diff --git a/index/store/goforestdb/batch.go b/index/store/goforestdb/batch.go index 4d4d7c8f..c73d603d 100644 --- a/index/store/goforestdb/batch.go +++ b/index/store/goforestdb/batch.go @@ -1,3 +1,12 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + // +build forestdb package goforestdb diff --git a/index/store/goforestdb/iterator.go b/index/store/goforestdb/iterator.go index 64761442..e5f10bef 100644 --- a/index/store/goforestdb/iterator.go +++ b/index/store/goforestdb/iterator.go @@ -1,3 +1,12 @@ +// Copyright (c) 2014 Couchbase, Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + // +build forestdb package goforestdb diff --git a/index/store/goforestdb/store.go b/index/store/goforestdb/store.go index fe502b45..671bf87c 100644 --- a/index/store/goforestdb/store.go +++ b/index/store/goforestdb/store.go @@ -1,3 +1,12 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + // +build forestdb package goforestdb diff --git a/index/store/goforestdb/store_test.go b/index/store/goforestdb/store_test.go index a171a936..9b5d5931 100644 --- a/index/store/goforestdb/store_test.go +++ b/index/store/goforestdb/store_test.go @@ -1,3 +1,12 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + // +build forestdb package goforestdb diff --git a/index/store/gouchstore/batch.go b/index/store/gouchstore/batch.go index c11e6e50..e706c6c0 100644 --- a/index/store/gouchstore/batch.go +++ b/index/store/gouchstore/batch.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package gouchstore import ( diff --git a/index/store/gouchstore/iterator.go b/index/store/gouchstore/iterator.go index fa6177a4..dada07b1 100644 --- a/index/store/gouchstore/iterator.go +++ b/index/store/gouchstore/iterator.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. 
See the License for the specific language governing permissions +// and limitations under the License. package gouchstore import ( diff --git a/index/store/gouchstore/store.go b/index/store/gouchstore/store.go index 67250647..e8b01e6d 100644 --- a/index/store/gouchstore/store.go +++ b/index/store/gouchstore/store.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package gouchstore import ( diff --git a/index/store/gouchstore/store_test.go b/index/store/gouchstore/store_test.go index fceab649..dd2ce0f7 100644 --- a/index/store/gouchstore/store_test.go +++ b/index/store/gouchstore/store_test.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
package gouchstore import ( diff --git a/index/store/gouchstore/util.go b/index/store/gouchstore/util.go index 7e8fcb6b..d857815b 100644 --- a/index/store/gouchstore/util.go +++ b/index/store/gouchstore/util.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package gouchstore import ( diff --git a/index/store/inmem/batch.go b/index/store/inmem/batch.go index 46bca333..a657115b 100644 --- a/index/store/inmem/batch.go +++ b/index/store/inmem/batch.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package inmem type InMemBatch struct { diff --git a/index/store/inmem/iterator.go b/index/store/inmem/iterator.go index ee652f40..54a7044e 100644 --- a/index/store/inmem/iterator.go +++ b/index/store/inmem/iterator.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package inmem import ( diff --git a/index/store/inmem/store.go b/index/store/inmem/store.go index d6168e6e..986f41bd 100644 --- a/index/store/inmem/store.go +++ b/index/store/inmem/store.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package inmem import ( diff --git a/index/store/inmem/store_test.go b/index/store/inmem/store_test.go index 77b222bf..a01f005d 100644 --- a/index/store/inmem/store_test.go +++ b/index/store/inmem/store_test.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package inmem import ( diff --git a/index/store/kvstore.go b/index/store/kvstore.go index 295e7494..086504ed 100644 --- a/index/store/kvstore.go +++ b/index/store/kvstore.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package store type KVBatch interface { diff --git a/index/store/leveldb/batch.go b/index/store/leveldb/batch.go index 5e7115f8..c2986d4c 100644 --- a/index/store/leveldb/batch.go +++ b/index/store/leveldb/batch.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. 
See the License for the specific language governing permissions +// and limitations under the License. package leveldb import ( diff --git a/index/store/leveldb/iterator.go b/index/store/leveldb/iterator.go index f09629be..06c8d155 100644 --- a/index/store/leveldb/iterator.go +++ b/index/store/leveldb/iterator.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package leveldb import ( diff --git a/index/store/leveldb/store.go b/index/store/leveldb/store.go index 2473c077..4735fe1a 100644 --- a/index/store/leveldb/store.go +++ b/index/store/leveldb/store.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package leveldb import ( diff --git a/index/store/leveldb/store_test.go b/index/store/leveldb/store_test.go index 45d6f262..6432bd29 100644 --- a/index/store/leveldb/store_test.go +++ b/index/store/leveldb/store_test.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package leveldb import ( diff --git a/index/store/leveldb/util.go b/index/store/leveldb/util.go index a539a29d..4f4ae893 100644 --- a/index/store/leveldb/util.go +++ b/index/store/leveldb/util.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package leveldb import ( diff --git a/index/store/test/common.go b/index/store/test/common.go index acd0fee1..ee254152 100644 --- a/index/store/test/common.go +++ b/index/store/test/common.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package store_test import ( diff --git a/index/upside_down/benchmark_common_test.go b/index/upside_down/benchmark_common_test.go index ff9a854a..53128cfe 100644 --- a/index/upside_down/benchmark_common_test.go +++ b/index/upside_down/benchmark_common_test.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package upside_down import ( diff --git a/index/upside_down/benchmark_forestdb_test.go b/index/upside_down/benchmark_forestdb_test.go index f3adf467..d2126791 100644 --- a/index/upside_down/benchmark_forestdb_test.go +++ b/index/upside_down/benchmark_forestdb_test.go @@ -1,3 +1,12 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + // +build forestdb package upside_down diff --git a/index/upside_down/benchmark_gouchstore_test.go b/index/upside_down/benchmark_gouchstore_test.go index 9cca9d2e..c6307a6f 100644 --- a/index/upside_down/benchmark_gouchstore_test.go +++ b/index/upside_down/benchmark_gouchstore_test.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package upside_down import ( diff --git a/index/upside_down/benchmark_inmem_test.go b/index/upside_down/benchmark_inmem_test.go index 24f03924..49b2be04 100644 --- a/index/upside_down/benchmark_inmem_test.go +++ b/index/upside_down/benchmark_inmem_test.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package upside_down import ( diff --git a/index/upside_down/benchmark_leveldb_test.go b/index/upside_down/benchmark_leveldb_test.go index ef5f08c2..7429edc3 100644 --- a/index/upside_down/benchmark_leveldb_test.go +++ b/index/upside_down/benchmark_leveldb_test.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
package upside_down
 
 import (
diff --git a/index/upside_down/reader_test.go b/index/upside_down/reader_test.go
index f5694abc..2054afc5 100644
--- a/index/upside_down/reader_test.go
+++ b/index/upside_down/reader_test.go
@@ -13,7 +13,6 @@ import (
 	"reflect"
 	"testing"
 
-	_ "github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
 	"github.com/couchbaselabs/bleve/document"
 	"github.com/couchbaselabs/bleve/index"
 	"github.com/couchbaselabs/bleve/index/store/gouchstore"
@@ -40,8 +39,8 @@ func TestIndexReader(t *testing.T) {
 	expectedCount += 1
 
 	doc = document.NewDocument("2")
-	doc.AddField(document.NewTextField("name", []byte("test test test")))
-	doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS))
+	doc.AddField(document.NewTextFieldWithAnalyzer("name", []byte("test test test"), testAnalyzer))
+	doc.AddField(document.NewTextFieldCustom("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS, testAnalyzer))
 	err = idx.Update(doc)
 	if err != nil {
 		t.Errorf("Error updating index: %v", err)
diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go
index a62d10b3..5308198f 100644
--- a/index/upside_down/upside_down.go
+++ b/index/upside_down/upside_down.go
@@ -486,6 +486,34 @@ func (udc *UpsideDownCouch) Dump() {
 	}
 }
 
+// DumpFields prints every row whose key starts with the byte 'f' to standard
+// output: the parsed row, then a hex dump of the raw key and value. It stops
+// when the iterator has no current entry, at the first key without that
+// prefix, or at the first row that fails to parse.
+func (udc *UpsideDownCouch) DumpFields() {
+	prefix := []byte{'f'}
+	it := udc.store.Iterator(prefix)
+	defer it.Close()
+
+	for key, val, valid := it.Current(); valid; key, val, valid = it.Current() {
+		if !bytes.HasPrefix(key, prefix) {
+			break
+		}
+
+		row, err := ParseFromKeyValue(key, val)
+		if err != nil {
+			// end the message with a newline so it does not run into later output
+			fmt.Printf("error parsing key/value: %v\n", err)
+			return
+		}
+		if row != nil {
+			fmt.Printf("%v\n", row)
+			fmt.Printf("Key: % -100x\nValue: % -100x\n\n", key, val)
+		}
+
+		it.Next()
+	}
+}
+
 type keyset [][]byte
 
 func (k keyset) Len() int { return len(k) }
diff --git a/index/upside_down/upside_down_test.go
b/index/upside_down/upside_down_test.go index 898904b5..f8a869fd 100644 --- a/index/upside_down/upside_down_test.go +++ b/index/upside_down/upside_down_test.go @@ -10,13 +10,19 @@ package upside_down import ( "os" + "regexp" "testing" - _ "github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer" + "github.com/couchbaselabs/bleve/analysis" + "github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer" "github.com/couchbaselabs/bleve/document" "github.com/couchbaselabs/bleve/index/store/gouchstore" ) +var testAnalyzer = &analysis.Analyzer{ + Tokenizer: regexp_tokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)), +} + func TestIndexOpenReopen(t *testing.T) { defer os.RemoveAll("test") @@ -180,7 +186,7 @@ func TestIndexInsertThenUpdate(t *testing.T) { // this update should overwrite one term, and introduce one new one doc = document.NewDocument("1") - doc.AddField(document.NewTextField("name", []byte("test fail"))) + doc.AddField(document.NewTextFieldWithAnalyzer("name", []byte("test fail"), testAnalyzer)) err = idx.Update(doc) if err != nil { t.Errorf("Error deleting entry from index: %v", err) diff --git a/index_impl.go b/index_impl.go new file mode 100644 index 00000000..e6324dc7 --- /dev/null +++ b/index_impl.go @@ -0,0 +1,187 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+package bleve
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/couchbaselabs/bleve/document"
+	"github.com/couchbaselabs/bleve/index"
+	"github.com/couchbaselabs/bleve/index/store"
+	"github.com/couchbaselabs/bleve/index/store/leveldb"
+	"github.com/couchbaselabs/bleve/index/upside_down"
+	"github.com/couchbaselabs/bleve/search"
+)
+
+// indexImpl bundles the key/value store, the index opened on top of it and
+// the mapping used to turn caller data into documents.
+type indexImpl struct {
+	s store.KVStore
+	i index.Index
+	m *IndexMapping
+}
+
+// newIndex opens the leveldb store at path, opens an upside_down index over
+// it, and returns both wrapped together with mapping.
+func newIndex(path string, mapping *IndexMapping) (*indexImpl, error) {
+	kv, err := leveldb.Open(path)
+	if err != nil {
+		return nil, err
+	}
+
+	idx := upside_down.NewUpsideDownCouch(kv)
+	// NOTE(review): when Open fails, the store opened above is not released
+	// on this path -- confirm whether it needs an explicit close here.
+	if err := idx.Open(); err != nil {
+		return nil, err
+	}
+
+	rv := &indexImpl{s: kv, i: idx, m: mapping}
+	return rv, nil
+}
+
+// Index indexes data under the ID found by determineID. It returns
+// ERROR_NO_ID when no ID can be determined.
+func (i *indexImpl) Index(data interface{}) error {
+	id, found := i.determineID(data)
+	if !found {
+		return ERROR_NO_ID
+	}
+	return i.IndexID(id, data)
+}
+
+// IndexID maps data into a new document with the given id and hands that
+// document to the underlying index's Update.
+func (i *indexImpl) IndexID(id string, data interface{}) error {
+	doc := document.NewDocument(id)
+	if err := i.m.MapDocument(doc, data); err != nil {
+		return err
+	}
+	return i.i.Update(doc)
+}
+
+// IndexJSON decodes data as JSON and passes the decoded value to Index.
+func (i *indexImpl) IndexJSON(data []byte) error {
+	var decoded interface{}
+	if err := json.Unmarshal(data, &decoded); err != nil {
+		return err
+	}
+	return i.Index(decoded)
+}
+
+// IndexJSONID decodes data as JSON and passes the decoded value to IndexID
+// together with the caller-supplied id.
+func (i *indexImpl) IndexJSONID(id string, data []byte) error {
+	var decoded interface{}
+	if err := json.Unmarshal(data, &decoded); err != nil {
+		return err
+	}
+	return i.IndexID(id, decoded)
+}
+
+// Delete removes the document whose ID is found by determineID. It returns
+// ERROR_NO_ID when no ID can be determined.
+func (i *indexImpl) Delete(data interface{}) error {
+	id, found := i.determineID(data)
+	if !found {
+		return ERROR_NO_ID
+	}
+	return i.DeleteID(id)
+}
+
+// DeleteID forwards to the underlying index's Delete for the given id.
+func (i *indexImpl) DeleteID(id string) error {
+	return i.i.Delete(id)
+}
+
+// Document forwards to the underlying index's Document for the given id.
+func (i *indexImpl) Document(id string) (*document.Document, error) {
+	return i.i.Document(id)
+}
+
+// DocCount forwards to the underlying index's DocCount.
+func (i *indexImpl) DocCount() uint64 {
+	return i.i.DocCount()
+}
+
+// Search runs req.Query against this index, collects the hits selected by
+// req.Size and req.From and, when req.Highlight is set, asks a highlighter
+// for fragments on every hit.
+//
+// The highlighter is the one named by req.Highlight.Style when that is set,
+// otherwise the configured default. An error is returned when no name is
+// available or when the name is not registered.
+func (i *indexImpl) Search(req *SearchRequest) (*SearchResult, error) {
+	collector := search.NewTopScorerSkipCollector(req.Size, req.From)
+	searcher, err := req.Query.Searcher(i, req.Explain)
+	if err != nil {
+		return nil, err
+	}
+	err = collector.Collect(searcher)
+	if err != nil {
+		return nil, err
+	}
+
+	hits := collector.Results()
+
+	if req.Highlight != nil {
+		// pick the highlighter name: an explicit style wins over the default,
+		// and the default is only dereferenced when it is actually needed
+		name := req.Highlight.Style
+		if name == nil {
+			name = config.DefaultHighlighter
+		}
+		if name == nil {
+			return nil, fmt.Errorf("no highlighter style requested and no default highlighter configured")
+		}
+		highlighter := config.Highlight.Highlighters[*name]
+		if highlighter == nil {
+			// format the name itself, not the pointer holding it
+			return nil, fmt.Errorf("no highlighter named `%s` registered", *name)
+		}
+
+		for _, hit := range hits {
+			doc, err := i.Document(hit.ID)
+			if err != nil {
+				// best effort: a hit whose document cannot be loaded is
+				// returned without highlighting
+				continue
+			}
+
+			highlightFields := req.Highlight.Fields
+			if highlightFields == nil {
+				// add all fields with matches
+				highlightFields = make([]string, 0, len(hit.Locations))
+				for k := range hit.Locations {
+					highlightFields = append(highlightFields, k)
+				}
+			}
+
+			for _, hf := range highlightFields {
+				highlighter.BestFragmentsInField(hit, doc, hf, 3)
+			}
+		}
+	}
+
+	return &SearchResult{
+		Request:  req,
+		Hits:     hits,
+		Total:    collector.Total(),
+		MaxScore: collector.MaxScore(),
+		Took:     collector.Took(),
+	}, nil
+}
+
+// DumpDoc forwards to the underlying index's DumpDoc for the given id.
+func (i *indexImpl) DumpDoc(id string) ([]interface{}, error) {
+	return i.i.DumpDoc(id)
+}
+
+// Close closes the underlying index.
+func (i *indexImpl) Close() {
+	i.i.Close()
+}
+
+// determineID returns the ID to use for data and whether one was found.
+// A value implementing Identifier supplies its own ID; otherwise the
+// mapping's IdField, when set, is looked up inside data.
+func (i *indexImpl) determineID(data interface{}) (string, bool) {
+	// first see if the object implements Identifier
+	if identifier, ok := data.(Identifier); ok {
+		return identifier.ID(), true
+	}
+
+	// now see if we can find an ID using the mapping
+	if i.m.IdField != nil {
+		if id, ok := mustString(lookupPropertyPath(data, *i.m.IdField)); ok {
+			return id, true
+		}
+	}
+
+	return "", false
+}
diff --git a/index_test.go b/index_test.go
new file mode 100644
index 00000000..7c2be20e
--- /dev/null
+++ b/index_test.go
@@ -0,0 +1,90 @@
+//
Copyright (c) 2014 Couchbase, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the
+// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+package bleve
+
+import (
+	"os"
+	"testing"
+)
+
+// Address is a nested fixture type used by Person.
+type Address struct {
+	Street string `json:"street"`
+	City   string `json:"city"`
+	State  string `json:"state"`
+	Zip    string `json:"zip"`
+}
+
+// Person is the document fixture indexed by TestIndex. It provides its own
+// ID and Type methods.
+type Person struct {
+	Identifier string     `json:"id"`
+	Name       string     `json:"name"`
+	Address    *Address   `json:"address"`
+	Hideouts   []*Address `json:"hideouts"`
+	Tags       []string   `json:"tags"`
+}
+
+// ID returns the person's Identifier field.
+func (p *Person) ID() string {
+	return p.Identifier
+}
+
+// Type always reports "person".
+func (p *Person) Type() string {
+	return "person"
+}
+
+// TestIndex opens an index in "testidx" with an explicit mapping for the
+// "person" type and indexes a single Person.
+// FIXME needs more assertions
+func TestIndex(t *testing.T) {
+	defer os.RemoveAll("testidx")
+
+	// both mappings use the "standard" analyzer; they differ only in the
+	// last argument (includeInAll)
+	nameDoc := NewDocumentMapping().
+		AddFieldMapping(NewFieldMapping("", "text", "standard", true, true, true, true))
+	tagsDoc := NewDocumentMapping().
+		AddFieldMapping(NewFieldMapping("", "text", "standard", true, true, true, false))
+
+	personDoc := NewDocumentMapping().
+		AddSubDocumentMapping("name", nameDoc).
+		AddSubDocumentMapping("id", NewDocumentDisabledMapping()).
+		AddSubDocumentMapping("tags", tagsDoc)
+
+	idx, err := Open("testidx", NewIndexMapping().AddDocumentMapping("person", personDoc))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	marty := &Person{
+		Identifier: "a",
+		Name:       "marty",
+		Address: &Address{
+			Street: "123 Sesame St.",
+			City:   "Garden",
+			State:  "MIND",
+			Zip:    "12345",
+		},
+		Hideouts: []*Address{
+			{
+				Street: "999 Gopher St.",
+				City:   "Denver",
+				State:  "CO",
+				Zip:    "86753",
+			},
+			{
+				Street: "88 Rusty Ln.",
+				City:   "Amsterdam",
+				State:  "CA",
+				Zip:    "09090",
+			},
+		},
+		Tags: []string{"amped", "bogus", "gnarley", "tubed"},
+	}
+
+	if err := idx.Index(marty); err != nil {
+		t.Error(err)
+	}
+}
diff --git a/mapping_document.go b/mapping_document.go
new file mode 100644
index 00000000..9fd1bb0e
--- /dev/null
+++ b/mapping_document.go
@@ -0,0 +1,131 @@
+// Copyright (c) 2014 Couchbase, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the
+// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+package bleve
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/couchbaselabs/bleve/analysis"
+)
+
+// DocumentMapping describes how one (sub-)document is indexed: whether it is
+// enabled and dynamic, the mappings of its named properties, the field
+// mappings applied to its own value, and an optional default analyzer name.
+// The pointer fields are optional and may be nil.
+type DocumentMapping struct {
+	Enabled         *bool                       `json:"enabled"`
+	Dynamic         *bool                       `json:"dynamic"`
+	Properties      map[string]*DocumentMapping `json:"properties"`
+	Fields          []*FieldMapping             `json:"fields"`
+	DefaultAnalyzer *string                     `json:"default_analyzer"`
+}
+
+// GoString renders the mapping for %#v. Unset (nil) flags print as "nil"
+// instead of being dereferenced, so a mapping built by
+// NewDocumentDisabledMapping (which leaves Dynamic unset) can be printed.
+func (dm *DocumentMapping) GoString() string {
+	flag := func(b *bool) string {
+		if b == nil {
+			return "nil"
+		}
+		return fmt.Sprintf("%t", *b)
+	}
+	return fmt.Sprintf(" &bleve.DocumentMapping{Enabled:%s, Dynamic:%s, Properties:%#v, Fields:%#v}", flag(dm.Enabled), flag(dm.Dynamic), dm.Properties, dm.Fields)
+}
+
+// DocumentMappingForPath follows the elements of the encoded path through
+// nested Properties and returns the mapping found there, or nil when any
+// element has no mapping (including a nil receiver or a nil map entry).
+func (dm *DocumentMapping) DocumentMappingForPath(path string) *DocumentMapping {
+	current := dm
+	for _, pathElement := range decodePath(path) {
+		if current == nil {
+			return nil
+		}
+		next, ok := current.Properties[pathElement]
+		if !ok {
+			return nil
+		}
+		current = next
+	}
+	return current
+}
+
+// NewDocumentMapping returns a mapping that is enabled and dynamic.
+func NewDocumentMapping() *DocumentMapping {
+	return &DocumentMapping{
+		Enabled: &tRUE,
+		Dynamic: &tRUE,
+	}
+}
+
+// NewDocumentStaticMapping returns a mapping that is enabled but not dynamic.
+func NewDocumentStaticMapping() *DocumentMapping {
+	return &DocumentMapping{
+		Enabled: &tRUE,
+		Dynamic: &fALSE,
+	}
+}
+
+// NewDocumentDisabledMapping returns a mapping that is disabled; Dynamic is
+// left unset.
+func NewDocumentDisabledMapping() *DocumentMapping {
+	return &DocumentMapping{
+		Enabled: &fALSE,
+	}
+}
+
+// AddSubDocumentMapping registers sdm as the mapping of the named property,
+// creating the Properties map on first use, and returns dm for chaining.
+func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) *DocumentMapping {
+	if dm.Properties == nil {
+		dm.Properties = make(map[string]*DocumentMapping)
+	}
+	dm.Properties[property] = sdm
+	return dm
+}
+
+// AddFieldMapping appends fm to the field mappings and returns dm for
+// chaining.
+func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) *DocumentMapping {
+	// append allocates as needed, so a nil Fields slice needs no setup
+	dm.Fields = append(dm.Fields, fm)
+	return dm
+}
+
+// UnmarshalJSON decodes a mapping from JSON. Enabled and Dynamic default to
+// true when absent; DefaultAnalyzer, Properties and Fields are only replaced
+// when present in the input.
+func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
+	// decode into a shadow struct without methods so that this UnmarshalJSON
+	// is not invoked again for the same value
+	var tmp struct {
+		Enabled         *bool                       `json:"enabled"`
+		Dynamic         *bool                       `json:"dynamic"`
+		Properties      map[string]*DocumentMapping `json:"properties"`
+		Fields          []*FieldMapping             `json:"fields"`
+		DefaultAnalyzer *string                     `json:"default_analyzer"`
+	}
+	if err := json.Unmarshal(data, &tmp); err != nil {
+		return err
+	}
+
+	dm.Enabled = &tRUE
+	if tmp.Enabled != nil {
+		dm.Enabled = tmp.Enabled
+	}
+	dm.Dynamic = &tRUE
+	if tmp.Dynamic != nil {
+		dm.Dynamic = tmp.Dynamic
+	}
+	if tmp.DefaultAnalyzer != nil {
+		dm.DefaultAnalyzer = tmp.DefaultAnalyzer
+	}
+	if tmp.Properties != nil {
+		dm.Properties = make(map[string]*DocumentMapping, len(tmp.Properties))
+		for propName, propMapping := range tmp.Properties {
+			dm.Properties[propName] = propMapping
+		}
+	}
+	if tmp.Fields != nil {
+		dm.Fields = make([]*FieldMapping, len(tmp.Fields))
+		copy(dm.Fields, tmp.Fields)
+	}
+	return nil
+}
+
+// defaultAnalyzer walks path through nested Properties and returns the
+// analyzer registered under the DefaultAnalyzer name of the deepest mapping
+// on that path that sets one, or nil. The walk stops at the first element
+// without a (non-nil) mapping. Only mappings below dm are consulted.
+// NOTE(review): dm's own DefaultAnalyzer is never read here -- confirm that
+// is intended.
+func (dm *DocumentMapping) defaultAnalyzer(path []string) *analysis.Analyzer {
+	if dm == nil {
+		return nil
+	}
+
+	var rv *analysis.Analyzer
+	current := dm
+	for _, pathElement := range path {
+		next, ok := current.Properties[pathElement]
+		if !ok || next == nil {
+			break
+		}
+		current = next
+		if current.DefaultAnalyzer != nil {
+			rv = config.Analysis.Analyzers[*current.DefaultAnalyzer]
+		}
+	}
+	return rv
+}
diff --git a/mapping_field.go b/mapping_field.go
new file mode 100644
index 00000000..93c77a73
--- /dev/null
+++ b/mapping_field.go
@@ -0,0 +1,55 @@
+// Copyright (c) 2014 Couchbase, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the
+// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+package bleve
+
+import (
+	"fmt"
+
+	"github.com/couchbaselabs/bleve/document"
+)
+
+// FieldMapping describes how a single field is indexed. Every member is a
+// pointer so that a value decoded from JSON can leave it unset (nil).
+type FieldMapping struct {
+	Name               *string `json:"name"`
+	Type               *string `json:"type"`
+	Analyzer           *string `json:"analyzer"`
+	Store              *bool   `json:"store"`
+	Index              *bool   `json:"index"`
+	IncludeTermVectors *bool   `json:"include_term_vectors"`
+	IncludeInAll       *bool   `json:"include_in_all"`
+}
+
+// NewFieldMapping returns a FieldMapping with every member set from the
+// corresponding argument.
+func NewFieldMapping(name, typ, analyzer string, store, index bool, includeTermVectors bool, includeInAll bool) *FieldMapping {
+	return &FieldMapping{
+		Name:               &name,
+		Type:               &typ,
+		Analyzer:           &analyzer,
+		Store:              &store,
+		Index:              &index,
+		IncludeTermVectors: &includeTermVectors,
+		IncludeInAll:       &includeInAll,
+	}
+}
+
+// Options combines the Store, Index and IncludeTermVectors flags into
+// document indexing options. An unset (nil) flag counts as false rather
+// than being dereferenced.
+func (fm *FieldMapping) Options() document.IndexingOptions {
+	isSet := func(flag *bool) bool {
+		return flag != nil && *flag
+	}
+
+	var rv document.IndexingOptions
+	if isSet(fm.Store) {
+		rv |= document.STORE_FIELD
+	}
+	if isSet(fm.Index) {
+		rv |= document.INDEX_FIELD
+	}
+	if isSet(fm.IncludeTermVectors) {
+		rv |= document.INCLUDE_TERM_VECTORS
+	}
+	return rv
+}
+
+// GoString renders the mapping for %#v. Unset (nil) members print as "nil"
+// instead of being dereferenced.
+func (fm *FieldMapping) GoString() string {
+	str := func(s *string) string {
+		if s == nil {
+			return "nil"
+		}
+		return *s
+	}
+	flag := func(b *bool) string {
+		if b == nil {
+			return "nil"
+		}
+		return fmt.Sprintf("%t", *b)
+	}
+	return fmt.Sprintf("&bleve.FieldMapping{Name:%s, Type:%s, Analyzer:%s, Store:%s, Index:%s}", str(fm.Name), str(fm.Type), str(fm.Analyzer), flag(fm.Store), flag(fm.Index))
+}
diff --git a/mapping_index.go b/mapping_index.go
new file mode 100644
index 00000000..60d41b2c
--- /dev/null
+++ b/mapping_index.go
@@ -0,0 +1,316 @@
+// Copyright (c) 2014 Couchbase, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the
+// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+package bleve
+
+import (
+	"encoding/json"
+	"fmt"
+	"reflect"
+
+	"github.com/couchbaselabs/bleve/analysis"
+	"github.com/couchbaselabs/bleve/document"
+)
+
+// tRUE and fALSE exist so that mappings can take the address of a boolean.
+var tRUE = true
+
+var fALSE = false
+
+// Defaults applied by NewIndexMapping and by UnmarshalJSON when the
+// corresponding setting is absent.
+var DEFAULT_ID_FIELD = "_id"
+var DEFAULT_TYPE_FIELD = "_type"
+var DEFAULT_TYPE = "_default"
+
+// IndexMapping holds the per-type document mappings, the mapping used for
+// types without one, the names of the ID and type fields, the default type
+// and an optional default analyzer name.
+type IndexMapping struct {
+	TypeMapping     map[string]*DocumentMapping `json:"types"`
+	DefaultMapping  *DocumentMapping            `json:"default_mapping"`
+	IdField         *string                     `json:"id_field"`
+	TypeField       *string                     `json:"type_field"`
+	DefaultType     *string                     `json:"default_type"`
+	DefaultAnalyzer *string                     `json:"default_analyzer"`
+}
+
+// GoString renders the mapping for %#v. Unset (nil) names print as "nil"
+// instead of being dereferenced.
+func (im *IndexMapping) GoString() string {
+	str := func(s *string) string {
+		if s == nil {
+			return "nil"
+		}
+		return *s
+	}
+	return fmt.Sprintf("&bleve.IndexMapping{TypeMapping:%#v, TypeField:%s, DefaultType:%s}", im.TypeMapping, str(im.TypeField), str(im.DefaultType))
+}
+
+// NewIndexMapping returns a mapping with no type mappings, a fresh default
+// document mapping and the default ID field, type field and type.
+func NewIndexMapping() *IndexMapping {
+	return &IndexMapping{
+		TypeMapping:    make(map[string]*DocumentMapping),
+		DefaultMapping: NewDocumentMapping(),
+		IdField:        &DEFAULT_ID_FIELD,
+		TypeField:      &DEFAULT_TYPE_FIELD,
+		DefaultType:    &DEFAULT_TYPE,
+	}
+}
+
+// AddDocumentMapping registers dm as the mapping for doctype, creating the
+// TypeMapping map on first use, and returns im for chaining.
+func (im *IndexMapping) AddDocumentMapping(doctype string, dm *DocumentMapping) *IndexMapping {
+	if im.TypeMapping == nil {
+		// an IndexMapping built as a literal has no map yet
+		im.TypeMapping = make(map[string]*DocumentMapping)
+	}
+	im.TypeMapping[doctype] = dm
+	return im
+}
+
+// SetTypeField sets the name of the field holding a document's type and
+// returns im for chaining.
+func (im *IndexMapping) SetTypeField(typeField string) *IndexMapping {
+	im.TypeField = &typeField
+	return im
+}
+
+// SetDefaultAnalyzer sets the name of the index-wide default analyzer and
+// returns im for chaining.
+func (im *IndexMapping) SetDefaultAnalyzer(analyzer string) *IndexMapping {
+	im.DefaultAnalyzer = &analyzer
+	return im
+}
+
+// MappingForType returns the mapping registered for docType, or the default
+// mapping when none (or a nil one) is registered.
+func (im *IndexMapping) MappingForType(docType string) *DocumentMapping {
+	docMapping := im.TypeMapping[docType]
+	if docMapping == nil {
+		docMapping = im.DefaultMapping
+	}
+	return docMapping
+}
+
+// UnmarshalJSON decodes an index mapping from JSON. Absent ID field, type
+// field, default type and default mapping get their defaults; TypeMapping
+// is always replaced by a non-nil map.
+func (im *IndexMapping) UnmarshalJSON(data []byte) error {
+	// decode into a shadow struct without methods so that this UnmarshalJSON
+	// is not invoked again for the same value
+	var tmp struct {
+		TypeMapping     map[string]*DocumentMapping `json:"types"`
+		DefaultMapping  *DocumentMapping            `json:"default_mapping"`
+		IdField         *string                     `json:"id_field"`
+		TypeField       *string                     `json:"type_field"`
+		DefaultType     *string                     `json:"default_type"`
+		DefaultAnalyzer *string                     `json:"default_analyzer"`
+	}
+	if err := json.Unmarshal(data, &tmp); err != nil {
+		return err
+	}
+
+	im.IdField = &DEFAULT_ID_FIELD
+	if tmp.IdField != nil {
+		im.IdField = tmp.IdField
+	}
+
+	im.TypeField = &DEFAULT_TYPE_FIELD
+	if tmp.TypeField != nil {
+		im.TypeField = tmp.TypeField
+	}
+
+	im.DefaultType = &DEFAULT_TYPE
+	if tmp.DefaultType != nil {
+		im.DefaultType = tmp.DefaultType
+	}
+
+	im.DefaultMapping = NewDocumentMapping()
+	if tmp.DefaultMapping != nil {
+		im.DefaultMapping = tmp.DefaultMapping
+	}
+
+	if tmp.DefaultAnalyzer != nil {
+		im.DefaultAnalyzer = tmp.DefaultAnalyzer
+	}
+
+	im.TypeMapping = make(map[string]*DocumentMapping, len(tmp.TypeMapping))
+	for typeName, typeDocMapping := range tmp.TypeMapping {
+		im.TypeMapping[typeName] = typeDocMapping
+	}
+	return nil
+}
+
+// determineType returns the document type for data and whether one was
+// found: the value's own Type() when it implements Classifier, else the
+// string found at TypeField inside data, else DefaultType.
+func (im *IndexMapping) determineType(data interface{}) (string, bool) {
+	// first see if the object implements Classifier
+	if classifier, ok := data.(Classifier); ok {
+		return classifier.Type(), true
+	}
+
+	// now see if we can find type using the mapping
+	if im.TypeField != nil {
+		if typ, ok := mustString(lookupPropertyPath(data, *im.TypeField)); ok {
+			return typ, true
+		}
+	}
+
+	// fall back to default type if there was one
+	if im.DefaultType != nil {
+		return *im.DefaultType, true
+	}
+
+	return "", false
+}
+
+// MapDocument walks data using the mapping for its type and adds the
+// resulting fields to doc. Unless the type's mapping disables "_all", a
+// composite "_all" field is added as well. It returns ERROR_NO_TYPE when no
+// type can be determined for data.
+func (im *IndexMapping) MapDocument(doc *document.Document, data interface{}) error {
+	docType, ok := im.determineType(data)
+	if !ok {
+		return ERROR_NO_TYPE
+	}
+	docMapping := im.MappingForType(docType)
+	ctx := newWalkContext(doc, docMapping)
+	im.walkDocument(data, []string{}, ctx)
+
+	// see if the _all field was disabled; as in processProperty, an unset
+	// Enabled flag means enabled
+	allMapping := docMapping.DocumentMappingForPath("_all")
+	allEnabled := allMapping == nil || allMapping.Enabled == nil || *allMapping.Enabled
+	if allEnabled {
+		field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, ctx.excludedFromAll, document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS)
+		doc.AddField(field)
+	}
+
+	return nil
+}
+
+// walkContext carries the state shared by one MapDocument walk: the
+// document being built, the document mapping in effect, and the names of
+// fields to leave out of "_all".
+type walkContext struct {
+	doc             *document.Document
+	dm              *DocumentMapping
+	excludedFromAll []string
+}
+
+// newWalkContext returns a walkContext for doc and dm with an empty
+// exclusion list.
+func newWalkContext(doc *document.Document, dm *DocumentMapping) *walkContext {
+	return &walkContext{
+		doc:             doc,
+		dm:              dm,
+		excludedFromAll: []string{},
+	}
+}
+
+// walkDocument visits data by reflection and hands every map value, struct
+// field and slice/array element to processProperty; pointers are followed.
+// Map entries and struct fields extend path with their name (the JSON tag
+// name when a struct field has one); slice and array elements keep the path
+// of their container. Nil values and nil pointers are skipped.
+func (im *IndexMapping) walkDocument(data interface{}, path []string, context *walkContext) {
+	val := reflect.ValueOf(data)
+	if !val.IsValid() {
+		// nil interface: there is no type to inspect and nothing to walk
+		return
+	}
+	typ := val.Type()
+	switch typ.Kind() {
+	case reflect.Map:
+		// FIXME can add support for other map keys in the future
+		if typ.Key().Kind() == reflect.String {
+			for _, key := range val.MapKeys() {
+				fieldName := key.String()
+				fieldVal := val.MapIndex(key).Interface()
+				im.processProperty(fieldVal, append(path, fieldName), context)
+			}
+		}
+	case reflect.Struct:
+		for i := 0; i < val.NumField(); i++ {
+			field := typ.Field(i)
+			fieldName := field.Name
+
+			// if the field has a JSON name, prefer that
+			if jsonFieldName := parseJSONTagName(field.Tag.Get("json")); jsonFieldName != "" {
+				fieldName = jsonFieldName
+			}
+
+			if val.Field(i).CanInterface() {
+				fieldVal := val.Field(i).Interface()
+				im.processProperty(fieldVal, append(path, fieldName), context)
+			}
+		}
+	case reflect.Slice, reflect.Array:
+		for i := 0; i < val.Len(); i++ {
+			if val.Index(i).CanInterface() {
+				fieldVal := val.Index(i).Interface()
+				im.processProperty(fieldVal, path, context)
+			}
+		}
+	case reflect.Ptr:
+		if val.IsNil() {
+			// Elem of a nil pointer is the zero Value, which CanInterface
+			// rejects with a panic
+			return
+		}
+		ptrElem := val.Elem()
+		if ptrElem.CanInterface() {
+			im.walkDocument(ptrElem.Interface(), path, context)
+		}
+	}
+}
+
+// processProperty handles one value found at path. Values under a disabled
+// sub-document mapping and nil values are ignored. Strings are indexed,
+// either through the "text" field mappings registered for the path or, when
+// the path has no mapping, as a stored, indexed field with term vectors
+// using the default analyzer. Every other kind is walked recursively.
+func (im *IndexMapping) processProperty(property interface{}, path []string, context *walkContext) {
+	pathString := encodePath(path)
+	// look to see if there is a mapping for this field
+	subDocMapping := context.dm.DocumentMappingForPath(pathString)
+
+	// check to see if we even need to do further processing
+	if subDocMapping != nil && subDocMapping.Enabled != nil && !*subDocMapping.Enabled {
+		return
+	}
+
+	propertyValue := reflect.ValueOf(property)
+	if !propertyValue.IsValid() {
+		// nil property (for example a JSON null): nothing to index
+		return
+	}
+	if propertyValue.Kind() != reflect.String {
+		im.walkDocument(property, path, context)
+		return
+	}
+
+	propertyValueString := propertyValue.String()
+	if subDocMapping == nil {
+		// automatic indexing behavior
+		options := document.STORE_FIELD | document.INDEX_FIELD | document.INCLUDE_TERM_VECTORS
+		analyzer := im.defaultAnalyzer(context.dm, path)
+		field := document.NewTextFieldCustom(pathString, []byte(propertyValueString), options, analyzer)
+		context.doc.AddField(field)
+		return
+	}
+
+	// index by explicit mapping
+	for _, fieldMapping := range subDocMapping.Fields {
+		if fieldMapping == nil || fieldMapping.Type == nil || *fieldMapping.Type != "text" {
+			continue
+		}
+
+		// a named field mapping replaces the last path element with its name
+		fieldName := pathString
+		if fieldMapping.Name != nil && *fieldMapping.Name != "" {
+			parentName := ""
+			if len(path) > 1 {
+				parentName = encodePath(path[:len(path)-1]) + PATH_SEPARATOR
+			}
+			fieldName = parentName + *fieldMapping.Name
+		}
+
+		// a field mapping without an analyzer name falls back to the default
+		// analyzer for this path; an unknown analyzer skips the field
+		var analyzer *analysis.Analyzer
+		if fieldMapping.Analyzer != nil {
+			analyzer = config.Analysis.Analyzers[*fieldMapping.Analyzer]
+		} else {
+			analyzer = im.defaultAnalyzer(context.dm, path)
+		}
+		if analyzer == nil {
+			continue
+		}
+
+		field := document.NewTextFieldCustom(fieldName, []byte(propertyValueString), fieldMapping.Options(), analyzer)
+		context.doc.AddField(field)
+
+		if fieldMapping.IncludeInAll != nil && !*fieldMapping.IncludeInAll {
+			context.excludedFromAll = append(context.excludedFromAll, fieldName)
+		}
+	}
+}
+
+// defaultAnalyzer returns the analyzer to use for path when no field
+// mapping names one: the document mapping's choice for the path if it has
+// one, else the analyzer named by the index mapping's DefaultAnalyzer, else
+// the one named by config.DefaultAnalyzer. The result may be nil.
+func (im *IndexMapping) defaultAnalyzer(dm *DocumentMapping, path []string) *analysis.Analyzer {
+	// first see if the document mapping has an analyzer
+	rv := dm.defaultAnalyzer(path)
+	if rv == nil {
+		if im.DefaultAnalyzer != nil {
+			rv = config.Analysis.Analyzers[*im.DefaultAnalyzer]
+		} else if config.DefaultAnalyzer != nil {
+			rv = config.Analysis.Analyzers[*config.DefaultAnalyzer]
+		}
+	}
+	return rv
+}
+
+// attempts to find the best analyzer to use with only a field name
+// will walk all the document types, look for field mappings at the
+// provided path, if one exists and it has an explicit analyzer
+// that is returned
+// nil should be an acceptable return value meaning we don't know
+func (im *IndexMapping) analyzerForPath(path string) *analysis.Analyzer {
+
+	// first we look for explicit mapping on the field
+	for _, docMapping := range im.TypeMapping {
+		if docMapping == nil {
+			continue
+		}
+		pathMapping := docMapping.DocumentMappingForPath(path)
+		if pathMapping == nil || len(pathMapping.Fields) == 0 {
+			continue
+		}
+		if first := pathMapping.Fields[0]; first != nil && first.Analyzer != nil {
+			return config.Analysis.Analyzers[*first.Analyzer]
+		}
+	}
+
+	// next we will try default analyzers for the path
+	for _, docMapping := range im.TypeMapping {
+		rv := im.defaultAnalyzer(docMapping, decodePath(path))
+		if rv != nil {
+			return rv
+		}
+	}
+
+	// finally just return the system-wide default analyzer, if one is named
+	if config.DefaultAnalyzer == nil {
+		return nil
+	}
+	return config.Analysis.Analyzers[*config.DefaultAnalyzer]
+}
diff --git a/mapping_test.go b/mapping_test.go
new file mode 100644
index 00000000..89adfea2
--- /dev/null
+++ b/mapping_test.go
@@ -0,0 +1,60 @@
+// Copyright (c) 2014 Couchbase, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the
+// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+package bleve + +import ( + "encoding/json" + "reflect" + "testing" +) + +var mappingSource = []byte(`{ + "types": { + "beer": { + "properties": { + "name": { + "fields": [ + { + "name": "name", + "type": "text", + "analyzer": "standard", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ] + } + } + }, + "brewery": { + } + }, + "type_field": "_type", + "default_type": "_default" +}`) + +var nameField = NewFieldMapping("name", "text", "standard", true, true, true, true) +var nameMapping = NewDocumentMapping().AddFieldMapping(nameField) +var beerMapping = NewDocumentMapping().AddSubDocumentMapping("name", nameMapping) +var breweryMapping = NewDocumentMapping() +var mappingObject = NewIndexMapping(). + AddDocumentMapping("beer", beerMapping). + AddDocumentMapping("brewery", breweryMapping) + +func TestUnmarshalMappingJSON(t *testing.T) { + var indexMapping IndexMapping + err := json.Unmarshal(mappingSource, &indexMapping) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(&indexMapping, mappingObject) { + t.Errorf("expected %#v,\n got %#v", mappingObject, &indexMapping) + } +} diff --git a/search/query.go b/query.go similarity index 85% rename from search/query.go rename to query.go index a4ed44fa..a2f278d5 100644 --- a/search/query.go +++ b/query.go @@ -6,24 +6,23 @@ // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either express or implied. See the License for the specific language governing permissions // and limitations under the License. 
-package search +package bleve import ( "encoding/json" "fmt" "log" - "github.com/couchbaselabs/bleve/document" - "github.com/couchbaselabs/bleve/index" + "github.com/couchbaselabs/bleve/search" ) type Query interface { Boost() float64 - Searcher(index index.Index) (Searcher, error) + Searcher(i *indexImpl, explain bool) (search.Searcher, error) Validate() error } -func ParseQuery(input []byte, mapping document.Mapping) (Query, error) { +func ParseQuery(input []byte) (Query, error) { var tmp map[string]interface{} err := json.Unmarshal(input, &tmp) if err != nil { @@ -42,7 +41,6 @@ func ParseQuery(input []byte, mapping document.Mapping) (Query, error) { if isMatchQuery { log.Printf("detected match query") var rv MatchQuery - rv.mapping = mapping err := json.Unmarshal(input, &rv) if err != nil { return nil, err @@ -53,7 +51,6 @@ func ParseQuery(input []byte, mapping document.Mapping) (Query, error) { if isMatchPhraseQuery { log.Printf("detected match phrase query") var rv MatchPhraseQuery - rv.mapping = mapping err := json.Unmarshal(input, &rv) if err != nil { return nil, err @@ -64,8 +61,7 @@ func ParseQuery(input []byte, mapping document.Mapping) (Query, error) { _, hasShould := tmp["should"] _, hasMustNot := tmp["must_not"] if hasMust || hasShould || hasMustNot { - var rv TermBooleanQuery - rv.mapping = mapping + var rv BooleanQuery err := json.Unmarshal(input, &rv) if err != nil { return nil, err @@ -84,7 +80,6 @@ func ParseQuery(input []byte, mapping document.Mapping) (Query, error) { _, hasSyntaxQuery := tmp["query"] if hasSyntaxQuery { var rv SyntaxQuery - rv.mapping = mapping err := json.Unmarshal(input, &rv) if err != nil { return nil, err diff --git a/query_boolean.go b/query_boolean.go new file mode 100644 index 00000000..1d04f722 --- /dev/null +++ b/query_boolean.go @@ -0,0 +1,80 @@ +// Copyright (c) 2014 Couchbase, Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +package bleve + +import ( + "fmt" + + "github.com/couchbaselabs/bleve/search" +) + +type BooleanQuery struct { + Must *ConjunctionQuery `json:"must,omitempty"` + Should *DisjunctionQuery `json:"should,omitempty"` + MustNot *DisjunctionQuery `json:"must_not,omitempty"` + BoostVal float64 `json:"boost,omitempty"` +} + +func NewBooleanQuery(must *ConjunctionQuery, should *DisjunctionQuery, mustNot *DisjunctionQuery) *BooleanQuery { + return &BooleanQuery{ + Must: must, + Should: should, + MustNot: mustNot, + BoostVal: 1.0, + } +} + +func (q *BooleanQuery) Boost() float64 { + return q.BoostVal +} + +func (q *BooleanQuery) SetBoost(b float64) *BooleanQuery { + q.BoostVal = b + return q +} + +func (q *BooleanQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) { + + var err error + var mustSearcher *search.TermConjunctionSearcher + if q.Must != nil { + mustSearcher, err = q.Must.Searcher(i, explain) + if err != nil { + return nil, err + } + } + + var shouldSearcher *search.TermDisjunctionSearcher + if q.Should != nil { + shouldSearcher, err = q.Should.Searcher(i, explain) + if err != nil { + return nil, err + } + } + + var mustNotSearcher *search.TermDisjunctionSearcher + if q.MustNot != nil { + mustNotSearcher, err = q.MustNot.Searcher(i, explain) + if err != nil { + return nil, err + } + } + + return search.NewTermBooleanSearcher(i.i, mustSearcher, shouldSearcher, mustNotSearcher, explain) +} + +func (q 
*BooleanQuery) Validate() error { + if q.Must == nil && q.Should == nil { + return fmt.Errorf("Boolean query must contain at least one MUST or SHOULD clause") + } + if q.Must != nil && len(q.Must.Conjuncts) == 0 && q.Should != nil && len(q.Should.Disjuncts) == 0 { + return fmt.Errorf("Boolean query must contain at least one MUST or SHOULD clause") + } + return nil +} diff --git a/query_conjunction.go b/query_conjunction.go new file mode 100644 index 00000000..53c5ec52 --- /dev/null +++ b/query_conjunction.go @@ -0,0 +1,78 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+package bleve + +import ( + "encoding/json" + + "github.com/couchbaselabs/bleve/search" +) + +type ConjunctionQuery struct { + Conjuncts []Query `json:"terms"` + BoostVal float64 `json:"boost,omitempty"` +} + +func NewConjunctionQuery(conjuncts []Query) *ConjunctionQuery { + return &ConjunctionQuery{ + Conjuncts: conjuncts, + BoostVal: 1.0, + } +} + +func (q *ConjunctionQuery) Boost() float64 { + return q.BoostVal +} + +func (q *ConjunctionQuery) SetBoost(b float64) *ConjunctionQuery { + q.BoostVal = b + return q +} + +func (q *ConjunctionQuery) AddQuery(aq Query) *ConjunctionQuery { + q.Conjuncts = append(q.Conjuncts, aq) + return q +} + +func (q *ConjunctionQuery) Searcher(i *indexImpl, explain bool) (*search.TermConjunctionSearcher, error) { + searchers := make([]search.Searcher, len(q.Conjuncts)) + for in, conjunct := range q.Conjuncts { + var err error + searchers[in], err = conjunct.Searcher(i, explain) + if err != nil { + return nil, err + } + } + return search.NewTermConjunctionSearcher(i.i, searchers, explain) +} + +func (q *ConjunctionQuery) Validate() error { + return nil +} + +func (q *ConjunctionQuery) UnmarshalJSON(data []byte) error { + tmp := struct { + Conjuncts []json.RawMessage `json:"terms"` + BoostVal float64 `json:"boost,omitempty"` + }{} + err := json.Unmarshal(data, &tmp) + if err != nil { + return err + } + q.Conjuncts = make([]Query, len(tmp.Conjuncts)) + for i, term := range tmp.Conjuncts { + query, err := ParseQuery(term) + if err != nil { + return err + } + q.Conjuncts[i] = query + } + q.BoostVal = tmp.BoostVal + return nil +} diff --git a/query_disjunction.go b/query_disjunction.go new file mode 100644 index 00000000..649bc80d --- /dev/null +++ b/query_disjunction.go @@ -0,0 +1,94 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +package bleve + +import ( + "encoding/json" + "fmt" + + "github.com/couchbaselabs/bleve/search" +) + +type DisjunctionQuery struct { + Disjuncts []Query `json:"terms"` + BoostVal float64 `json:"boost,omitempty"` + MinVal float64 `json:"min"` +} + +func NewDisjunctionQuery(disjuncts []Query) *DisjunctionQuery { + return &DisjunctionQuery{ + Disjuncts: disjuncts, + BoostVal: 1.0, + } +} + +func (q *DisjunctionQuery) Boost() float64 { + return q.BoostVal +} + +func (q *DisjunctionQuery) SetBoost(b float64) *DisjunctionQuery { + q.BoostVal = b + return q +} + +func (q *DisjunctionQuery) AddQuery(aq Query) *DisjunctionQuery { + q.Disjuncts = append(q.Disjuncts, aq) + return q +} + +func (q *DisjunctionQuery) Min() float64 { + return q.MinVal +} + +func (q *DisjunctionQuery) SetMin(m float64) *DisjunctionQuery { + q.MinVal = m + return q +} + +func (q *DisjunctionQuery) Searcher(i *indexImpl, explain bool) (*search.TermDisjunctionSearcher, error) { + searchers := make([]search.Searcher, len(q.Disjuncts)) + for in, disjunct := range q.Disjuncts { + var err error + searchers[in], err = disjunct.Searcher(i, explain) + if err != nil { + return nil, err + } + } + return search.NewTermDisjunctionSearcher(i.i, searchers, q.MinVal, explain) +} + +func (q *DisjunctionQuery) Validate() error { + if int(q.MinVal) > len(q.Disjuncts) { + return fmt.Errorf("Minimum clauses in disjunction exceeds total number of clauses") + } + return nil +} + +func (q *DisjunctionQuery) UnmarshalJSON(data []byte) error { + tmp := struct { + Disjuncts []json.RawMessage `json:"terms"` + BoostVal 
float64 `json:"boost,omitempty"` + MinVal float64 `json:"min"` + }{} + err := json.Unmarshal(data, &tmp) + if err != nil { + return err + } + q.Disjuncts = make([]Query, len(tmp.Disjuncts)) + for i, term := range tmp.Disjuncts { + query, err := ParseQuery(term) + if err != nil { + return err + } + q.Disjuncts[i] = query + } + q.BoostVal = tmp.BoostVal + q.MinVal = tmp.MinVal + return nil +} diff --git a/query_match.go b/query_match.go new file mode 100644 index 00000000..f9b296da --- /dev/null +++ b/query_match.go @@ -0,0 +1,85 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+package bleve + +import ( + "fmt" + + "github.com/couchbaselabs/bleve/analysis" + "github.com/couchbaselabs/bleve/search" +) + +type MatchQuery struct { + Match string `json:"match"` + FieldVal string `json:"field,omitempty"` + Analyzer string `json:"analyzer,omitempty"` + BoostVal float64 `json:"boost,omitempty"` +} + +func NewMatchQuery(match string) *MatchQuery { + return &MatchQuery{ + Match: match, + BoostVal: 1.0, + } +} + +func (q *MatchQuery) Boost() float64 { + return q.BoostVal +} + +func (q *MatchQuery) SetBoost(b float64) *MatchQuery { + q.BoostVal = b + return q +} + +func (q *MatchQuery) Field() string { + return q.FieldVal +} + +func (q *MatchQuery) SetField(f string) *MatchQuery { + q.FieldVal = f + return q +} + +func (q *MatchQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) { + + var analyzer *analysis.Analyzer + if q.Analyzer != "" { + analyzer = config.Analysis.Analyzers[q.Analyzer] + } else { + analyzer = i.m.analyzerForPath(q.FieldVal) + } + if analyzer == nil { + return nil, fmt.Errorf("no analyzer named '%s' registered", q.Analyzer) + } + + tokens := analyzer.Analyze([]byte(q.Match)) + if len(tokens) > 0 { + + tqs := make([]Query, len(tokens)) + for i, token := range tokens { + tqs[i] = NewTermQuery(string(token.Term)). + SetField(q.FieldVal). + SetBoost(q.BoostVal) + } + + shouldQuery := NewDisjunctionQuery(tqs). + SetBoost(q.BoostVal). + SetMin(1) + + return shouldQuery.Searcher(i, explain) + } else { + noneQuery := NewMatchNoneQuery() + return noneQuery.Searcher(i, explain) + } +} + +func (q *MatchQuery) Validate() error { + return nil +} diff --git a/search/query_match_all.go b/query_match_all.go similarity index 67% rename from search/query_match_all.go rename to query_match_all.go index ab24d20d..08115896 100644 --- a/search/query_match_all.go +++ b/query_match_all.go @@ -6,23 +6,33 @@ // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either express or implied. 
See the License for the specific language governing permissions // and limitations under the License. -package search +package bleve import ( - "github.com/couchbaselabs/bleve/index" + "github.com/couchbaselabs/bleve/search" ) type MatchAllQuery struct { BoostVal float64 `json:"boost,omitempty"` - Explain bool `json:"explain,omitempty"` +} + +func NewMatchAllQuery() *MatchAllQuery { + return &MatchAllQuery{ + BoostVal: 1.0, + } } func (q *MatchAllQuery) Boost() float64 { return q.BoostVal } -func (q *MatchAllQuery) Searcher(index index.Index) (Searcher, error) { - return NewMatchAllSearcher(index, q) +func (q *MatchAllQuery) SetBoost(b float64) *MatchAllQuery { + q.BoostVal = b + return q +} + +func (q *MatchAllQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) { + return search.NewMatchAllSearcher(i.i, q.BoostVal, explain) } func (q *MatchAllQuery) Validate() error { diff --git a/search/query_match_none.go b/query_match_none.go similarity index 68% rename from search/query_match_none.go rename to query_match_none.go index 0dcff41e..5f2dd98d 100644 --- a/search/query_match_none.go +++ b/query_match_none.go @@ -6,23 +6,33 @@ // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either express or implied. See the License for the specific language governing permissions // and limitations under the License. 
-package search +package bleve import ( - "github.com/couchbaselabs/bleve/index" + "github.com/couchbaselabs/bleve/search" ) type MatchNoneQuery struct { BoostVal float64 `json:"boost,omitempty"` - Explain bool `json:"explain,omitempty"` +} + +func NewMatchNoneQuery() *MatchNoneQuery { + return &MatchNoneQuery{ + BoostVal: 1.0, + } } func (q *MatchNoneQuery) Boost() float64 { return q.BoostVal } -func (q *MatchNoneQuery) Searcher(index index.Index) (Searcher, error) { - return NewMatchNoneSearcher(index, q) +func (q *MatchNoneQuery) SetBoost(b float64) *MatchNoneQuery { + q.BoostVal = b + return q +} + +func (q *MatchNoneQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) { + return search.NewMatchNoneSearcher(i.i) } func (q *MatchNoneQuery) Validate() error { diff --git a/query_match_phrase.go b/query_match_phrase.go new file mode 100644 index 00000000..50b4140a --- /dev/null +++ b/query_match_phrase.go @@ -0,0 +1,82 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+package bleve + +import ( + "fmt" + + "github.com/couchbaselabs/bleve/analysis" + "github.com/couchbaselabs/bleve/search" +) + +type MatchPhraseQuery struct { + MatchPhrase string `json:"match_phrase"` + FieldVal string `json:"field,omitempty"` + Analyzer string `json:"analyzer,omitempty"` + BoostVal float64 `json:"boost,omitempty"` +} + +func NewMatchPhraseQuery(matchPhrase string) *MatchPhraseQuery { + return &MatchPhraseQuery{ + MatchPhrase: matchPhrase, + BoostVal: 1.0, + } +} + +func (q *MatchPhraseQuery) Boost() float64 { + return q.BoostVal +} + +func (q *MatchPhraseQuery) SetBoost(b float64) *MatchPhraseQuery { + q.BoostVal = b + return q +} + +func (q *MatchPhraseQuery) Field() string { + return q.FieldVal +} + +func (q *MatchPhraseQuery) SetField(f string) *MatchPhraseQuery { + q.FieldVal = f + return q +} + +func (q *MatchPhraseQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) { + + var analyzer *analysis.Analyzer + if q.Analyzer != "" { + analyzer = config.Analysis.Analyzers[q.Analyzer] + } else { + analyzer = i.m.analyzerForPath(q.FieldVal) + } + if analyzer == nil { + return nil, fmt.Errorf("no analyzer named '%s' registered", q.Analyzer) + } + + tokens := analyzer.Analyze([]byte(q.MatchPhrase)) + if len(tokens) > 0 { + tqs := make([]*TermQuery, len(tokens)) + for i, token := range tokens { + tqs[i] = NewTermQuery(string(token.Term)). + SetField(q.FieldVal). + SetBoost(q.BoostVal) + } + + phraseQuery := NewPhraseQuery(tqs) + + return phraseQuery.Searcher(i, explain) + } else { + noneQuery := NewMatchNoneQuery() + return noneQuery.Searcher(i, explain) + } +} + +func (q *MatchPhraseQuery) Validate() error { + return nil +} diff --git a/query_phrase.go b/query_phrase.go new file mode 100644 index 00000000..145a136d --- /dev/null +++ b/query_phrase.go @@ -0,0 +1,60 @@ +// Copyright (c) 2014 Couchbase, Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +package bleve + +import ( + "fmt" + + "github.com/couchbaselabs/bleve/search" +) + +type PhraseQuery struct { + Terms []*TermQuery `json:"terms"` + BoostVal float64 `json:"boost,omitempty"` +} + +func NewPhraseQuery(terms []*TermQuery) *PhraseQuery { + return &PhraseQuery{ + Terms: terms, + BoostVal: 1.0, + } +} + +func (q *PhraseQuery) Boost() float64 { + return q.BoostVal +} + +func (q *PhraseQuery) SetBoost(b float64) *PhraseQuery { + q.BoostVal = b + return q +} + +func (q *PhraseQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) { + + terms := make([]string, len(q.Terms)) + conjuncts := make([]Query, len(q.Terms)) + for i, term := range q.Terms { + conjuncts[i] = term + terms[i] = term.Term + } + + conjunctionQuery := NewConjunctionQuery(conjuncts) + conjunctionSearcher, err := conjunctionQuery.Searcher(i, explain) + if err != nil { + return nil, err + } + return search.NewPhraseSearcher(i.i, conjunctionSearcher, terms) +} + +func (q *PhraseQuery) Validate() error { + if q.Terms == nil { + return fmt.Errorf("Phrase query must contain at least one term") + } + return nil +} diff --git a/search/query_syntax.go b/query_syntax.go similarity index 50% rename from search/query_syntax.go rename to query_syntax.go index ddee659a..860b3115 100644 --- a/search/query_syntax.go +++ b/query_syntax.go @@ -6,31 +6,49 @@ // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either 
express or implied. See the License for the specific language governing permissions // and limitations under the License. -package search +package bleve import ( - "github.com/couchbaselabs/bleve/document" - "github.com/couchbaselabs/bleve/index" + "github.com/couchbaselabs/bleve/search" ) type SyntaxQuery struct { - Query string `json:"query"` - BoostVal float64 `json:"boost,omitempty"` - Explain bool `json:"explain,omitempty"` - DefaultField string `json:"default_field,omitemtpy"` - mapping document.Mapping + Query string `json:"query"` + DefaultFieldVal string `json:"default_field,omitempty"` + BoostVal float64 `json:"boost,omitempty"` +} + +func NewSyntaxQuery(query string) *SyntaxQuery { + return &SyntaxQuery{ + Query: query, + BoostVal: 1.0, + } } func (q *SyntaxQuery) Boost() float64 { return q.BoostVal } -func (q *SyntaxQuery) Searcher(index index.Index) (Searcher, error) { - newQuery, err := ParseQuerySyntax(q.Query, q.mapping, q.DefaultField) +func (q *SyntaxQuery) SetBoost(b float64) *SyntaxQuery { + q.BoostVal = b + return q +} + +func (q *SyntaxQuery) DefaultField() string { + return q.DefaultFieldVal +} + +func (q *SyntaxQuery) SetField(f string) *SyntaxQuery { + q.DefaultFieldVal = f + return q +} + +func (q *SyntaxQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) { + newQuery, err := ParseQuerySyntax(q.Query, i.m, q.DefaultFieldVal) if err != nil { return nil, err } - return newQuery.Searcher(index) + return newQuery.Searcher(i, explain) } func (q *SyntaxQuery) Validate() error { diff --git a/search/query_syntax.nex b/query_syntax.nex similarity index 98% rename from search/query_syntax.nex rename to query_syntax.nex index cedcbc70..aa55a1f9 100644 --- a/search/query_syntax.nex +++ b/query_syntax.nex @@ -22,7 +22,7 @@ return STRING } // -package search +package bleve import("log") import("strconv") diff --git a/search/query_syntax.nn.go b/query_syntax.nn.go similarity index 73% rename from search/query_syntax.nn.go rename to 
query_syntax.nn.go index fa4606a8..afe2ff03 100644 --- a/search/query_syntax.nn.go +++ b/query_syntax.nn.go @@ -1,4 +1,4 @@ -package search +package bleve import ( "log" @@ -23,382 +23,382 @@ a = make([]family, 1) { var acc [18]bool var fun [18]func(rune) int -fun[1] = func(r rune) int { - switch(r) { - case 34: return 2 - case 98: return 3 - case 110: return 3 - case 92: return 4 - case 116: return 3 - case 114: return 3 - case 117: return 3 - case 102: return 3 - case 47: return 3 - default: - switch { - case 48 <= r && r <= 57: return 3 - case 65 <= r && r <= 70: return 3 - case 97 <= r && r <= 102: return 3 - default: return 3 - } - } - panic("unreachable") -} -fun[14] = func(r rune) int { - switch(r) { - case 117: return 3 - case 102: return 15 - case 47: return 3 - case 34: return 2 - case 98: return 15 - case 110: return 3 - case 92: return 4 - case 116: return 3 - case 114: return 3 - default: - switch { - case 48 <= r && r <= 57: return 15 - case 65 <= r && r <= 70: return 15 - case 97 <= r && r <= 102: return 15 - default: return 3 - } - } - panic("unreachable") -} -fun[10] = func(r rune) int { - switch(r) { - case 117: return 3 - case 102: return 3 - case 47: return 3 - case 34: return 2 - case 98: return 3 - case 110: return 3 - case 92: return 4 - case 116: return 3 - case 114: return 3 - default: - switch { - case 48 <= r && r <= 57: return 3 - case 65 <= r && r <= 70: return 3 - case 97 <= r && r <= 102: return 3 - default: return 3 - } - } - panic("unreachable") -} -fun[3] = func(r rune) int { - switch(r) { - case 34: return 2 - case 98: return 3 - case 110: return 3 - case 92: return 4 - case 116: return 3 - case 114: return 3 - case 117: return 3 - case 102: return 3 - case 47: return 3 - default: - switch { - case 48 <= r && r <= 57: return 3 - case 65 <= r && r <= 70: return 3 - case 97 <= r && r <= 102: return 3 - default: return 3 - } - } - panic("unreachable") -} -fun[4] = func(r rune) int { - switch(r) { - case 117: return 5 - case 102: 
return 6 - case 47: return 7 - case 34: return 8 - case 98: return 9 - case 110: return 10 - case 92: return 11 - case 116: return 12 - case 114: return 13 - default: - switch { - case 48 <= r && r <= 57: return 3 - case 65 <= r && r <= 70: return 3 - case 97 <= r && r <= 102: return 3 - default: return 3 - } - } - panic("unreachable") -} -fun[16] = func(r rune) int { - switch(r) { - case 34: return 2 - case 98: return 17 - case 110: return 3 - case 92: return 4 - case 116: return 3 - case 114: return 3 - case 117: return 3 - case 102: return 17 - case 47: return 3 - default: - switch { - case 48 <= r && r <= 57: return 17 - case 65 <= r && r <= 70: return 17 - case 97 <= r && r <= 102: return 17 - default: return 3 - } - } - panic("unreachable") -} -fun[15] = func(r rune) int { - switch(r) { - case 114: return 3 - case 117: return 3 - case 102: return 16 - case 47: return 3 - case 34: return 2 - case 98: return 16 - case 110: return 3 - case 92: return 4 - case 116: return 3 - default: - switch { - case 48 <= r && r <= 57: return 16 - case 65 <= r && r <= 70: return 16 - case 97 <= r && r <= 102: return 16 - default: return 3 - } - } - panic("unreachable") -} -fun[17] = func(r rune) int { - switch(r) { - case 117: return 3 - case 102: return 3 - case 47: return 3 - case 34: return 2 - case 98: return 3 - case 110: return 3 - case 92: return 4 - case 116: return 3 - case 114: return 3 - default: - switch { - case 48 <= r && r <= 57: return 3 - case 65 <= r && r <= 70: return 3 - case 97 <= r && r <= 102: return 3 - default: return 3 - } - } - panic("unreachable") -} fun[11] = func(r rune) int { switch(r) { - case 117: return 5 - case 102: return 6 - case 47: return 7 - case 34: return 8 - case 98: return 9 - case 110: return 10 case 92: return 11 - case 116: return 12 + case 98: return 12 case 114: return 13 + case 34: return 5 + case 117: return 6 + case 47: return 7 + case 116: return 8 + case 110: return 9 + case 102: return 10 default: switch { - case 48 <= r 
&& r <= 57: return 3 - case 65 <= r && r <= 70: return 3 - case 97 <= r && r <= 102: return 3 - default: return 3 - } - } - panic("unreachable") -} -fun[12] = func(r rune) int { - switch(r) { - case 116: return 3 - case 114: return 3 - case 117: return 3 - case 102: return 3 - case 47: return 3 - case 34: return 2 - case 98: return 3 - case 110: return 3 - case 92: return 4 - default: - switch { - case 48 <= r && r <= 57: return 3 - case 65 <= r && r <= 70: return 3 - case 97 <= r && r <= 102: return 3 - default: return 3 - } - } - panic("unreachable") -} -acc[8] = true -fun[8] = func(r rune) int { - switch(r) { - case 47: return 3 - case 34: return 2 - case 98: return 3 - case 110: return 3 - case 92: return 4 - case 116: return 3 - case 114: return 3 - case 117: return 3 - case 102: return 3 - default: - switch { - case 48 <= r && r <= 57: return 3 - case 65 <= r && r <= 70: return 3 - case 97 <= r && r <= 102: return 3 - default: return 3 + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 } } panic("unreachable") } fun[6] = func(r rune) int { switch(r) { - case 47: return 3 - case 34: return 2 - case 98: return 3 - case 110: return 3 - case 92: return 4 - case 116: return 3 - case 114: return 3 - case 117: return 3 - case 102: return 3 - default: - switch { - case 48 <= r && r <= 57: return 3 - case 65 <= r && r <= 70: return 3 - case 97 <= r && r <= 102: return 3 - default: return 3 - } - } - panic("unreachable") -} -fun[9] = func(r rune) int { - switch(r) { - case 117: return 3 - case 102: return 3 - case 47: return 3 - case 34: return 2 - case 98: return 3 - case 110: return 3 - case 92: return 4 - case 116: return 3 - case 114: return 3 - default: - switch { - case 48 <= r && r <= 57: return 3 - case 65 <= r && r <= 70: return 3 - case 97 <= r && r <= 102: return 3 - default: return 3 - } - } - panic("unreachable") -} -fun[7] = func(r rune) int { - switch(r) { - case 47: return 3 - 
case 34: return 2 - case 98: return 3 - case 110: return 3 - case 92: return 4 - case 116: return 3 - case 114: return 3 - case 117: return 3 - case 102: return 3 - default: - switch { - case 48 <= r && r <= 57: return 3 - case 65 <= r && r <= 70: return 3 - case 97 <= r && r <= 102: return 3 - default: return 3 - } - } - panic("unreachable") -} -acc[2] = true -fun[2] = func(r rune) int { - switch(r) { - case 110: return -1 - case 92: return -1 - case 116: return -1 - case 114: return -1 - case 117: return -1 - case 102: return -1 - case 47: return -1 - case 34: return -1 - case 98: return -1 - default: - switch { - case 48 <= r && r <= 57: return -1 - case 65 <= r && r <= 70: return -1 - case 97 <= r && r <= 102: return -1 - default: return -1 - } - } - panic("unreachable") -} -fun[13] = func(r rune) int { - switch(r) { - case 117: return 3 - case 102: return 3 - case 47: return 3 - case 34: return 2 - case 98: return 3 - case 110: return 3 - case 92: return 4 - case 116: return 3 - case 114: return 3 - default: - switch { - case 48 <= r && r <= 57: return 3 - case 65 <= r && r <= 70: return 3 - case 97 <= r && r <= 102: return 3 - default: return 3 - } - } - panic("unreachable") -} -fun[0] = func(r rune) int { - switch(r) { - case 117: return -1 - case 102: return -1 - case 47: return -1 - case 34: return 1 - case 98: return -1 - case 110: return -1 - case 92: return -1 - case 116: return -1 - case 114: return -1 - default: - switch { - case 48 <= r && r <= 57: return -1 - case 65 <= r && r <= 70: return -1 - case 97 <= r && r <= 102: return -1 - default: return -1 - } - } - panic("unreachable") -} -fun[5] = func(r rune) int { - switch(r) { - case 110: return 3 - case 92: return 4 - case 116: return 3 - case 114: return 3 - case 117: return 3 + case 114: return 2 + case 34: return 4 + case 117: return 2 + case 47: return 2 + case 116: return 2 + case 110: return 2 case 102: return 14 - case 47: return 3 - case 34: return 2 + case 92: return 3 case 98: return 14 
default: switch { case 48 <= r && r <= 57: return 14 case 65 <= r && r <= 70: return 14 case 97 <= r && r <= 102: return 14 - default: return 3 + default: return 2 + } + } + panic("unreachable") +} +acc[4] = true +fun[4] = func(r rune) int { + switch(r) { + case 92: return -1 + case 98: return -1 + case 114: return -1 + case 34: return -1 + case 117: return -1 + case 47: return -1 + case 116: return -1 + case 110: return -1 + case 102: return -1 + default: + switch { + case 48 <= r && r <= 57: return -1 + case 65 <= r && r <= 70: return -1 + case 97 <= r && r <= 102: return -1 + default: return -1 + } + } + panic("unreachable") +} +fun[17] = func(r rune) int { + switch(r) { + case 92: return 3 + case 98: return 2 + case 114: return 2 + case 34: return 4 + case 117: return 2 + case 47: return 2 + case 116: return 2 + case 110: return 2 + case 102: return 2 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} +acc[5] = true +fun[5] = func(r rune) int { + switch(r) { + case 110: return 2 + case 102: return 2 + case 92: return 3 + case 98: return 2 + case 114: return 2 + case 34: return 4 + case 117: return 2 + case 47: return 2 + case 116: return 2 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} +fun[0] = func(r rune) int { + switch(r) { + case 114: return -1 + case 34: return 1 + case 117: return -1 + case 47: return -1 + case 116: return -1 + case 110: return -1 + case 102: return -1 + case 92: return -1 + case 98: return -1 + default: + switch { + case 48 <= r && r <= 57: return -1 + case 65 <= r && r <= 70: return -1 + case 97 <= r && r <= 102: return -1 + default: return -1 + } + } + panic("unreachable") +} +fun[10] = func(r rune) int { + switch(r) { + case 114: return 2 + case 34: return 4 + case 
117: return 2 + case 47: return 2 + case 116: return 2 + case 110: return 2 + case 102: return 2 + case 92: return 3 + case 98: return 2 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} +fun[16] = func(r rune) int { + switch(r) { + case 102: return 17 + case 92: return 3 + case 98: return 17 + case 114: return 2 + case 34: return 4 + case 117: return 2 + case 47: return 2 + case 116: return 2 + case 110: return 2 + default: + switch { + case 48 <= r && r <= 57: return 17 + case 65 <= r && r <= 70: return 17 + case 97 <= r && r <= 102: return 17 + default: return 2 + } + } + panic("unreachable") +} +fun[13] = func(r rune) int { + switch(r) { + case 117: return 2 + case 47: return 2 + case 116: return 2 + case 110: return 2 + case 102: return 2 + case 92: return 3 + case 98: return 2 + case 114: return 2 + case 34: return 4 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} +fun[7] = func(r rune) int { + switch(r) { + case 92: return 3 + case 98: return 2 + case 114: return 2 + case 34: return 4 + case 117: return 2 + case 47: return 2 + case 116: return 2 + case 110: return 2 + case 102: return 2 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} +fun[2] = func(r rune) int { + switch(r) { + case 47: return 2 + case 116: return 2 + case 110: return 2 + case 102: return 2 + case 92: return 3 + case 98: return 2 + case 114: return 2 + case 34: return 4 + case 117: return 2 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} +fun[8] = func(r 
rune) int { + switch(r) { + case 92: return 3 + case 98: return 2 + case 114: return 2 + case 34: return 4 + case 117: return 2 + case 47: return 2 + case 116: return 2 + case 110: return 2 + case 102: return 2 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} +fun[3] = func(r rune) int { + switch(r) { + case 34: return 5 + case 117: return 6 + case 47: return 7 + case 116: return 8 + case 110: return 9 + case 102: return 10 + case 92: return 11 + case 98: return 12 + case 114: return 13 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} +fun[12] = func(r rune) int { + switch(r) { + case 47: return 2 + case 116: return 2 + case 110: return 2 + case 102: return 2 + case 92: return 3 + case 98: return 2 + case 114: return 2 + case 34: return 4 + case 117: return 2 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} +fun[14] = func(r rune) int { + switch(r) { + case 117: return 2 + case 47: return 2 + case 116: return 2 + case 110: return 2 + case 102: return 15 + case 92: return 3 + case 98: return 15 + case 114: return 2 + case 34: return 4 + default: + switch { + case 48 <= r && r <= 57: return 15 + case 65 <= r && r <= 70: return 15 + case 97 <= r && r <= 102: return 15 + default: return 2 + } + } + panic("unreachable") +} +fun[15] = func(r rune) int { + switch(r) { + case 47: return 2 + case 116: return 2 + case 110: return 2 + case 102: return 16 + case 92: return 3 + case 98: return 16 + case 114: return 2 + case 34: return 4 + case 117: return 2 + default: + switch { + case 48 <= r && r <= 57: return 16 + case 65 <= r && r <= 70: return 16 + case 97 <= r && r <= 
102: return 16 + default: return 2 + } + } + panic("unreachable") +} +fun[1] = func(r rune) int { + switch(r) { + case 47: return 2 + case 116: return 2 + case 110: return 2 + case 102: return 2 + case 92: return 3 + case 98: return 2 + case 114: return 2 + case 34: return 4 + case 117: return 2 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} +fun[9] = func(r rune) int { + switch(r) { + case 92: return 3 + case 98: return 2 + case 114: return 2 + case 34: return 4 + case 117: return 2 + case 47: return 2 + case 116: return 2 + case 110: return 2 + case 102: return 2 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 } } panic("unreachable") @@ -410,9 +410,10 @@ a0[0].id = 0 { var acc [2]bool var fun [2]func(rune) int -fun[0] = func(r rune) int { +acc[1] = true +fun[1] = func(r rune) int { switch(r) { - case 43: return 1 + case 43: return -1 default: switch { default: return -1 @@ -420,10 +421,9 @@ fun[0] = func(r rune) int { } panic("unreachable") } -acc[1] = true -fun[1] = func(r rune) int { +fun[0] = func(r rune) int { switch(r) { - case 43: return -1 + case 43: return 1 default: switch { default: return -1 @@ -466,9 +466,10 @@ a0[2].id = 2 { var acc [2]bool var fun [2]func(rune) int -fun[0] = func(r rune) int { +acc[1] = true +fun[1] = func(r rune) int { switch(r) { - case 58: return 1 + case 58: return -1 default: switch { default: return -1 @@ -476,10 +477,9 @@ fun[0] = func(r rune) int { } panic("unreachable") } -acc[1] = true -fun[1] = func(r rune) int { +fun[0] = func(r rune) int { switch(r) { - case 58: return -1 + case 58: return 1 default: switch { default: return -1 @@ -494,9 +494,10 @@ a0[3].id = 3 { var acc [2]bool var fun [2]func(rune) int -fun[0] = func(r rune) int { +acc[1] = true +fun[1] = func(r rune) int { 
switch(r) { - case 94: return 1 + case 94: return -1 default: switch { default: return -1 @@ -504,10 +505,9 @@ fun[0] = func(r rune) int { } panic("unreachable") } -acc[1] = true -fun[1] = func(r rune) int { +fun[0] = func(r rune) int { switch(r) { - case 94: return -1 + case 94: return 1 default: switch { default: return -1 @@ -578,18 +578,6 @@ a0[6].id = 6 { var acc [5]bool var fun [5]func(rune) int -fun[1] = func(r rune) int { - switch(r) { - case 45: return -1 - default: - switch { - case 48 <= r && r <= 48: return 2 - case 49 <= r && r <= 57: return 2 - default: return -1 - } - } - panic("unreachable") -} acc[2] = true fun[2] = func(r rune) int { switch(r) { @@ -603,31 +591,6 @@ fun[2] = func(r rune) int { } panic("unreachable") } -acc[4] = true -fun[4] = func(r rune) int { - switch(r) { - case 45: return -1 - default: - switch { - case 48 <= r && r <= 48: return 4 - case 49 <= r && r <= 57: return 4 - default: return -1 - } - } - panic("unreachable") -} -fun[0] = func(r rune) int { - switch(r) { - case 45: return 1 - default: - switch { - case 48 <= r && r <= 48: return 2 - case 49 <= r && r <= 57: return 3 - default: return -1 - } - } - panic("unreachable") -} acc[3] = true fun[3] = func(r rune) int { switch(r) { @@ -641,6 +604,43 @@ fun[3] = func(r rune) int { } panic("unreachable") } +acc[4] = true +fun[4] = func(r rune) int { + switch(r) { + case 45: return -1 + default: + switch { + case 48 <= r && r <= 48: return 4 + case 49 <= r && r <= 57: return 4 + default: return -1 + } + } + panic("unreachable") +} +fun[1] = func(r rune) int { + switch(r) { + case 45: return -1 + default: + switch { + case 48 <= r && r <= 48: return 2 + case 49 <= r && r <= 57: return 2 + default: return -1 + } + } + panic("unreachable") +} +fun[0] = func(r rune) int { + switch(r) { + case 45: return 1 + default: + switch { + case 48 <= r && r <= 48: return 2 + case 49 <= r && r <= 57: return 3 + default: return -1 + } + } + panic("unreachable") +} a0[7].acc = acc[:] a0[7].f = 
fun[:] a0[7].id = 7 @@ -651,9 +651,9 @@ var fun [2]func(rune) int acc[1] = true fun[1] = func(r rune) int { switch(r) { - case 9: return 1 - case 10: return 1 case 32: return 1 + case 10: return 1 + case 9: return 1 default: switch { default: return -1 @@ -664,8 +664,8 @@ fun[1] = func(r rune) int { fun[0] = func(r rune) int { switch(r) { case 9: return 1 - case 10: return 1 case 32: return 1 + case 10: return 1 default: switch { default: return -1 @@ -680,18 +680,17 @@ a0[8].id = 8 { var acc [2]bool var fun [2]func(rune) int -acc[1] = true -fun[1] = func(r rune) int { +fun[0] = func(r rune) int { switch(r) { + case 45: return -1 + case 32: return -1 + case 13: return -1 + case 94: return -1 + case 58: return -1 + case 9: return -1 + case 12: return -1 case 10: return -1 case 43: return -1 - case 9: return -1 - case 32: return -1 - case 12: return -1 - case 13: return -1 - case 58: return -1 - case 94: return -1 - case 45: return -1 default: switch { default: return 1 @@ -699,17 +698,18 @@ fun[1] = func(r rune) int { } panic("unreachable") } -fun[0] = func(r rune) int { +acc[1] = true +fun[1] = func(r rune) int { switch(r) { - case 9: return -1 - case 32: return -1 - case 12: return -1 - case 13: return -1 - case 58: return -1 case 94: return -1 - case 45: return -1 + case 58: return -1 + case 9: return -1 + case 12: return -1 case 10: return -1 case 43: return -1 + case 45: return -1 + case 32: return -1 + case 13: return -1 default: switch { default: return 1 diff --git a/search/query_syntax.y b/query_syntax.y similarity index 50% rename from search/query_syntax.y rename to query_syntax.y index f36cd3ae..c440b2cb 100644 --- a/search/query_syntax.y +++ b/query_syntax.y @@ -1,5 +1,5 @@ %{ -package search +package bleve import "log" func logDebugGrammar(format string, v ...interface{}) { @@ -62,46 +62,30 @@ searchBase: STRING { str := $1.s logDebugGrammar("STRING - %s", str) - q := &MatchQuery{ - Match: str, - Field: parsingDefaultField, - BoostVal: 1.0, - Explain: 
true, - } - if parsingMapping[parsingDefaultField] != nil { - q.Analyzer = parsingMapping[parsingDefaultField].Analyzer - } + q := NewMatchQuery(str).SetField(parsingDefaultField) if parsingMust { - parsingMustList.Terms = append(parsingMustList.Terms, q) + parsingMustList.AddQuery(q) parsingMust = false } else if parsingMustNot { - parsingMustNotList.Terms = append(parsingMustNotList.Terms, q) + parsingMustNotList.AddQuery(q) parsingMustNot = false } else { - parsingShouldList.Terms = append(parsingShouldList.Terms, q) + parsingShouldList.AddQuery(q) } } | PHRASE { phrase := $1.s logDebugGrammar("PHRASE - %s", phrase) - q := &MatchPhraseQuery{ - MatchPhrase: phrase, - Field: parsingDefaultField, - BoostVal: 1.0, - Explain: true, - } - if parsingMapping[parsingDefaultField] != nil { - q.Analyzer = parsingMapping[parsingDefaultField].Analyzer - } + q := NewMatchPhraseQuery(phrase).SetField(parsingDefaultField) if parsingMust { - parsingMustList.Terms = append(parsingMustList.Terms, q) + parsingMustList.AddQuery(q) parsingMust = false } else if parsingMustNot { - parsingMustNotList.Terms = append(parsingMustNotList.Terms, q) + parsingMustNotList.AddQuery(q) parsingMustNot = false } else { - parsingShouldList.Terms = append(parsingShouldList.Terms, q) + parsingShouldList.AddQuery(q) } } | @@ -109,23 +93,15 @@ STRING COLON STRING { field := $1.s str := $3.s logDebugGrammar("FIELD - %s STRING - %s", field, str) - q := &MatchQuery{ - Match: str, - Field: field, - BoostVal: 1.0, - Explain: true, - } - if parsingMapping[field] != nil { - q.Analyzer = parsingMapping[field].Analyzer - } + q := NewMatchQuery(str).SetField(field) if parsingMust { - parsingMustList.Terms = append(parsingMustList.Terms, q) + parsingMustList.AddQuery(q) parsingMust = false } else if parsingMustNot { - parsingMustNotList.Terms = append(parsingMustNotList.Terms, q) + parsingMustNotList.AddQuery(q) parsingMustNot = false } else { - parsingShouldList.Terms = append(parsingShouldList.Terms, q) + 
parsingShouldList.AddQuery(q) } } | @@ -133,23 +109,15 @@ STRING COLON PHRASE { field := $1.s phrase := $3.s logDebugGrammar("FIELD - %s PHRASE - %s", field, phrase) - q := &MatchPhraseQuery{ - MatchPhrase: phrase, - Field: field, - BoostVal: 1.0, - Explain: true, - } - if parsingMapping[field] != nil { - q.Analyzer = parsingMapping[field].Analyzer - } + q := NewMatchPhraseQuery(phrase).SetField(field) if parsingMust { - parsingMustList.Terms = append(parsingMustList.Terms, q) + parsingMustList.AddQuery(q) parsingMust = false } else if parsingMustNot { - parsingMustNotList.Terms = append(parsingMustNotList.Terms, q) + parsingMustNotList.AddQuery(q) parsingMustNot = false } else { - parsingShouldList.Terms = append(parsingShouldList.Terms, q) + parsingShouldList.AddQuery(q) } }; diff --git a/query_syntax_parser.go b/query_syntax_parser.go new file mode 100644 index 00000000..81755298 --- /dev/null +++ b/query_syntax_parser.go @@ -0,0 +1,68 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+package bleve + +import ( + "fmt" + "strings" + "sync" +) + +var crashHard = false +var parserMutex sync.Mutex +var parsingDefaultField string +var parsingMust bool +var parsingMustNot bool +var debugParser bool +var debugLexer bool + +var parsingMustList *ConjunctionQuery +var parsingMustNotList *DisjunctionQuery +var parsingShouldList *DisjunctionQuery +var parsingIndexMapping *IndexMapping + +func ParseQuerySyntax(query string, mapping *IndexMapping, defaultField string) (rq Query, err error) { + parserMutex.Lock() + defer parserMutex.Unlock() + + parsingIndexMapping = mapping + parsingDefaultField = defaultField + parsingMustList = NewConjunctionQuery([]Query{}) + parsingMustNotList = NewDisjunctionQuery([]Query{}) + parsingShouldList = NewDisjunctionQuery([]Query{}) + + defer func() { + r := recover() + if r != nil && r == "syntax error" { + // if we're panicking over a syntax error, chill + err = fmt.Errorf("Parse Error - %v", r) + } else if r != nil { + // otherwise continue to panic + if crashHard { + panic(r) + } else { + err = fmt.Errorf("Other Error - %v", r) + } + } + }() + + yyParse(NewLexer(strings.NewReader(query))) + parsingQuery := NewBooleanQuery(nil, nil, nil) + if len(parsingMustList.Conjuncts) > 0 { + parsingQuery.Must = parsingMustList + } + if len(parsingMustNotList.Disjuncts) > 0 { + parsingQuery.MustNot = parsingMustNotList + } + if len(parsingShouldList.Disjuncts) > 0 { + parsingQuery.Should = parsingShouldList + } + rq = parsingQuery + return rq, err +} diff --git a/search/query_term.go b/query_term.go similarity index 58% rename from search/query_term.go rename to query_term.go index 3f3dc067..18fea765 100644 --- a/search/query_term.go +++ b/query_term.go @@ -6,25 +6,45 @@ // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either express or implied. See the License for the specific language governing permissions // and limitations under the License. 
-package search +package bleve import ( - "github.com/couchbaselabs/bleve/index" + "github.com/couchbaselabs/bleve/search" ) type TermQuery struct { Term string `json:"term"` - Field string `json:"field,omitempty"` + FieldVal string `json:"field,omitempty"` BoostVal float64 `json:"boost,omitempty"` - Explain bool `json:"explain,omitempty"` +} + +func NewTermQuery(term string) *TermQuery { + return &TermQuery{ + Term: term, + BoostVal: 1.0, + } } func (q *TermQuery) Boost() float64 { return q.BoostVal } -func (q *TermQuery) Searcher(index index.Index) (Searcher, error) { - return NewTermSearcher(index, q) +func (q *TermQuery) SetBoost(b float64) *TermQuery { + q.BoostVal = b + return q +} + +func (q *TermQuery) Field() string { + return q.FieldVal +} + +func (q *TermQuery) SetField(f string) *TermQuery { + q.FieldVal = f + return q +} + +func (q *TermQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) { + return search.NewTermSearcher(i.i, q.Term, q.FieldVal, q.BoostVal, explain) } func (q *TermQuery) Validate() error { diff --git a/reflect.go b/reflect.go new file mode 100644 index 00000000..8250a188 --- /dev/null +++ b/reflect.go @@ -0,0 +1,78 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+package bleve + +import ( + "reflect" + "strings" +) + +func lookupPropertyPath(data interface{}, path string) interface{} { + pathParts := decodePath(path) + + current := data + for _, part := range pathParts { + current = lookupProptyPathPart(current, part) + if current == nil { + break + } + } + + return current +} + +func lookupProptyPathPart(data interface{}, part string) interface{} { + val := reflect.ValueOf(data) + typ := val.Type() + switch typ.Kind() { + case reflect.Map: + // FIXME can add support for other map keys in the future + if typ.Key().Kind() == reflect.String { + key := reflect.ValueOf(part) + entry := val.MapIndex(key) + if entry.IsValid() { + return entry.Interface() + } + } + case reflect.Struct: + field := val.FieldByName(part) + if field.IsValid() && field.CanInterface() { + return field.Interface() + } + } + return nil +} + +const PATH_SEPARATOR = "." + +func decodePath(path string) []string { + return strings.Split(path, PATH_SEPARATOR) +} + +func encodePath(pathElements []string) string { + return strings.Join(pathElements, PATH_SEPARATOR) +} + +func mustString(data interface{}) (string, bool) { + if data != nil { + str, ok := data.(string) + if ok { + return str, true + } + } + return "", false +} + +// parseJSONTagName extracts the JSON field name from a struct tag +func parseJSONTagName(tag string) string { + if idx := strings.Index(tag, ","); idx != -1 { + return tag[:idx] + } + return tag +} diff --git a/search.go b/search.go new file mode 100644 index 00000000..0f0850a5 --- /dev/null +++ b/search.go @@ -0,0 +1,110 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +package bleve + +import ( + "encoding/json" + "fmt" + "time" + + "github.com/couchbaselabs/bleve/search" +) + +type HighlightRequest struct { + Style *string `json:"style"` + Fields []string `json:"fields"` +} + +func NewHighlight() *HighlightRequest { + return &HighlightRequest{} +} + +func NewHighlightWithStyle(style string) *HighlightRequest { + return &HighlightRequest{ + Style: &style, + } +} + +type SearchRequest struct { + Query Query `json:"query"` + Size int `json:"size"` + From int `json:"from"` + Highlight *HighlightRequest `json:"highlight"` + Explain bool `json:"explain"` +} + +func (r *SearchRequest) UnmarshalJSON(input []byte) error { + var temp struct { + Q json.RawMessage `json:"query"` + Size int `json:"size"` + From int `json:"from"` + Highlight *HighlightRequest `json:"highlight"` + Explain bool `json:"explain"` + } + + err := json.Unmarshal(input, &temp) + if err != nil { + return err + } + + r.Size = temp.Size + r.From = temp.From + r.Explain = temp.Explain + r.Highlight = temp.Highlight + r.Query, err = ParseQuery(temp.Q) + if err != nil { + return err + } + + if r.Size <= 0 { + r.Size = 10 + } + if r.From <= 0 { + r.From = 0 + } + + return nil + +} + +func NewSearchRequest(q Query, size, from int, explain bool) *SearchRequest { + return &SearchRequest{ + Query: q, + Size: size, + From: from, + Explain: explain, + } +} + +type SearchResult struct { + Request *SearchRequest `json:"request"` + Hits search.DocumentMatchCollection `json:"hits"` + Total uint64 `json:"total_hits"` + MaxScore float64 `json:"max_score"` + Took time.Duration 
`json:"took"` +} + +func (sr *SearchResult) String() string { + rv := "" + if len(sr.Hits) > 0 { + rv = fmt.Sprintf("%d matches, showing %d through %d, took %s\n", sr.Total, sr.Request.From+1, sr.Request.From+len(sr.Hits), sr.Took) + for i, hit := range sr.Hits { + rv += fmt.Sprintf("%5d. %s (%f)\n", i+sr.Request.From+1, hit.ID, hit.Score) + for fragmentField, fragments := range hit.Fragments { + rv += fmt.Sprintf("\t%s\n", fragmentField) + for _, fragment := range fragments { + rv += fmt.Sprintf("\t\t%s\n", fragment) + } + } + } + } else { + rv = "No matches" + } + return rv +} diff --git a/search/base_test.go b/search/base_test.go index acbb76fe..7565ac68 100644 --- a/search/base_test.go +++ b/search/base_test.go @@ -10,7 +10,10 @@ package search import ( "math" + "regexp" + "github.com/couchbaselabs/bleve/analysis" + "github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer" "github.com/couchbaselabs/bleve/document" "github.com/couchbaselabs/bleve/index" "github.com/couchbaselabs/bleve/index/store/inmem" @@ -27,6 +30,11 @@ func init() { } } +// create a simpler analyzer which will support these tests +var testAnalyzer = &analysis.Analyzer{ + Tokenizer: regexp_tokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)), +} + // sets up some mock data used in many tests in this package var twoDocIndexDescIndexingOptions = document.DEFAULT_TEXT_INDEXING_OPTIONS | document.INCLUDE_TERM_VECTORS @@ -34,28 +42,28 @@ var twoDocIndexDocs = []*document.Document{ // must have 4/4 beer document.NewDocument("1"). AddField(document.NewTextField("name", []byte("marty"))). - AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions)). - AddField(document.NewTextField("street", []byte("couchbase way"))), + AddField(document.NewTextFieldCustom("desc", []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)). 
+ AddField(document.NewTextFieldWithAnalyzer("street", []byte("couchbase way"), testAnalyzer)), // must have 1/4 beer document.NewDocument("2"). AddField(document.NewTextField("name", []byte("steve"))). - AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("angst beer couch database"), twoDocIndexDescIndexingOptions)). - AddField(document.NewTextField("street", []byte("couchbase way"))). - AddField(document.NewTextField("title", []byte("mister"))), + AddField(document.NewTextFieldCustom("desc", []byte("angst beer couch database"), twoDocIndexDescIndexingOptions, testAnalyzer)). + AddField(document.NewTextFieldWithAnalyzer("street", []byte("couchbase way"), testAnalyzer)). + AddField(document.NewTextFieldWithAnalyzer("title", []byte("mister"), testAnalyzer)), // must have 1/4 beer document.NewDocument("3"). AddField(document.NewTextField("name", []byte("dustin"))). - AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("apple beer column dank"), twoDocIndexDescIndexingOptions)). - AddField(document.NewTextField("title", []byte("mister"))), + AddField(document.NewTextFieldCustom("desc", []byte("apple beer column dank"), twoDocIndexDescIndexingOptions, testAnalyzer)). + AddField(document.NewTextFieldWithAnalyzer("title", []byte("mister"), testAnalyzer)), // must have 65/65 beer document.NewDocument("4"). AddField(document.NewTextField("name", []byte("ravi"))). 
- AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer"), twoDocIndexDescIndexingOptions)), + AddField(document.NewTextFieldCustom("desc", []byte("beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)), // must have 0/x beer document.NewDocument("5"). AddField(document.NewTextField("name", []byte("bobert"))). - AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("water"), twoDocIndexDescIndexingOptions)). - AddField(document.NewTextField("title", []byte("mister"))), + AddField(document.NewTextFieldCustom("desc", []byte("water"), twoDocIndexDescIndexingOptions, testAnalyzer)). 
+ AddField(document.NewTextFieldWithAnalyzer("title", []byte("mister"), testAnalyzer)), } func scoresCloseEnough(a, b float64) bool { diff --git a/search/collector_top_score.go b/search/collector_top_score.go index 01acfbea..e54a6608 100644 --- a/search/collector_top_score.go +++ b/search/collector_top_score.go @@ -15,6 +15,7 @@ import ( type TopScoreCollector struct { k int + skip int results *list.List took time.Duration maxScore float64 @@ -24,6 +25,15 @@ type TopScoreCollector struct { func NewTopScorerCollector(k int) *TopScoreCollector { return &TopScoreCollector{ k: k, + skip: 0, + results: list.New(), + } +} + +func NewTopScorerSkipCollector(k, skip int) *TopScoreCollector { + return &TopScoreCollector{ + k: k, + skip: skip, results: list.New(), } } @@ -70,7 +80,7 @@ func (tksc *TopScoreCollector) collectSingle(dm *DocumentMatch) { tksc.results.InsertBefore(dm, e) // if we just made the list too long - if tksc.results.Len() > tksc.k { + if tksc.results.Len() > (tksc.k + tksc.skip) { // remove the head tksc.results.Remove(tksc.results.Front()) } @@ -79,18 +89,26 @@ func (tksc *TopScoreCollector) collectSingle(dm *DocumentMatch) { } // if we got to the end, we still have to add it tksc.results.PushBack(dm) - if tksc.results.Len() > tksc.k { + if tksc.results.Len() > (tksc.k + tksc.skip) { // remove the head tksc.results.Remove(tksc.results.Front()) } } func (tksc *TopScoreCollector) Results() DocumentMatchCollection { - rv := make(DocumentMatchCollection, tksc.results.Len()) - i := 0 - for e := tksc.results.Back(); e != nil; e = e.Prev() { - rv[i] = e.Value.(*DocumentMatch) - i++ + if tksc.results.Len()-tksc.skip > 0 { + rv := make(DocumentMatchCollection, tksc.results.Len()-tksc.skip) + i := 0 + skipped := 0 + for e := tksc.results.Back(); e != nil; e = e.Prev() { + if skipped < tksc.skip { + skipped++ + continue + } + rv[i] = e.Value.(*DocumentMatch) + i++ + } + return rv } - return rv + return DocumentMatchCollection{} } diff --git 
a/search/collector_top_score_test.go b/search/collector_top_score_test.go index 917e8acd..fc5dd660 100644 --- a/search/collector_top_score_test.go +++ b/search/collector_top_score_test.go @@ -116,3 +116,97 @@ func TestTop10Scores(t *testing.T) { t.Errorf("expected minimum score to be higher than 10, got %f", minScore) } } + +func TestTop10ScoresSkip10(t *testing.T) { + + // a stub search with more than 10 matches + // the top-10 scores are > 10 + // everything else is less than 10 + searcher := &stubSearcher{ + matches: DocumentMatchCollection{ + &DocumentMatch{ + ID: "a", + Score: 11, + }, + &DocumentMatch{ + ID: "b", + Score: 9.5, + }, + &DocumentMatch{ + ID: "c", + Score: 11, + }, + &DocumentMatch{ + ID: "d", + Score: 9, + }, + &DocumentMatch{ + ID: "e", + Score: 11, + }, + &DocumentMatch{ + ID: "f", + Score: 9, + }, + &DocumentMatch{ + ID: "g", + Score: 11, + }, + &DocumentMatch{ + ID: "h", + Score: 9, + }, + &DocumentMatch{ + ID: "i", + Score: 11, + }, + &DocumentMatch{ + ID: "j", + Score: 11, + }, + &DocumentMatch{ + ID: "k", + Score: 11, + }, + &DocumentMatch{ + ID: "l", + Score: 99, + }, + &DocumentMatch{ + ID: "m", + Score: 11, + }, + &DocumentMatch{ + ID: "n", + Score: 11, + }, + }, + } + + collector := NewTopScorerSkipCollector(10, 10) + collector.Collect(searcher) + + maxScore := collector.MaxScore() + if maxScore != 99.0 { + t.Errorf("expected max score 99.0, got %f", maxScore) + } + + total := collector.Total() + if total != 14 { + t.Errorf("expected 14 total results, got %d", total) + } + + results := collector.Results() + + if len(results) != 4 { + t.Fatalf("expected 4 results, got %d", len(results)) + } + + if results[0].ID != "b" { + t.Errorf("expected first result to have ID 'b', got %s", results[0].ID) + } + + if results[0].Score != 9.5 { + t.Errorf("expected highest score to be 9.5ß, got %f", results[0].Score) + } +} diff --git a/search/fragment_formatter_ansi.go b/search/fragment_formatter_ansi.go index 699ed33b..2e19989e 100644 --- 
a/search/fragment_formatter_ansi.go +++ b/search/fragment_formatter_ansi.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package search import () diff --git a/search/fragment_formatter_html.go b/search/fragment_formatter_html.go index df63ad85..75e3ebda 100644 --- a/search/fragment_formatter_html.go +++ b/search/fragment_formatter_html.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package search import () diff --git a/search/fragment_formatter_html_test.go b/search/fragment_formatter_html_test.go index af95faa6..a2b8a0dc 100644 --- a/search/fragment_formatter_html_test.go +++ b/search/fragment_formatter_html_test.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package search import ( diff --git a/search/fragment_scorer_simple.go b/search/fragment_scorer_simple.go index 71374b87..9b17b168 100644 --- a/search/fragment_scorer_simple.go +++ b/search/fragment_scorer_simple.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package search import () diff --git a/search/fragment_scorer_simple_test.go b/search/fragment_scorer_simple_test.go index 3b9e6ed3..7ba692dd 100644 --- a/search/fragment_scorer_simple_test.go +++ b/search/fragment_scorer_simple_test.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. 
See the License for the specific language governing permissions +// and limitations under the License. package search import ( diff --git a/search/fragmenter_simple.go b/search/fragmenter_simple.go index 32d94c7e..e8f06469 100644 --- a/search/fragmenter_simple.go +++ b/search/fragmenter_simple.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package search import () diff --git a/search/fragmenter_simple_test.go b/search/fragmenter_simple_test.go index 226075a9..32492aca 100644 --- a/search/fragmenter_simple_test.go +++ b/search/fragmenter_simple_test.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package search import ( diff --git a/search/highlighter.go b/search/highlighter.go index 2f2a0e13..375d5186 100644 --- a/search/highlighter.go +++ b/search/highlighter.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package search import ( diff --git a/search/highlighter_simple.go b/search/highlighter_simple.go index 601582ca..a7a7e209 100644 --- a/search/highlighter_simple.go +++ b/search/highlighter_simple.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package search import ( diff --git a/search/highlighter_simple_test.go b/search/highlighter_simple_test.go index e65d64eb..4e8dfff7 100644 --- a/search/highlighter_simple_test.go +++ b/search/highlighter_simple_test.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package search import ( diff --git a/search/query_boolean.go b/search/query_boolean.go deleted file mode 100644 index da018d9a..00000000 --- a/search/query_boolean.go +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-package search - -import ( - "encoding/json" - "fmt" - - "github.com/couchbaselabs/bleve/document" - "github.com/couchbaselabs/bleve/index" -) - -type TermBooleanQuery struct { - Must *TermConjunctionQuery `json:"must,omitempty"` - MustNot *TermDisjunctionQuery `json:"must_not,omitempty"` - Should *TermDisjunctionQuery `json:"should,omitempty"` - BoostVal float64 `json:"boost,omitempty"` - Explain bool `json:"explain,omitempty"` - mapping document.Mapping -} - -func (q *TermBooleanQuery) Boost() float64 { - return q.BoostVal -} - -func (q *TermBooleanQuery) Searcher(index index.Index) (Searcher, error) { - return NewTermBooleanSearcher(index, q) -} - -func (q *TermBooleanQuery) Validate() error { - if q.Must == nil && q.Should == nil { - return fmt.Errorf("Boolean query must contain at least one MUST or SHOULD clause") - } - if q.Must != nil && len(q.Must.Terms) == 0 && q.Should != nil && len(q.Should.Terms) == 0 { - return fmt.Errorf("Boolean query must contain at least one MUST or SHOULD clause") - } - return nil -} - -func (q *TermBooleanQuery) UnmarshalJSON(data []byte) error { - tmp := struct { - Must json.RawMessage `json:"must,omitempty"` - MustNot json.RawMessage `json:"must_not,omitempty"` - Should json.RawMessage `json:"should,omitempty"` - BoostVal float64 `json:"boost,omitempty"` - Explain bool `json:"explain,omitempty"` - }{} - err := json.Unmarshal(data, &tmp) - if err != nil { - return err - } - - q.BoostVal = tmp.BoostVal - q.Explain = tmp.Explain - - var must TermConjunctionQuery - if len(tmp.Must) > 0 { - must.mapping = q.mapping - err = json.Unmarshal(tmp.Must, &must) - if err != nil { - return err - } - q.Must = &must - } - var mustNot TermDisjunctionQuery - if len(tmp.MustNot) > 0 { - mustNot.mapping = q.mapping - err = json.Unmarshal(tmp.MustNot, &mustNot) - if err != nil { - return err - } - q.MustNot = &mustNot - } - var should TermDisjunctionQuery - if len(tmp.Should) > 0 { - must.mapping = q.mapping - err = json.Unmarshal(tmp.Should, 
&should) - if err != nil { - return err - } - q.Should = &should - } - return nil -} diff --git a/search/query_match.go b/search/query_match.go deleted file mode 100644 index e2ce4f08..00000000 --- a/search/query_match.go +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -package search - -import ( - "encoding/json" - - "github.com/couchbaselabs/bleve/analysis" - "github.com/couchbaselabs/bleve/document" - "github.com/couchbaselabs/bleve/index" -) - -type MatchQuery struct { - Match string `json:"match"` - Field string `json:"field,omitempty"` - BoostVal float64 `json:"boost,omitempty"` - Explain bool `json:"explain,omitempty"` - Analyzer *analysis.Analyzer - mapping document.Mapping -} - -func (q *MatchQuery) Boost() float64 { - return q.BoostVal -} - -func (q *MatchQuery) Searcher(index index.Index) (Searcher, error) { - tokens := q.Analyzer.Analyze([]byte(q.Match)) - if len(tokens) > 0 { - tqs := make([]Query, len(tokens)) - for i, token := range tokens { - tqs[i] = &TermQuery{ - Term: string(token.Term), - Field: q.Field, - BoostVal: q.BoostVal, - Explain: q.Explain, - } - } - boolQuery := &TermBooleanQuery{ - Should: &TermDisjunctionQuery{ - Terms: tqs, - BoostVal: q.BoostVal, - Explain: q.Explain, - Min: 1, - }, - BoostVal: q.BoostVal, - Explain: q.Explain, - } - return NewTermBooleanSearcher(index, boolQuery) - } else { - noneQuery := &MatchNoneQuery{BoostVal: q.BoostVal, Explain: q.Explain} - return 
NewMatchNoneSearcher(index, noneQuery) - } -} - -func (q *MatchQuery) Validate() error { - return nil -} - -func (q *MatchQuery) UnmarshalJSON(data []byte) error { - tmp := struct { - Match string `json:"match"` - Field string `json:"field,omitempty"` - BoostVal float64 `json:"boost,omitempty"` - Explain bool `json:"explain,omitempty"` - }{} - err := json.Unmarshal(data, &tmp) - if err != nil { - return err - } - q.Match = tmp.Match - q.Field = tmp.Field - q.BoostVal = tmp.BoostVal - q.Explain = tmp.Explain - q.Analyzer = q.mapping[q.Field].Analyzer - return nil -} diff --git a/search/query_match_phrase.go b/search/query_match_phrase.go deleted file mode 100644 index 5c09b9c5..00000000 --- a/search/query_match_phrase.go +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-package search - -import ( - "encoding/json" - - "github.com/couchbaselabs/bleve/analysis" - "github.com/couchbaselabs/bleve/document" - "github.com/couchbaselabs/bleve/index" -) - -type MatchPhraseQuery struct { - MatchPhrase string `json:"match_phrase"` - Field string `json:"field,omitempty"` - BoostVal float64 `json:"boost,omitempty"` - Explain bool `json:"explain,omitempty"` - Analyzer *analysis.Analyzer - mapping document.Mapping -} - -func (q *MatchPhraseQuery) Boost() float64 { - return q.BoostVal -} - -func (q *MatchPhraseQuery) Searcher(index index.Index) (Searcher, error) { - tokens := q.Analyzer.Analyze([]byte(q.MatchPhrase)) - if len(tokens) > 0 { - maxPos := 0 - // find the highest position index - for _, token := range tokens { - if int(token.Position) > maxPos { - maxPos = int(token.Position) - } - } - // use tokens to build phrase - phraseTerms := make([]*TermQuery, maxPos) - for _, token := range tokens { - phraseTerms[int(token.Position)-1] = &TermQuery{ - Term: string(token.Term), - Field: q.Field, - BoostVal: q.BoostVal, - Explain: q.Explain, - } - } - phraseQuery := &PhraseQuery{ - Terms: phraseTerms, - BoostVal: q.BoostVal, - Explain: q.Explain, - } - return NewPhraseSearcher(index, phraseQuery) - } else { - noneQuery := &MatchNoneQuery{BoostVal: q.BoostVal, Explain: q.Explain} - return NewMatchNoneSearcher(index, noneQuery) - } -} - -func (q *MatchPhraseQuery) Validate() error { - return nil -} - -func (q *MatchPhraseQuery) UnmarshalJSON(data []byte) error { - tmp := struct { - MatchPhrase string `json:"match_phrase"` - Field string `json:"field,omitempty"` - BoostVal float64 `json:"boost,omitempty"` - Explain bool `json:"explain,omitempty"` - }{} - err := json.Unmarshal(data, &tmp) - if err != nil { - return err - } - q.MatchPhrase = tmp.MatchPhrase - q.Field = tmp.Field - q.BoostVal = tmp.BoostVal - q.Explain = tmp.Explain - q.Analyzer = q.mapping[q.Field].Analyzer - return nil -} diff --git a/search/query_phrase.go 
b/search/query_phrase.go deleted file mode 100644 index 4709b899..00000000 --- a/search/query_phrase.go +++ /dev/null @@ -1,28 +0,0 @@ -package search - -import ( - "fmt" - - "github.com/couchbaselabs/bleve/index" -) - -type PhraseQuery struct { - Terms []*TermQuery `json:"terms,omitempty"` - BoostVal float64 `json:"boost,omitempty"` - Explain bool `json:"explain,omitempty"` -} - -func (q *PhraseQuery) Boost() float64 { - return q.BoostVal -} - -func (q *PhraseQuery) Searcher(index index.Index) (Searcher, error) { - return NewPhraseSearcher(index, q) -} - -func (q *PhraseQuery) Validate() error { - if q.Terms == nil { - return fmt.Errorf("Phrase query must contain at least one term") - } - return nil -} diff --git a/search/query_syntax_parser.go b/search/query_syntax_parser.go deleted file mode 100644 index 94802ee2..00000000 --- a/search/query_syntax_parser.go +++ /dev/null @@ -1,81 +0,0 @@ -package search - -import ( - "fmt" - "strings" - "sync" - - "github.com/couchbaselabs/bleve/document" -) - -var crashHard = false -var parserMutex sync.Mutex -var parsingDefaultField string -var parsingMust bool -var parsingMustNot bool -var debugParser bool -var debugLexer bool - -var parsingMustList *TermConjunctionQuery -var parsingMustNotList *TermDisjunctionQuery -var parsingShouldList *TermDisjunctionQuery -var parsingMapping document.Mapping - -func ParseQuerySyntax(query string, mapping document.Mapping, defaultField string) (rq Query, err error) { - parserMutex.Lock() - defer parserMutex.Unlock() - - parsingMapping = mapping - parsingDefaultField = defaultField - - parsingMustList = &TermConjunctionQuery{ - Terms: make([]Query, 0), - BoostVal: 1.0, - Explain: true, - } - - parsingMustNotList = &TermDisjunctionQuery{ - Terms: make([]Query, 0), - BoostVal: 1.0, - Explain: true, - } - - parsingShouldList = &TermDisjunctionQuery{ - Terms: make([]Query, 0), - BoostVal: 1.0, - Explain: true, - Min: 1, - } - - defer func() { - r := recover() - if r != nil && r == "syntax 
error" { - // if we're panicing over a syntax error, chill - err = fmt.Errorf("Parse Error - %v", r) - } else if r != nil { - // otherise continue to panic - if crashHard { - panic(r) - } else { - err = fmt.Errorf("Other Error - %v", r) - } - } - }() - - yyParse(NewLexer(strings.NewReader(query))) - parsingQuery := &TermBooleanQuery{ - BoostVal: 1.0, - Explain: true, - } - if len(parsingMustList.Terms) > 0 { - parsingQuery.Must = parsingMustList - } - if len(parsingMustNotList.Terms) > 0 { - parsingQuery.MustNot = parsingMustNotList - } - if len(parsingShouldList.Terms) > 0 { - parsingQuery.Should = parsingShouldList - } - rq = parsingQuery - return rq, err -} diff --git a/search/query_syntax_parser_test.go b/search/query_syntax_parser_test.go deleted file mode 100644 index f9aa33c7..00000000 --- a/search/query_syntax_parser_test.go +++ /dev/null @@ -1,256 +0,0 @@ -package search - -import ( - "reflect" - "testing" - - "github.com/couchbaselabs/bleve/document" -) - -func TestQuerySyntaxParserValid(t *testing.T) { - - tests := []struct { - input string - result Query - mapping document.Mapping - }{ - { - input: "test", - mapping: document.Mapping{}, - result: &TermBooleanQuery{ - Should: &TermDisjunctionQuery{ - Terms: []Query{ - &MatchQuery{ - Match: "test", - Field: "_all", - BoostVal: 1.0, - Explain: true, - }, - }, - BoostVal: 1.0, - Explain: true, - Min: 1.0, - }, - BoostVal: 1.0, - Explain: true, - }, - }, - { - input: `"test phrase 1"`, - mapping: document.Mapping{}, - result: &TermBooleanQuery{ - Should: &TermDisjunctionQuery{ - Terms: []Query{ - &MatchPhraseQuery{ - MatchPhrase: "test phrase 1", - Field: "_all", - BoostVal: 1.0, - Explain: true, - }, - }, - BoostVal: 1.0, - Explain: true, - Min: 1.0, - }, - BoostVal: 1.0, - Explain: true, - }, - }, - { - input: "field:test", - mapping: document.Mapping{}, - result: &TermBooleanQuery{ - Should: &TermDisjunctionQuery{ - Terms: []Query{ - &MatchQuery{ - Match: "test", - Field: "field", - BoostVal: 1.0, - 
Explain: true, - }, - }, - BoostVal: 1.0, - Explain: true, - Min: 1.0, - }, - BoostVal: 1.0, - Explain: true, - }, - }, - { - input: "+field1:test1", - mapping: document.Mapping{}, - result: &TermBooleanQuery{ - Must: &TermConjunctionQuery{ - Terms: []Query{ - &MatchQuery{ - Match: "test1", - Field: "field1", - BoostVal: 1.0, - Explain: true, - }, - }, - BoostVal: 1.0, - Explain: true, - }, - BoostVal: 1.0, - Explain: true, - }, - }, - { - input: "-field2:test2", - mapping: document.Mapping{}, - result: &TermBooleanQuery{ - MustNot: &TermDisjunctionQuery{ - Terms: []Query{ - &MatchQuery{ - Match: "test2", - Field: "field2", - BoostVal: 1.0, - Explain: true, - }, - }, - BoostVal: 1.0, - Explain: true, - }, - BoostVal: 1.0, - Explain: true, - }, - }, - { - input: `field3:"test phrase 2"`, - mapping: document.Mapping{}, - result: &TermBooleanQuery{ - Should: &TermDisjunctionQuery{ - Terms: []Query{ - &MatchPhraseQuery{ - MatchPhrase: "test phrase 2", - Field: "field3", - BoostVal: 1.0, - Explain: true, - }, - }, - BoostVal: 1.0, - Explain: true, - Min: 1.0, - }, - BoostVal: 1.0, - Explain: true, - }, - }, - { - input: `+field4:"test phrase 1"`, - mapping: document.Mapping{}, - result: &TermBooleanQuery{ - Must: &TermConjunctionQuery{ - Terms: []Query{ - &MatchPhraseQuery{ - MatchPhrase: "test phrase 1", - Field: "field4", - BoostVal: 1.0, - Explain: true, - }, - }, - BoostVal: 1.0, - Explain: true, - }, - BoostVal: 1.0, - Explain: true, - }, - }, - { - input: `-field5:"test phrase 2"`, - mapping: document.Mapping{}, - result: &TermBooleanQuery{ - MustNot: &TermDisjunctionQuery{ - Terms: []Query{ - &MatchPhraseQuery{ - MatchPhrase: "test phrase 2", - Field: "field5", - BoostVal: 1.0, - Explain: true, - }, - }, - BoostVal: 1.0, - Explain: true, - }, - BoostVal: 1.0, - Explain: true, - }, - }, - { - input: `+field6:test3 -field7:test4 field8:test5`, - mapping: document.Mapping{}, - result: &TermBooleanQuery{ - Must: &TermConjunctionQuery{ - Terms: []Query{ - &MatchQuery{ 
- Match: "test3", - Field: "field6", - BoostVal: 1.0, - Explain: true, - }, - }, - BoostVal: 1.0, - Explain: true, - }, - MustNot: &TermDisjunctionQuery{ - Terms: []Query{ - &MatchQuery{ - Match: "test4", - Field: "field7", - BoostVal: 1.0, - Explain: true, - }, - }, - BoostVal: 1.0, - Explain: true, - }, - Should: &TermDisjunctionQuery{ - Terms: []Query{ - &MatchQuery{ - Match: "test5", - Field: "field8", - BoostVal: 1.0, - Explain: true, - }, - }, - BoostVal: 1.0, - Explain: true, - Min: 1.0, - }, - BoostVal: 1.0, - Explain: true, - }, - }, - } - parsingDefaultField = "_all" - for _, test := range tests { - q, err := ParseQuerySyntax(test.input, test.mapping, parsingDefaultField) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(q, test.result) { - t.Errorf("Expected %#v, got %#v: for %s", test.result, q, test.input) - for _, x := range q.(*TermBooleanQuery).Should.Terms { - t.Logf("term: %#v", x) - } - } - } -} - -func TestQuerySyntaxParserInvalid(t *testing.T) { - tests := []struct { - input string - }{ - {"^"}, - {"^5"}, - } - - for _, test := range tests { - _, err := ParseQuerySyntax(test.input, document.Mapping{}, "_all") - if err == nil { - t.Errorf("expected error, got nil for `%s`", test.input) - } - } -} diff --git a/search/query_term_conjunction.go b/search/query_term_conjunction.go deleted file mode 100644 index 7b0e18af..00000000 --- a/search/query_term_conjunction.go +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. 
See the License for the specific language governing permissions -// and limitations under the License. -package search - -import ( - "encoding/json" - - "github.com/couchbaselabs/bleve/document" - "github.com/couchbaselabs/bleve/index" -) - -type TermConjunctionQuery struct { - Terms []Query `json:"terms"` - BoostVal float64 `json:"boost"` - Explain bool `json:"explain"` - mapping document.Mapping -} - -func (q *TermConjunctionQuery) Boost() float64 { - return q.BoostVal -} - -func (q *TermConjunctionQuery) Searcher(index index.Index) (Searcher, error) { - return NewTermConjunctionSearcher(index, q) -} - -func (q *TermConjunctionQuery) Validate() error { - return nil -} - -func (q *TermConjunctionQuery) UnmarshalJSON(data []byte) error { - tmp := struct { - Terms []json.RawMessage `json:"terms"` - BoostVal float64 `json:"boost"` - Explain bool `json:"explain"` - }{} - err := json.Unmarshal(data, &tmp) - if err != nil { - return err - } - q.Terms = make([]Query, len(tmp.Terms)) - for i, term := range tmp.Terms { - query, err := ParseQuery(term, q.mapping) - if err != nil { - return err - } - q.Terms[i] = query - } - q.BoostVal = tmp.BoostVal - q.Explain = tmp.Explain - return nil -} diff --git a/search/query_term_disjunction.go b/search/query_term_disjunction.go deleted file mode 100644 index 69d24137..00000000 --- a/search/query_term_disjunction.go +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-package search - -import ( - "encoding/json" - "fmt" - - "github.com/couchbaselabs/bleve/document" - "github.com/couchbaselabs/bleve/index" -) - -type TermDisjunctionQuery struct { - Terms []Query `json:"terms"` - BoostVal float64 `json:"boost"` - Explain bool `json:"explain"` - Min float64 `json:"min"` - mapping document.Mapping -} - -func (q *TermDisjunctionQuery) Boost() float64 { - return q.BoostVal -} - -func (q *TermDisjunctionQuery) Searcher(index index.Index) (Searcher, error) { - return NewTermDisjunctionSearcher(index, q) -} - -func (q *TermDisjunctionQuery) Validate() error { - if int(q.Min) > len(q.Terms) { - return fmt.Errorf("Minimum clauses in disjunction exceeds total number of clauses") - } - return nil -} - -func (q *TermDisjunctionQuery) UnmarshalJSON(data []byte) error { - tmp := struct { - Terms []json.RawMessage `json:"terms"` - BoostVal float64 `json:"boost"` - Explain bool `json:"explain"` - Min float64 `json:"min"` - }{} - err := json.Unmarshal(data, &tmp) - if err != nil { - return err - } - q.Terms = make([]Query, len(tmp.Terms)) - for i, term := range tmp.Terms { - query, err := ParseQuery(term, q.mapping) - if err != nil { - return err - } - q.Terms[i] = query - } - q.BoostVal = tmp.BoostVal - q.Explain = tmp.Explain - q.Min = tmp.Min - return nil -} diff --git a/search/scorer_contsant.go b/search/scorer_contsant.go index 6eaeb4a2..1f374642 100644 --- a/search/scorer_contsant.go +++ b/search/scorer_contsant.go @@ -14,26 +14,26 @@ import ( type ConstantScorer struct { constant float64 - query Query + boost float64 explain bool queryNorm float64 queryWeight float64 queryWeightExplanation *Explanation } -func NewConstantScorer(query Query, constant float64, explain bool) *ConstantScorer { +func NewConstantScorer(constant float64, boost float64, explain bool) *ConstantScorer { rv := ConstantScorer{ - query: query, explain: explain, queryWeight: 1.0, constant: constant, + boost: boost, } return &rv } func (s *ConstantScorer) Weight() 
float64 { - sum := s.query.Boost() + sum := s.boost return sum * sum } @@ -41,12 +41,12 @@ func (s *ConstantScorer) SetQueryNorm(qnorm float64) { s.queryNorm = qnorm // update the query weight - s.queryWeight = s.query.Boost() * s.queryNorm + s.queryWeight = s.boost * s.queryNorm if s.explain { childrenExplanations := make([]*Explanation, 2) childrenExplanations[0] = &Explanation{ - Value: s.query.Boost(), + Value: s.boost, Message: "boost", } childrenExplanations[1] = &Explanation{ @@ -55,7 +55,7 @@ func (s *ConstantScorer) SetQueryNorm(qnorm float64) { } s.queryWeightExplanation = &Explanation{ Value: s.queryWeight, - Message: fmt.Sprintf("ConstantScore()^%f, product of:", s.query.Boost()), + Message: fmt.Sprintf("ConstantScore()^%f, product of:", s.boost), Children: childrenExplanations, } } @@ -82,7 +82,7 @@ func (s *ConstantScorer) Score(id string) *DocumentMatch { childExplanations[1] = scoreExplanation scoreExplanation = &Explanation{ Value: score, - Message: fmt.Sprintf("weight(^%f), product of:", s.query.Boost()), + Message: fmt.Sprintf("weight(^%f), product of:", s.boost), Children: childExplanations, } } diff --git a/search/scorer_term.go b/search/scorer_term.go index 62f1e8f4..1b0009aa 100644 --- a/search/scorer_term.go +++ b/search/scorer_term.go @@ -18,7 +18,9 @@ import ( const MAX_SCORE_CACHE = 64 type TermQueryScorer struct { - query *TermQuery + queryTerm string + queryField string + queryBoost float64 docTerm uint64 docTotal uint64 idf float64 @@ -29,9 +31,11 @@ type TermQueryScorer struct { queryWeightExplanation *Explanation } -func NewTermQueryScorer(query *TermQuery, docTotal, docTerm uint64, explain bool) *TermQueryScorer { +func NewTermQueryScorer(queryTerm string, queryField string, queryBoost float64, docTotal, docTerm uint64, explain bool) *TermQueryScorer { rv := TermQueryScorer{ - query: query, + queryTerm: queryTerm, + queryField: queryField, + queryBoost: queryBoost, docTerm: docTerm, docTotal: docTotal, idf: 1.0 + 
math.Log(float64(docTotal)/float64(docTerm+1.0)), @@ -50,7 +54,7 @@ func NewTermQueryScorer(query *TermQuery, docTotal, docTerm uint64, explain bool } func (s *TermQueryScorer) Weight() float64 { - sum := s.query.Boost() * s.idf + sum := s.queryBoost * s.idf return sum * sum } @@ -58,12 +62,12 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) { s.queryNorm = qnorm // update the query weight - s.queryWeight = s.query.Boost() * s.idf * s.queryNorm + s.queryWeight = s.queryBoost * s.idf * s.queryNorm if s.explain { childrenExplanations := make([]*Explanation, 3) childrenExplanations[0] = &Explanation{ - Value: s.query.Boost(), + Value: s.queryBoost, Message: "boost", } childrenExplanations[1] = s.idfExplanation @@ -73,7 +77,7 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) { } s.queryWeightExplanation = &Explanation{ Value: s.queryWeight, - Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.query.Field, string(s.query.Term), s.query.Boost()), + Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, string(s.queryTerm), s.queryBoost), Children: childrenExplanations, } } @@ -95,16 +99,16 @@ func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *DocumentMatch { childrenExplanations := make([]*Explanation, 3) childrenExplanations[0] = &Explanation{ Value: tf, - Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.query.Field, string(s.query.Term), termMatch.Freq), + Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, string(s.queryTerm), termMatch.Freq), } childrenExplanations[1] = &Explanation{ Value: termMatch.Norm, - Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.query.Field, termMatch.ID), + Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID), } childrenExplanations[2] = s.idfExplanation scoreExplanation = &Explanation{ Value: score, - Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.query.Field, string(s.query.Term), termMatch.ID), + Message: 
fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, string(s.queryTerm), termMatch.ID), Children: childrenExplanations, } } @@ -118,7 +122,7 @@ func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *DocumentMatch { childExplanations[1] = scoreExplanation scoreExplanation = &Explanation{ Value: score, - Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.query.Field, string(s.query.Term), s.query.Boost(), termMatch.ID), + Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, string(s.queryTerm), s.queryBoost, termMatch.ID), Children: childExplanations, } } @@ -147,14 +151,14 @@ func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *DocumentMatch { End: float64(v.End), } - locations := tlm[s.query.Term] + locations := tlm[s.queryTerm] if locations == nil { locations = make(Locations, 1) locations[0] = &loc } else { locations = append(locations, &loc) } - tlm[s.query.Term] = locations + tlm[s.queryTerm] = locations rv.Locations[v.Field] = tlm } diff --git a/search/scorer_term_test.go b/search/scorer_term_test.go index c305302e..74ec2a59 100644 --- a/search/scorer_term_test.go +++ b/search/scorer_term_test.go @@ -18,16 +18,12 @@ import ( func TestTermScorer(t *testing.T) { - query := TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 1.0, - Explain: true, - } - var docTotal uint64 = 100 var docTerm uint64 = 9 - scorer := NewTermQueryScorer(&query, docTotal, docTerm, true) + var queryTerm = "beer" + var queryField = "desc" + var queryBoost = 1.0 + scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, true) idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)) tests := []struct { @@ -157,16 +153,12 @@ func TestTermScorer(t *testing.T) { func TestTermScorerWithQueryNorm(t *testing.T) { - query := TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 3.0, - Explain: true, - } - var docTotal uint64 = 100 var docTerm uint64 = 9 - scorer := NewTermQueryScorer(&query, 
docTotal, docTerm, true) + var queryTerm = "beer" + var queryField = "desc" + var queryBoost = 3.0 + scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, true) idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)) scorer.SetQueryNorm(2.0) diff --git a/search/search_boolean.go b/search/search_boolean.go index 3a9ea3ac..6aec9e54 100644 --- a/search/search_boolean.go +++ b/search/search_boolean.go @@ -28,41 +28,17 @@ type TermBooleanSearcher struct { scorer *TermConjunctionQueryScorer } -func NewTermBooleanSearcher(index index.Index, query *TermBooleanQuery) (*TermBooleanSearcher, error) { - // build the downstream searchres - var err error - var mustSearcher *TermConjunctionSearcher - if query.Must != nil { - mustSearcher, err = NewTermConjunctionSearcher(index, query.Must) - if err != nil { - return nil, err - } - } - var shouldSearcher *TermDisjunctionSearcher - if query.Should != nil { - shouldSearcher, err = NewTermDisjunctionSearcher(index, query.Should) - if err != nil { - return nil, err - } - } - var mustNotSearcher *TermDisjunctionSearcher - if query.MustNot != nil { - mustNotSearcher, err = NewTermDisjunctionSearcher(index, query.MustNot) - if err != nil { - return nil, err - } - } - +func NewTermBooleanSearcher(index index.Index, mustSearcher *TermConjunctionSearcher, shouldSearcher *TermDisjunctionSearcher, mustNotSearcher *TermDisjunctionSearcher, explain bool) (*TermBooleanSearcher, error) { // build our searcher rv := TermBooleanSearcher{ index: index, mustSearcher: mustSearcher, shouldSearcher: shouldSearcher, mustNotSearcher: mustNotSearcher, - scorer: NewTermConjunctionQueryScorer(query.Explain), + scorer: NewTermConjunctionQueryScorer(explain), } rv.computeQueryNorm() - err = rv.initSearchers() + err := rv.initSearchers() if err != nil { return nil, err } diff --git a/search/search_boolean_test.go b/search/search_boolean_test.go index 2e8409e8..6ee33b65 100644 --- a/search/search_boolean_test.go +++ 
b/search/search_boolean_test.go @@ -10,63 +10,225 @@ package search import ( "testing" - - "github.com/couchbaselabs/bleve/index" ) func TestTermBooleanSearch(t *testing.T) { + // test 0 + beerTermSearcher, err := NewTermSearcher(twoDocIndex, "beer", "desc", 1.0, true) + if err != nil { + t.Fatal(err) + } + mustSearcher, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{beerTermSearcher}, true) + if err != nil { + t.Fatal(err) + } + martyTermSearcher, err := NewTermSearcher(twoDocIndex, "marty", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + dustinTermSearcher, err := NewTermSearcher(twoDocIndex, "dustin", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + shouldSearcher, err := NewTermDisjunctionSearcher(twoDocIndex, []Searcher{martyTermSearcher, dustinTermSearcher}, 0, true) + if err != nil { + t.Fatal(err) + } + steveTermSearcher, err := NewTermSearcher(twoDocIndex, "steve", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + mustNotSearcher, err := NewTermDisjunctionSearcher(twoDocIndex, []Searcher{steveTermSearcher}, 0, true) + if err != nil { + t.Fatal(err) + } + booleanSearcher, err := NewTermBooleanSearcher(twoDocIndex, mustSearcher, shouldSearcher, mustNotSearcher, true) + if err != nil { + t.Fatal(err) + } + + // test 1 + martyTermSearcher2, err := NewTermSearcher(twoDocIndex, "marty", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + dustinTermSearcher2, err := NewTermSearcher(twoDocIndex, "dustin", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + shouldSearcher2, err := NewTermDisjunctionSearcher(twoDocIndex, []Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, true) + if err != nil { + t.Fatal(err) + } + steveTermSearcher2, err := NewTermSearcher(twoDocIndex, "steve", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + mustNotSearcher2, err := NewTermDisjunctionSearcher(twoDocIndex, []Searcher{steveTermSearcher2}, 0, true) + if err != nil { + t.Fatal(err) + } + booleanSearcher2, err := 
NewTermBooleanSearcher(twoDocIndex, nil, shouldSearcher2, mustNotSearcher2, true) + if err != nil { + t.Fatal(err) + } + + // test 2 + steveTermSearcher3, err := NewTermSearcher(twoDocIndex, "steve", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + mustNotSearcher3, err := NewTermDisjunctionSearcher(twoDocIndex, []Searcher{steveTermSearcher3}, 0, true) + if err != nil { + t.Fatal(err) + } + booleanSearcher3, err := NewTermBooleanSearcher(twoDocIndex, nil, nil, mustNotSearcher3, true) + if err != nil { + t.Fatal(err) + } + + // test 3 + beerTermSearcher4, err := NewTermSearcher(twoDocIndex, "beer", "desc", 1.0, true) + if err != nil { + t.Fatal(err) + } + mustSearcher4, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{beerTermSearcher4}, true) + if err != nil { + t.Fatal(err) + } + steveTermSearcher4, err := NewTermSearcher(twoDocIndex, "steve", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + mustNotSearcher4, err := NewTermDisjunctionSearcher(twoDocIndex, []Searcher{steveTermSearcher4}, 0, true) + if err != nil { + t.Fatal(err) + } + booleanSearcher4, err := NewTermBooleanSearcher(twoDocIndex, mustSearcher4, nil, mustNotSearcher4, true) + if err != nil { + t.Fatal(err) + } + + // test 4 + beerTermSearcher5, err := NewTermSearcher(twoDocIndex, "beer", "desc", 1.0, true) + if err != nil { + t.Fatal(err) + } + mustSearcher5, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{beerTermSearcher5}, true) + if err != nil { + t.Fatal(err) + } + steveTermSearcher5, err := NewTermSearcher(twoDocIndex, "steve", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + martyTermSearcher5, err := NewTermSearcher(twoDocIndex, "marty", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + mustNotSearcher5, err := NewTermDisjunctionSearcher(twoDocIndex, []Searcher{steveTermSearcher5, martyTermSearcher5}, 0, true) + if err != nil { + t.Fatal(err) + } + booleanSearcher5, err := NewTermBooleanSearcher(twoDocIndex, mustSearcher5, nil, 
mustNotSearcher5, true) + if err != nil { + t.Fatal(err) + } + + // test 5 + beerTermSearcher6, err := NewTermSearcher(twoDocIndex, "beer", "desc", 1.0, true) + if err != nil { + t.Fatal(err) + } + mustSearcher6, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{beerTermSearcher6}, true) + if err != nil { + t.Fatal(err) + } + martyTermSearcher6, err := NewTermSearcher(twoDocIndex, "marty", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + dustinTermSearcher6, err := NewTermSearcher(twoDocIndex, "dustin", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + shouldSearcher6, err := NewTermDisjunctionSearcher(twoDocIndex, []Searcher{martyTermSearcher6, dustinTermSearcher6}, 2, true) + if err != nil { + t.Fatal(err) + } + booleanSearcher6, err := NewTermBooleanSearcher(twoDocIndex, mustSearcher6, shouldSearcher6, nil, true) + if err != nil { + t.Fatal(err) + } + + // test 6 + beerTermSearcher7, err := NewTermSearcher(twoDocIndex, "beer", "desc", 1.0, true) + if err != nil { + t.Fatal(err) + } + mustSearcher7, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{beerTermSearcher7}, true) + if err != nil { + t.Fatal(err) + } + booleanSearcher7, err := NewTermBooleanSearcher(twoDocIndex, mustSearcher7, nil, nil, true) + if err != nil { + t.Fatal(err) + } + martyTermSearcher7, err := NewTermSearcher(twoDocIndex, "marty", "name", 5.0, true) + if err != nil { + t.Fatal(err) + } + conjunctionSearcher7, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{martyTermSearcher7, booleanSearcher7}, true) + + // test 7 + beerTermSearcher8, err := NewTermSearcher(twoDocIndex, "beer", "desc", 1.0, true) + if err != nil { + t.Fatal(err) + } + mustSearcher8, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{beerTermSearcher8}, true) + if err != nil { + t.Fatal(err) + } + martyTermSearcher8, err := NewTermSearcher(twoDocIndex, "marty", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + dustinTermSearcher8, err := NewTermSearcher(twoDocIndex, 
"dustin", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + shouldSearcher8, err := NewTermDisjunctionSearcher(twoDocIndex, []Searcher{martyTermSearcher8, dustinTermSearcher8}, 0, true) + if err != nil { + t.Fatal(err) + } + steveTermSearcher8, err := NewTermSearcher(twoDocIndex, "steve", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + mustNotSearcher8, err := NewTermDisjunctionSearcher(twoDocIndex, []Searcher{steveTermSearcher8}, 0, true) + if err != nil { + t.Fatal(err) + } + booleanSearcher8, err := NewTermBooleanSearcher(twoDocIndex, mustSearcher8, shouldSearcher8, mustNotSearcher8, true) + if err != nil { + t.Fatal(err) + } + dustinTermSearcher8a, err := NewTermSearcher(twoDocIndex, "dustin", "name", 5.0, true) + if err != nil { + t.Fatal(err) + } + conjunctionSearcher8, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{booleanSearcher8, dustinTermSearcher8a}, true) + if err != nil { + t.Fatal(err) + } + tests := []struct { - index index.Index - query Query - results []*DocumentMatch + searcher Searcher + results []*DocumentMatch }{ { - index: twoDocIndex, - query: &TermBooleanQuery{ - Must: &TermConjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - }, - Should: &TermDisjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "marty", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "dustin", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - Min: 0, - }, - MustNot: &TermDisjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "steve", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - Min: 0, - }, - Explain: true, - }, + searcher: booleanSearcher, results: []*DocumentMatch{ &DocumentMatch{ ID: "1", @@ -83,40 +245,7 @@ func TestTermBooleanSearch(t *testing.T) { }, }, { - index: twoDocIndex, - query: &TermBooleanQuery{ - Should: &TermDisjunctionQuery{ - Terms: 
[]Query{ - &TermQuery{ - Term: "marty", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "dustin", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - Min: 0, - }, - MustNot: &TermDisjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "steve", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - Min: 0, - }, - Explain: true, - }, + searcher: booleanSearcher2, results: []*DocumentMatch{ &DocumentMatch{ ID: "1", @@ -130,52 +259,11 @@ func TestTermBooleanSearch(t *testing.T) { }, // no MUST or SHOULD clauses yields no results { - index: twoDocIndex, - query: &TermBooleanQuery{ - MustNot: &TermDisjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "steve", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - Min: 0, - }, - Explain: true, - }, - results: []*DocumentMatch{}, + searcher: booleanSearcher3, + results: []*DocumentMatch{}, }, { - index: twoDocIndex, - query: &TermBooleanQuery{ - Must: &TermConjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - }, - MustNot: &TermDisjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "steve", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - Min: 0, - }, - Explain: true, - }, + searcher: booleanSearcher4, results: []*DocumentMatch{ &DocumentMatch{ ID: "1", @@ -192,39 +280,7 @@ func TestTermBooleanSearch(t *testing.T) { }, }, { - index: twoDocIndex, - query: &TermBooleanQuery{ - Must: &TermConjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - }, - MustNot: &TermDisjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "steve", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "marty", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - Min: 0, - 
}, - Explain: true, - }, + searcher: booleanSearcher5, results: []*DocumentMatch{ &DocumentMatch{ ID: "3", @@ -237,69 +293,12 @@ func TestTermBooleanSearch(t *testing.T) { }, }, { - index: twoDocIndex, - query: &TermBooleanQuery{ - Must: &TermConjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - }, - Should: &TermDisjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "marty", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "dustin", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - Min: 2, - }, - Explain: true, - }, - results: []*DocumentMatch{}, + searcher: booleanSearcher6, + results: []*DocumentMatch{}, }, // test a conjunction query with a nested boolean { - index: twoDocIndex, - query: &TermConjunctionQuery{ - Terms: []Query{ - &TermBooleanQuery{ - Must: &TermConjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - }, - Explain: true, - }, - &TermQuery{ - Term: "marty", - Field: "name", - BoostVal: 5.0, - Explain: true, - }, - }, - Explain: true, - }, + searcher: conjunctionSearcher7, results: []*DocumentMatch{ &DocumentMatch{ ID: "1", @@ -308,62 +307,7 @@ func TestTermBooleanSearch(t *testing.T) { }, }, { - index: twoDocIndex, - query: &TermConjunctionQuery{ - Terms: []Query{ - &TermBooleanQuery{ - Must: &TermConjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - }, - Should: &TermDisjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "marty", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "dustin", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - Min: 0, - }, - MustNot: &TermDisjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "steve", - Field: "name", - 
BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - Min: 0, - }, - Explain: true, - }, - &TermQuery{ - Term: "dustin", - Field: "name", - BoostVal: 5.0, - Explain: true, - }, - }, - Explain: true, - }, + searcher: conjunctionSearcher8, results: []*DocumentMatch{ &DocumentMatch{ ID: "3", @@ -374,10 +318,9 @@ func TestTermBooleanSearch(t *testing.T) { } for testIndex, test := range tests { - searcher, err := test.query.Searcher(test.index) - defer searcher.Close() + defer test.searcher.Close() - next, err := searcher.Next() + next, err := test.searcher.Next() i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -389,7 +332,7 @@ func TestTermBooleanSearch(t *testing.T) { t.Logf("scoring explanation: %s", next.Expl) } } - next, err = searcher.Next() + next, err = test.searcher.Next() i++ } if err != nil { diff --git a/search/search_match_all.go b/search/search_match_all.go index 9813338a..ad7e353e 100644 --- a/search/search_match_all.go +++ b/search/search_match_all.go @@ -14,20 +14,18 @@ import ( type MatchAllSearcher struct { index index.Index - query *MatchAllQuery reader index.DocIdReader scorer *ConstantScorer } -func NewMatchAllSearcher(index index.Index, query *MatchAllQuery) (*MatchAllSearcher, error) { +func NewMatchAllSearcher(index index.Index, boost float64, explain bool) (*MatchAllSearcher, error) { reader, err := index.DocIdReader("", "") if err != nil { return nil, err } - scorer := NewConstantScorer(query, 1.0, query.Explain) + scorer := NewConstantScorer(1.0, boost, explain) return &MatchAllSearcher{ index: index, - query: query, reader: reader, scorer: scorer, }, nil diff --git a/search/search_match_all_test.go b/search/search_match_all_test.go index c375350b..772f89eb 100644 --- a/search/search_match_all_test.go +++ b/search/search_match_all_test.go @@ -10,24 +10,27 @@ package search import ( "testing" - - "github.com/couchbaselabs/bleve/index" ) func TestMatchAllSearch(t *testing.T) { + allSearcher, err := 
NewMatchAllSearcher(twoDocIndex, 1.0, true) + if err != nil { + t.Fatal(err) + } + + allSearcher2, err := NewMatchAllSearcher(twoDocIndex, 1.2, true) + if err != nil { + t.Fatal(err) + } + tests := []struct { - index index.Index - query Query + searcher Searcher queryNorm float64 results []*DocumentMatch }{ { - index: twoDocIndex, - query: &MatchAllQuery{ - BoostVal: 1.0, - Explain: true, - }, + searcher: allSearcher, queryNorm: 1.0, results: []*DocumentMatch{ &DocumentMatch{ @@ -53,11 +56,7 @@ func TestMatchAllSearch(t *testing.T) { }, }, { - index: twoDocIndex, - query: &MatchAllQuery{ - BoostVal: 1.2, - Explain: true, - }, + searcher: allSearcher2, queryNorm: 0.8333333, results: []*DocumentMatch{ &DocumentMatch{ @@ -85,13 +84,13 @@ func TestMatchAllSearch(t *testing.T) { } for testIndex, test := range tests { - searcher, err := test.query.Searcher(test.index) - if test.queryNorm != 1.0 { - searcher.SetQueryNorm(test.queryNorm) - } - defer searcher.Close() - next, err := searcher.Next() + if test.queryNorm != 1.0 { + test.searcher.SetQueryNorm(test.queryNorm) + } + defer test.searcher.Close() + + next, err := test.searcher.Next() i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -103,7 +102,7 @@ func TestMatchAllSearch(t *testing.T) { t.Logf("scoring explanation: %s", next.Expl) } } - next, err = searcher.Next() + next, err = test.searcher.Next() i++ } if err != nil { diff --git a/search/search_match_none.go b/search/search_match_none.go index c3c6e530..a6030940 100644 --- a/search/search_match_none.go +++ b/search/search_match_none.go @@ -14,13 +14,11 @@ import ( type MatchNoneSearcher struct { index index.Index - query *MatchNoneQuery } -func NewMatchNoneSearcher(index index.Index, query *MatchNoneQuery) (*MatchNoneSearcher, error) { +func NewMatchNoneSearcher(index index.Index) (*MatchNoneSearcher, error) { return &MatchNoneSearcher{ index: index, - query: query, }, nil } diff --git a/search/search_match_none_test.go 
b/search/search_match_none_test.go index 7f4e6a9c..13247116 100644 --- a/search/search_match_none_test.go +++ b/search/search_match_none_test.go @@ -10,31 +10,29 @@ package search import ( "testing" - - "github.com/couchbaselabs/bleve/index" ) func TestMatchNoneSearch(t *testing.T) { + noneSearcher, err := NewMatchNoneSearcher(twoDocIndex) + if err != nil { + t.Fatal(err) + } + tests := []struct { - index index.Index - query Query - results []*DocumentMatch + searcher Searcher + results []*DocumentMatch }{ { - index: twoDocIndex, - query: &MatchNoneQuery{ - Explain: true, - }, - results: []*DocumentMatch{}, + searcher: noneSearcher, + results: []*DocumentMatch{}, }, } for testIndex, test := range tests { - searcher, err := test.query.Searcher(test.index) - defer searcher.Close() + defer test.searcher.Close() - next, err := searcher.Next() + next, err := test.searcher.Next() i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -46,7 +44,7 @@ func TestMatchNoneSearch(t *testing.T) { t.Logf("scoring explanation: %s", next.Expl) } } - next, err = searcher.Next() + next, err = test.searcher.Next() i++ } if err != nil { diff --git a/search/search_phrase.go b/search/search_phrase.go index a413ebbd..0c9e57af 100644 --- a/search/search_phrase.go +++ b/search/search_phrase.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
package search import ( @@ -7,46 +15,24 @@ import ( ) type PhraseSearcher struct { - query *PhraseQuery index index.Index mustSearcher *TermConjunctionSearcher queryNorm float64 currMust *DocumentMatch slop int + terms []string } -func NewPhraseSearcher(index index.Index, query *PhraseQuery) (*PhraseSearcher, error) { - // build the downstream searchres - var err error - var mustSearcher *TermConjunctionSearcher - - if query.Terms != nil { - qterms := make([]Query, 0, len(query.Terms)) - for _, qt := range query.Terms { - if qt != nil { - qterms = append(qterms, qt) - } - } - tcq := TermConjunctionQuery{ - Terms: qterms, - BoostVal: 1.0, - Explain: query.Explain, - } - - mustSearcher, err = NewTermConjunctionSearcher(index, &tcq) - if err != nil { - return nil, err - } - } +func NewPhraseSearcher(index index.Index, mustSearcher *TermConjunctionSearcher, terms []string) (*PhraseSearcher, error) { // build our searcher rv := PhraseSearcher{ index: index, - query: query, mustSearcher: mustSearcher, + terms: terms, } rv.computeQueryNorm() - err = rv.initSearchers() + err := rv.initSearchers() if err != nil { return nil, err } @@ -111,26 +97,26 @@ func (s *PhraseSearcher) Next() (*DocumentMatch, error) { for s.currMust != nil { rvftlm := make(FieldTermLocationMap, 0) freq := 0 - firstTerm := s.query.Terms[0] + firstTerm := s.terms[0] for field, termLocMap := range s.currMust.Locations { rvtlm := make(TermLocationMap, 0) - locations, ok := termLocMap[firstTerm.Term] + locations, ok := termLocMap[firstTerm] if ok { OUTER: for _, location := range locations { crvtlm := make(TermLocationMap, 0) INNER: - for i := 0; i < len(s.query.Terms); i++ { - nextTerm := s.query.Terms[i] - if nextTerm != nil { + for i := 0; i < len(s.mustSearcher.searchers); i++ { + nextTerm := s.terms[i] + if nextTerm != "" { // look through all this terms locations // to try and find the correct offsets - nextLocations, ok := termLocMap[nextTerm.Term] + nextLocations, ok := termLocMap[nextTerm] if ok 
{ for _, nextLocation := range nextLocations { if nextLocation.Pos == location.Pos+float64(i) { // found a location match for this term - crvtlm.AddLocation(nextTerm.Term, nextLocation) + crvtlm.AddLocation(nextTerm, nextLocation) continue INNER } } diff --git a/search/search_phrase_test.go b/search/search_phrase_test.go index b71a6b38..7bd117b7 100644 --- a/search/search_phrase_test.go +++ b/search/search_phrase_test.go @@ -10,36 +10,33 @@ package search import ( "testing" - - "github.com/couchbaselabs/bleve/index" ) func TestPhraseSearch(t *testing.T) { + angstTermSearcher, err := NewTermSearcher(twoDocIndex, "angst", "desc", 1.0, true) + if err != nil { + t.Fatal(err) + } + beerTermSearcher, err := NewTermSearcher(twoDocIndex, "beer", "desc", 1.0, true) + if err != nil { + t.Fatal(err) + } + mustSearcher, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{angstTermSearcher, beerTermSearcher}, true) + if err != nil { + t.Fatal(err) + } + phraseSearcher, err := NewPhraseSearcher(twoDocIndex, mustSearcher, []string{"angst", "beer"}) + if err != nil { + t.Fatal(err) + } + tests := []struct { - index index.Index - query Query - results []*DocumentMatch + searcher Searcher + results []*DocumentMatch }{ { - index: twoDocIndex, - query: &PhraseQuery{ - Terms: []*TermQuery{ - &TermQuery{ - Term: "angst", - Field: "desc", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - }, + searcher: phraseSearcher, results: []*DocumentMatch{ &DocumentMatch{ ID: "2", @@ -50,10 +47,9 @@ func TestPhraseSearch(t *testing.T) { } for testIndex, test := range tests { - searcher, err := test.query.Searcher(test.index) - defer searcher.Close() + defer test.searcher.Close() - next, err := searcher.Next() + next, err := test.searcher.Next() i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -65,7 +61,7 @@ func TestPhraseSearch(t *testing.T) { t.Logf("scoring explanation: 
%s", next.Expl) } } - next, err = searcher.Next() + next, err = test.searcher.Next() i++ } if err != nil { diff --git a/search/search_term.go b/search/search_term.go index 119068f6..6cabca96 100644 --- a/search/search_term.go +++ b/search/search_term.go @@ -13,23 +13,27 @@ import ( ) type TermSearcher struct { - index index.Index - query *TermQuery - reader index.TermFieldReader - scorer *TermQueryScorer + index index.Index + term string + field string + explain bool + reader index.TermFieldReader + scorer *TermQueryScorer } -func NewTermSearcher(index index.Index, query *TermQuery) (*TermSearcher, error) { - reader, err := index.TermFieldReader([]byte(query.Term), query.Field) +func NewTermSearcher(index index.Index, term string, field string, boost float64, explain bool) (*TermSearcher, error) { + reader, err := index.TermFieldReader([]byte(term), field) if err != nil { return nil, err } - scorer := NewTermQueryScorer(query, index.DocCount(), reader.Count(), query.Explain) + scorer := NewTermQueryScorer(term, field, boost, index.DocCount(), reader.Count(), explain) return &TermSearcher{ - index: index, - query: query, - reader: reader, - scorer: scorer, + index: index, + term: term, + field: field, + explain: explain, + reader: reader, + scorer: scorer, }, nil } diff --git a/search/search_term_conjunction.go b/search/search_term_conjunction.go index 375469a7..92e039b5 100644 --- a/search/search_term_conjunction.go +++ b/search/search_term_conjunction.go @@ -18,20 +18,17 @@ import ( type TermConjunctionSearcher struct { index index.Index searchers OrderedSearcherList + explain bool queryNorm float64 currs []*DocumentMatch currentId string scorer *TermConjunctionQueryScorer } -func NewTermConjunctionSearcher(index index.Index, query *TermConjunctionQuery) (*TermConjunctionSearcher, error) { +func NewTermConjunctionSearcher(index index.Index, qsearchers []Searcher, explain bool) (*TermConjunctionSearcher, error) { // build the downstream searchres - searchers := 
make(OrderedSearcherList, len(query.Terms)) - for i, termQuery := range query.Terms { - searcher, err := termQuery.Searcher(index) - if err != nil { - return nil, err - } + searchers := make(OrderedSearcherList, len(qsearchers)) + for i, searcher := range qsearchers { searchers[i] = searcher } // sort the searchers @@ -39,9 +36,10 @@ func NewTermConjunctionSearcher(index index.Index, query *TermConjunctionQuery) // build our searcher rv := TermConjunctionSearcher{ index: index, + explain: explain, searchers: searchers, currs: make([]*DocumentMatch, len(searchers)), - scorer: NewTermConjunctionQueryScorer(query.Explain), + scorer: NewTermConjunctionQueryScorer(explain), } rv.computeQueryNorm() err := rv.initSearchers() diff --git a/search/search_term_conjunction_test.go b/search/search_term_conjunction_test.go index e12b9c82..cafd0297 100644 --- a/search/search_term_conjunction_test.go +++ b/search/search_term_conjunction_test.go @@ -10,36 +10,108 @@ package search import ( "testing" - - "github.com/couchbaselabs/bleve/index" ) func TestTermConjunctionSearch(t *testing.T) { + // test 0 + beerTermSearcher, err := NewTermSearcher(twoDocIndex, "beer", "desc", 1.0, true) + if err != nil { + t.Fatal(err) + } + martyTermSearcher, err := NewTermSearcher(twoDocIndex, "marty", "name", 5.0, true) + if err != nil { + t.Fatal(err) + } + beerAndMartySearcher, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{beerTermSearcher, martyTermSearcher}, true) + if err != nil { + t.Fatal(err) + } + + // test 1 + angstTermSearcher, err := NewTermSearcher(twoDocIndex, "angst", "desc", 1.0, true) + if err != nil { + t.Fatal(err) + } + beerTermSearcher2, err := NewTermSearcher(twoDocIndex, "beer", "desc", 1.0, true) + if err != nil { + t.Fatal(err) + } + angstAndBeerSearcher, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{angstTermSearcher, beerTermSearcher2}, true) + if err != nil { + t.Fatal(err) + } + + // test 2 + beerTermSearcher3, err := NewTermSearcher(twoDocIndex, 
"beer", "desc", 1.0, true) + if err != nil { + t.Fatal(err) + } + jackTermSearcher, err := NewTermSearcher(twoDocIndex, "jack", "name", 5.0, true) + if err != nil { + t.Fatal(err) + } + beerAndJackSearcher, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{beerTermSearcher3, jackTermSearcher}, true) + if err != nil { + t.Fatal(err) + } + + // test 3 + beerTermSearcher4, err := NewTermSearcher(twoDocIndex, "beer", "desc", 1.0, true) + if err != nil { + t.Fatal(err) + } + misterTermSearcher, err := NewTermSearcher(twoDocIndex, "mister", "title", 5.0, true) + if err != nil { + t.Fatal(err) + } + beerAndMisterSearcher, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{beerTermSearcher4, misterTermSearcher}, true) + if err != nil { + t.Fatal(err) + } + + // test 4 + couchbaseTermSearcher, err := NewTermSearcher(twoDocIndex, "couchbase", "street", 1.0, true) + if err != nil { + t.Fatal(err) + } + misterTermSearcher2, err := NewTermSearcher(twoDocIndex, "mister", "title", 5.0, true) + if err != nil { + t.Fatal(err) + } + couchbaseAndMisterSearcher, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{couchbaseTermSearcher, misterTermSearcher2}, true) + if err != nil { + t.Fatal(err) + } + + // test 5 + beerTermSearcher5, err := NewTermSearcher(twoDocIndex, "beer", "desc", 5.0, true) + if err != nil { + t.Fatal(err) + } + couchbaseTermSearcher2, err := NewTermSearcher(twoDocIndex, "couchbase", "street", 1.0, true) + if err != nil { + t.Fatal(err) + } + misterTermSearcher3, err := NewTermSearcher(twoDocIndex, "mister", "title", 5.0, true) + if err != nil { + t.Fatal(err) + } + couchbaseAndMisterSearcher2, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{couchbaseTermSearcher2, misterTermSearcher3}, true) + if err != nil { + t.Fatal(err) + } + beerAndCouchbaseAndMisterSearcher, err := NewTermConjunctionSearcher(twoDocIndex, []Searcher{beerTermSearcher5, couchbaseAndMisterSearcher2}, true) + if err != nil { + t.Fatal(err) + } + tests := []struct 
{ - index index.Index - query *TermConjunctionQuery - results []*DocumentMatch + searcher Searcher + results []*DocumentMatch }{ { - index: twoDocIndex, - query: &TermConjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "marty", - Field: "name", - BoostVal: 5.0, - Explain: true, - }, - }, - Explain: true, - }, + searcher: beerAndMartySearcher, results: []*DocumentMatch{ &DocumentMatch{ ID: "1", @@ -48,24 +120,7 @@ func TestTermConjunctionSearch(t *testing.T) { }, }, { - index: twoDocIndex, - query: &TermConjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "angst", - Field: "desc", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - }, + searcher: angstAndBeerSearcher, results: []*DocumentMatch{ &DocumentMatch{ ID: "2", @@ -74,45 +129,11 @@ func TestTermConjunctionSearch(t *testing.T) { }, }, { - index: twoDocIndex, - query: &TermConjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "jack", - Field: "name", - BoostVal: 5.0, - Explain: true, - }, - }, - Explain: true, - }, - results: []*DocumentMatch{}, + searcher: beerAndJackSearcher, + results: []*DocumentMatch{}, }, { - index: twoDocIndex, - query: &TermConjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "mister", - Field: "title", - BoostVal: 5.0, - Explain: true, - }, - }, - Explain: true, - }, + searcher: beerAndMisterSearcher, results: []*DocumentMatch{ &DocumentMatch{ ID: "2", @@ -125,24 +146,7 @@ func TestTermConjunctionSearch(t *testing.T) { }, }, { - index: twoDocIndex, - query: &TermConjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "couchbase", - Field: "street", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: 
"mister", - Field: "title", - BoostVal: 5.0, - Explain: true, - }, - }, - Explain: true, - }, + searcher: couchbaseAndMisterSearcher, results: []*DocumentMatch{ &DocumentMatch{ ID: "2", @@ -151,35 +155,7 @@ func TestTermConjunctionSearch(t *testing.T) { }, }, { - index: twoDocIndex, - query: &TermConjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 5.0, - Explain: true, - }, - &TermConjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "couchbase", - Field: "street", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "mister", - Field: "title", - BoostVal: 5.0, - Explain: true, - }, - }, - Explain: true, - }, - }, - Explain: true, - }, + searcher: beerAndCouchbaseAndMisterSearcher, results: []*DocumentMatch{ &DocumentMatch{ ID: "2", @@ -190,10 +166,9 @@ func TestTermConjunctionSearch(t *testing.T) { } for testIndex, test := range tests { - searcher, err := NewTermConjunctionSearcher(test.index, test.query) - defer searcher.Close() + defer test.searcher.Close() - next, err := searcher.Next() + next, err := test.searcher.Next() i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -205,7 +180,7 @@ func TestTermConjunctionSearch(t *testing.T) { t.Logf("scoring explanation: %s", next.Expl) } } - next, err = searcher.Next() + next, err = test.searcher.Next() i++ } if err != nil { diff --git a/search/search_term_disjunction.go b/search/search_term_disjunction.go index 173dab7b..327b371e 100644 --- a/search/search_term_disjunction.go +++ b/search/search_term_disjunction.go @@ -25,14 +25,10 @@ type TermDisjunctionSearcher struct { min float64 } -func NewTermDisjunctionSearcher(index index.Index, query *TermDisjunctionQuery) (*TermDisjunctionSearcher, error) { +func NewTermDisjunctionSearcher(index index.Index, qsearchers []Searcher, min float64, explain bool) (*TermDisjunctionSearcher, error) { // build the downstream searchres - searchers := make(OrderedSearcherList, len(query.Terms)) - for i, termQuery 
:= range query.Terms { - searcher, err := termQuery.Searcher(index) - if err != nil { - return nil, err - } + searchers := make(OrderedSearcherList, len(qsearchers)) + for i, searcher := range qsearchers { searchers[i] = searcher } // sort the searchers @@ -42,8 +38,8 @@ func NewTermDisjunctionSearcher(index index.Index, query *TermDisjunctionQuery) index: index, searchers: searchers, currs: make([]*DocumentMatch, len(searchers)), - scorer: NewTermDisjunctionQueryScorer(query.Explain), - min: query.Min, + scorer: NewTermDisjunctionQueryScorer(explain), + min: min, } rv.computeQueryNorm() err := rv.initSearchers() diff --git a/search/search_term_disjunction_test.go b/search/search_term_disjunction_test.go index 40308f9a..cae89566 100644 --- a/search/search_term_disjunction_test.go +++ b/search/search_term_disjunction_test.go @@ -10,37 +10,51 @@ package search import ( "testing" - - "github.com/couchbaselabs/bleve/index" ) func TestTermDisjunctionSearch(t *testing.T) { + martyTermSearcher, err := NewTermSearcher(twoDocIndex, "marty", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + dustinTermSearcher, err := NewTermSearcher(twoDocIndex, "dustin", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + martyOrDustinSearcher, err := NewTermDisjunctionSearcher(twoDocIndex, []Searcher{martyTermSearcher, dustinTermSearcher}, 0, true) + if err != nil { + t.Fatal(err) + } + + martyTermSearcher2, err := NewTermSearcher(twoDocIndex, "marty", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + dustinTermSearcher2, err := NewTermSearcher(twoDocIndex, "dustin", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + martyOrDustinSearcher2, err := NewTermDisjunctionSearcher(twoDocIndex, []Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, true) + if err != nil { + t.Fatal(err) + } + + raviTermSearcher, err := NewTermSearcher(twoDocIndex, "ravi", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + nestedRaviOrMartyOrDustinSearcher, err := 
NewTermDisjunctionSearcher(twoDocIndex, []Searcher{raviTermSearcher, martyOrDustinSearcher2}, 0, true) + if err != nil { + t.Fatal(err) + } + tests := []struct { - index index.Index - query Query - results []*DocumentMatch + searcher Searcher + results []*DocumentMatch }{ { - index: twoDocIndex, - query: &TermDisjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "marty", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "dustin", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - Min: 0, - }, + searcher: martyOrDustinSearcher, results: []*DocumentMatch{ &DocumentMatch{ ID: "1", @@ -54,37 +68,7 @@ func TestTermDisjunctionSearch(t *testing.T) { }, // test a nested disjunction { - index: twoDocIndex, - query: &TermDisjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "ravi", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - &TermDisjunctionQuery{ - Terms: []Query{ - &TermQuery{ - Term: "marty", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "dustin", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - Min: 0, - }, - }, - Explain: true, - Min: 0, - }, + searcher: nestedRaviOrMartyOrDustinSearcher, results: []*DocumentMatch{ &DocumentMatch{ ID: "1", @@ -103,10 +87,10 @@ func TestTermDisjunctionSearch(t *testing.T) { } for testIndex, test := range tests { - searcher, err := test.query.Searcher(test.index) - defer searcher.Close() - next, err := searcher.Next() + defer test.searcher.Close() + + next, err := test.searcher.Next() i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -118,7 +102,7 @@ func TestTermDisjunctionSearch(t *testing.T) { t.Logf("scoring explanation: %s", next.Expl) } } - next, err = searcher.Next() + next, err = test.searcher.Next() i++ } if err != nil { @@ -131,27 +115,21 @@ func TestTermDisjunctionSearch(t *testing.T) { } func TestDisjunctionAdvance(t *testing.T) { - query := &TermDisjunctionQuery{ - 
Terms: []Query{ - &TermQuery{ - Term: "marty", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - &TermQuery{ - Term: "dustin", - Field: "name", - BoostVal: 1.0, - Explain: true, - }, - }, - Explain: true, - Min: 0, + + martyTermSearcher, err := NewTermSearcher(twoDocIndex, "marty", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + dustinTermSearcher, err := NewTermSearcher(twoDocIndex, "dustin", "name", 1.0, true) + if err != nil { + t.Fatal(err) + } + martyOrDustinSearcher, err := NewTermDisjunctionSearcher(twoDocIndex, []Searcher{martyTermSearcher, dustinTermSearcher}, 0, true) + if err != nil { + t.Fatal(err) } - searcher, err := query.Searcher(twoDocIndex) - match, err := searcher.Advance("3") + match, err := martyOrDustinSearcher.Advance("3") if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/search/search_term_test.go b/search/search_term_test.go index ecde6146..1b5fd4b2 100644 --- a/search/search_term_test.go +++ b/search/search_term_test.go @@ -19,12 +19,10 @@ import ( func TestTermSearcher(t *testing.T) { - query := TermQuery{ - Term: "beer", - Field: "desc", - BoostVal: 3.0, - Explain: true, - } + var queryTerm = "beer" + var queryField = "desc" + var queryBoost = 3.0 + var queryExplain = true inMemStore, _ := inmem.Open() i := upside_down.NewUpsideDownCouch(inMemStore) @@ -89,7 +87,7 @@ func TestTermSearcher(t *testing.T) { }, }) - searcher, err := NewTermSearcher(i, &query) + searcher, err := NewTermSearcher(i, queryTerm, queryField, queryBoost, queryExplain) if err != nil { t.Fatal(err) } diff --git a/search/term_locations.go b/search/term_locations.go index 95376f4d..5cea8e37 100644 --- a/search/term_locations.go +++ b/search/term_locations.go @@ -1,3 +1,11 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. package search import ( diff --git a/search/y.output b/search/y.output deleted file mode 100644 index e6f6aafe..00000000 --- a/search/y.output +++ /dev/null @@ -1,159 +0,0 @@ - -state 0 - $accept: .input $end - searchPrefix: . (5) - - PLUS shift 6 - MINUS shift 7 - . reduce 5 (src line 41) - - input goto 1 - searchParts goto 2 - searchPart goto 3 - searchPrefix goto 4 - searchMustMustNot goto 5 - -state 1 - $accept: input.$end - - $end accept - . error - - -state 2 - input: searchParts. (1) - - . reduce 1 (src line 21) - - -state 3 - searchParts: searchPart.searchParts - searchParts: searchPart. (3) - searchPrefix: . (5) - - $end reduce 3 (src line 30) - PLUS shift 6 - MINUS shift 7 - . reduce 5 (src line 41) - - searchParts goto 8 - searchPart goto 3 - searchPrefix goto 4 - searchMustMustNot goto 5 - -state 4 - searchPart: searchPrefix.searchBase searchSuffix - - STRING shift 10 - PHRASE shift 11 - . error - - searchBase goto 9 - -state 5 - searchPrefix: searchMustMustNot. (6) - - . reduce 6 (src line 44) - - -state 6 - searchMustMustNot: PLUS. (7) - - . reduce 7 (src line 50) - - -state 7 - searchMustMustNot: MINUS. (8) - - . reduce 8 (src line 55) - - -state 8 - searchParts: searchPart searchParts. (2) - - . reduce 2 (src line 26) - - -state 9 - searchPart: searchPrefix searchBase.searchSuffix - searchSuffix: . (14) - - BOOST shift 14 - . reduce 14 (src line 163) - - searchSuffix goto 12 - searchBoost goto 13 - -state 10 - searchBase: STRING. (9) - searchBase: STRING.COLON STRING - searchBase: STRING.COLON PHRASE - - COLON shift 15 - . 
reduce 9 (src line 61) - - -state 11 - searchBase: PHRASE. (10) - - . reduce 10 (src line 84) - - -state 12 - searchPart: searchPrefix searchBase searchSuffix. (4) - - . reduce 4 (src line 35) - - -state 13 - searchSuffix: searchBoost. (15) - - . reduce 15 (src line 167) - - -state 14 - searchBoost: BOOST.INT - - INT shift 16 - . error - - -state 15 - searchBase: STRING COLON.STRING - searchBase: STRING COLON.PHRASE - - STRING shift 17 - PHRASE shift 18 - . error - - -state 16 - searchBoost: BOOST INT. (13) - - . reduce 13 (src line 157) - - -state 17 - searchBase: STRING COLON STRING. (11) - - . reduce 11 (src line 107) - - -state 18 - searchBase: STRING COLON PHRASE. (12) - - . reduce 12 (src line 131) - - -12 terminals, 9 nonterminals -16 grammar rules, 19/2000 states -0 shift/reduce, 0 reduce/reduce conflicts reported -58 working sets used -memory: parser 13/30000 -0 extra closures -11 shift entries, 2 exceptions -9 goto entries -3 entries saved by goto default -Optimizer space used: output 18/30000 -18 table entries, 0 zero -maximum spread: 12, maximum offset: 12 diff --git a/shredder/json_shredder.go b/shredder/json_shredder.go deleted file mode 100644 index 56a96522..00000000 --- a/shredder/json_shredder.go +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
- -package shredder - -import ( - "encoding/json" - "strconv" - - "github.com/couchbaselabs/bleve/document" -) - -// A simple automatic JSON shredder which parses the whole document body. -// Any strings found in the JSON are added as text fields - -type AutoJsonShredder struct { - options document.IndexingOptions -} - -func NewAutoJsonShredder() *AutoJsonShredder { - return &AutoJsonShredder{ - options: document.INDEX_FIELD, - } -} - -func NewAutoJsonShredderWithOptions(options document.IndexingOptions) *AutoJsonShredder { - return &AutoJsonShredder{ - options: options, - } -} - -func (s *AutoJsonShredder) Shred(id string, body []byte) (*document.Document, error) { - rv := document.NewDocument(id) - - var section interface{} - err := json.Unmarshal(body, §ion) - if err != nil { - return nil, err - } - - s.shredSection(rv, section, "") - - return rv, nil -} - -func (s *AutoJsonShredder) shredSection(doc *document.Document, section interface{}, parent string) { - nextParent := parent - if nextParent != "" { - nextParent = nextParent + "." - } - switch section := section.(type) { - - case string: - f := document.NewTextFieldWithIndexingOptions(parent, []byte(section), s.options) - doc.AddField(f) - - case []interface{}: - for i, sub := range section { - s.shredSection(doc, sub, nextParent+strconv.Itoa(i)) - } - - case map[string]interface{}: - for k, sub := range section { - s.shredSection(doc, sub, nextParent+k) - } - } -} diff --git a/shredder/jsonpointer_shredder.go b/shredder/jsonpointer_shredder.go deleted file mode 100644 index 6f2a6039..00000000 --- a/shredder/jsonpointer_shredder.go +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. 
You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -package shredder - -import ( - "bytes" - - "github.com/couchbaselabs/bleve/analysis" - "github.com/couchbaselabs/bleve/document" - - "github.com/dustin/go-jsonpointer" -) - -// A simple automatic JSON shredder which parses the whole document body. -// Any strings found in the JSON are added as text fields - -type JsonPointerShredder struct { - fieldPaths map[string]string - paths []string - analyzers map[string]*analysis.Analyzer - options map[string]document.IndexingOptions -} - -func NewJsonPointerShredder() *JsonPointerShredder { - return &JsonPointerShredder{ - fieldPaths: make(map[string]string), - paths: make([]string, 0), - analyzers: make(map[string]*analysis.Analyzer), - options: make(map[string]document.IndexingOptions), - } -} - -func (s *JsonPointerShredder) AddTextField(name string, path string) { - s.fieldPaths[name] = path - s.paths = append(s.paths, path) -} - -func (s *JsonPointerShredder) AddFieldCustom(name string, path string, options document.IndexingOptions, analyzer *analysis.Analyzer) { - s.fieldPaths[name] = path - s.analyzers[name] = analyzer - s.options[name] = options - s.paths = append(s.paths, path) -} - -func (s *JsonPointerShredder) Shred(id string, body []byte) (*document.Document, error) { - rv := document.NewDocument(id) - - values, err := jsonpointer.FindMany(body, s.paths) - if err != nil { - return nil, err - } - - for fieldName, fieldPath := range s.fieldPaths { - fieldValue := bytes.TrimSpace(values[fieldPath]) - if bytes.HasPrefix(fieldValue, []byte{'"'}) { - fieldValue = fieldValue[1:] - } - if bytes.HasSuffix(fieldValue, 
[]byte{'"'}) { - fieldValue = fieldValue[:len(fieldValue)-1] - } - analyzer, custom := s.analyzers[fieldName] - if custom { - options := s.options[fieldName] - field := document.NewTextFieldCustom(fieldName, fieldValue, options, analyzer) - rv.AddField(field) - } else { - field := document.NewTextField(fieldName, fieldValue) - rv.AddField(field) - } - } - - return rv, nil -} diff --git a/utils/bleve_dump/main.go b/utils/bleve_dump/main.go index fff3b0d5..c240d05d 100644 --- a/utils/bleve_dump/main.go +++ b/utils/bleve_dump/main.go @@ -34,5 +34,9 @@ func main() { } defer index.Close() - index.Dump() + if !*fieldsOnly { + index.Dump() + } else { + index.DumpFields() + } } diff --git a/search/y.go b/y.go similarity index 69% rename from search/y.go rename to y.go index b6a21b4e..ddd9d968 100644 --- a/search/y.go +++ b/y.go @@ -1,22 +1,24 @@ +//line query_syntax.y:2 +package bleve + +import __yyfmt__ "fmt" //line query_syntax.y:2 -package search -import __yyfmt__ "fmt" -//line query_syntax.y:2 - import "log" +import "log" func logDebugGrammar(format string, v ...interface{}) { if debugParser { - log.Printf(format, v...) - } + log.Printf(format, v...) 
+ } } //line query_syntax.y:12 type yySymType struct { - yys int -s string -n int -f float64} + yys int + s string + n int + f float64 +} const STRING = 57346 const PHRASE = 57347 @@ -338,158 +340,126 @@ yydefault: case 1: //line query_syntax.y:22 { - logDebugGrammar("INPUT") - } + logDebugGrammar("INPUT") + } case 2: //line query_syntax.y:27 { - logDebugGrammar("SEARCH PARTS") - } + logDebugGrammar("SEARCH PARTS") + } case 3: //line query_syntax.y:31 { - logDebugGrammar("SEARCH PART") - } + logDebugGrammar("SEARCH PART") + } case 4: //line query_syntax.y:36 { - - } + + } case 5: //line query_syntax.y:42 { - } + } case 6: //line query_syntax.y:45 { - - } + + } case 7: //line query_syntax.y:51 { - logDebugGrammar("PLUS") - parsingMust = true - } + logDebugGrammar("PLUS") + parsingMust = true + } case 8: //line query_syntax.y:56 { - logDebugGrammar("MINUS") - parsingMustNot = true - } + logDebugGrammar("MINUS") + parsingMustNot = true + } case 9: //line query_syntax.y:62 { - str := yyS[yypt-0].s - logDebugGrammar("STRING - %s", str) - q := &MatchQuery{ - Match: str, - Field: parsingDefaultField, - BoostVal: 1.0, - Explain: true, + str := yyS[yypt-0].s + logDebugGrammar("STRING - %s", str) + q := NewMatchQuery(str).SetField(parsingDefaultField) + if parsingMust { + parsingMustList.AddQuery(q) + parsingMust = false + } else if parsingMustNot { + parsingMustNotList.AddQuery(q) + parsingMustNot = false + } else { + parsingShouldList.AddQuery(q) + } } - if parsingMapping[parsingDefaultField] != nil { - q.Analyzer = parsingMapping[parsingDefaultField].Analyzer - } - if parsingMust { - parsingMustList.Terms = append(parsingMustList.Terms, q) - parsingMust = false - } else if parsingMustNot { - parsingMustNotList.Terms = append(parsingMustNotList.Terms, q) - parsingMustNot = false - } else { - parsingShouldList.Terms = append(parsingShouldList.Terms, q) - } - } case 10: - //line query_syntax.y:85 + //line query_syntax.y:77 { - phrase := yyS[yypt-0].s - 
logDebugGrammar("PHRASE - %s", phrase) - q := &MatchPhraseQuery{ - MatchPhrase: phrase, - Field: parsingDefaultField, - BoostVal: 1.0, - Explain: true, + phrase := yyS[yypt-0].s + logDebugGrammar("PHRASE - %s", phrase) + q := NewMatchPhraseQuery(phrase).SetField(parsingDefaultField) + if parsingMust { + parsingMustList.AddQuery(q) + parsingMust = false + } else if parsingMustNot { + parsingMustNotList.AddQuery(q) + parsingMustNot = false + } else { + parsingShouldList.AddQuery(q) + } } - if parsingMapping[parsingDefaultField] != nil { - q.Analyzer = parsingMapping[parsingDefaultField].Analyzer - } - if parsingMust { - parsingMustList.Terms = append(parsingMustList.Terms, q) - parsingMust = false - } else if parsingMustNot { - parsingMustNotList.Terms = append(parsingMustNotList.Terms, q) - parsingMustNot = false - } else { - parsingShouldList.Terms = append(parsingShouldList.Terms, q) - } - } case 11: + //line query_syntax.y:92 + { + field := yyS[yypt-2].s + str := yyS[yypt-0].s + logDebugGrammar("FIELD - %s STRING - %s", field, str) + q := NewMatchQuery(str).SetField(field) + if parsingMust { + parsingMustList.AddQuery(q) + parsingMust = false + } else if parsingMustNot { + parsingMustNotList.AddQuery(q) + parsingMustNot = false + } else { + parsingShouldList.AddQuery(q) + } + } + case 12: //line query_syntax.y:108 { - field := yyS[yypt-2].s - str := yyS[yypt-0].s - logDebugGrammar("FIELD - %s STRING - %s", field, str) - q := &MatchQuery{ - Match: str, - Field: field, - BoostVal: 1.0, - Explain: true, + field := yyS[yypt-2].s + phrase := yyS[yypt-0].s + logDebugGrammar("FIELD - %s PHRASE - %s", field, phrase) + q := NewMatchPhraseQuery(phrase).SetField(field) + if parsingMust { + parsingMustList.AddQuery(q) + parsingMust = false + } else if parsingMustNot { + parsingMustNotList.AddQuery(q) + parsingMustNot = false + } else { + parsingShouldList.AddQuery(q) + } } - if parsingMapping[field] != nil { - q.Analyzer = parsingMapping[field].Analyzer + case 13: + //line 
query_syntax.y:126 + { + boost := yyS[yypt-1].n + logDebugGrammar("BOOST %d", boost) } - if parsingMust { - parsingMustList.Terms = append(parsingMustList.Terms, q) - parsingMust = false - } else if parsingMustNot { - parsingMustNotList.Terms = append(parsingMustNotList.Terms, q) - parsingMustNot = false - } else { - parsingShouldList.Terms = append(parsingShouldList.Terms, q) - } - } - case 12: + case 14: //line query_syntax.y:132 { - field := yyS[yypt-2].s - phrase := yyS[yypt-0].s - logDebugGrammar("FIELD - %s PHRASE - %s", field, phrase) - q := &MatchPhraseQuery{ - MatchPhrase: phrase, - Field: field, - BoostVal: 1.0, - Explain: true, + } - if parsingMapping[field] != nil { - q.Analyzer = parsingMapping[field].Analyzer - } - if parsingMust { - parsingMustList.Terms = append(parsingMustList.Terms, q) - parsingMust = false - } else if parsingMustNot { - parsingMustNotList.Terms = append(parsingMustNotList.Terms, q) - parsingMustNot = false - } else { - parsingShouldList.Terms = append(parsingShouldList.Terms, q) - } - } - case 13: - //line query_syntax.y:158 - { - boost := yyS[yypt-1].n - logDebugGrammar("BOOST %d", boost) - } - case 14: - //line query_syntax.y:164 - { - - } case 15: - //line query_syntax.y:168 + //line query_syntax.y:136 { - - } + + } } goto yystack /* stack new state and value */ }