major refactor, apologies for the large commit
removed analyzers (these are now built as needed through config) removed html chacter filter (now built as needed through config) added missing license header changed constructor signature of filters that cannot return errors filter constructors that can have errors, now have Must variant which panics change cdl2 tokenizer into filter (should only see lower-case input) new top level index api, closes #5 refactored index tests to not rely directly on analyzers moved query objects to top-level new top level search api, closes #12 top score collector allows skipping results index mapping supports _all by default, closes #3 and closes #6 index mapping supports disabled sections, closes #7 new http sub package with reusable http.Handler's, closes #22
This commit is contained in:
parent
8150146dc7
commit
2968d3538a
|
@ -5,8 +5,9 @@
|
|||
.project
|
||||
.settings
|
||||
.DS_Store
|
||||
/analysis/tokenizers/cld2/cld2-read-only
|
||||
/analysis/token_filters/cld2/cld2-read-only
|
||||
/examples/bleve_index_json/bleve_index_json
|
||||
/examples/bleve_index_json/index/
|
||||
/examples/bleve_query/bleve_query
|
||||
/utils/bleve_dump/bleve_dump
|
||||
/y.output
|
||||
|
|
|
@ -1,39 +0,0 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package standard_analyzer
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_words_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
)
|
||||
|
||||
func NewStandardAnalyzer() (*analysis.Analyzer, error) {
|
||||
lower_case_filter, err := lower_case_filter.NewLowerCaseFilter()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
stop_words_filter, err := stop_words_filter.NewStopWordsFilter()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
standard := analysis.Analyzer{
|
||||
CharFilters: []analysis.CharFilter{},
|
||||
Tokenizer: unicode_word_boundary.NewUnicodeWordBoundaryTokenizer(),
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
lower_case_filter,
|
||||
stop_words_filter,
|
||||
},
|
||||
}
|
||||
|
||||
return &standard, nil
|
||||
}
|
|
@ -1,32 +0,0 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package html_char_filter
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/char_filters/regexp_char_filter"
|
||||
)
|
||||
|
||||
// the origin of this regex is here:
|
||||
// http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/
|
||||
// slightly modified by me to also match the DOCTYPE
|
||||
const htmlTagPattern = `</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`
|
||||
|
||||
var htmlRegex = regexp.MustCompile(htmlTagPattern)
|
||||
|
||||
type HtmlCharFilter struct {
|
||||
*regexp_char_filter.RegexpCharFilter
|
||||
}
|
||||
|
||||
func NewHtmlCharFilter() *HtmlCharFilter {
|
||||
return &HtmlCharFilter{
|
||||
regexp_char_filter.NewRegexpCharFilter(htmlRegex, []byte{' '}),
|
||||
}
|
||||
}
|
|
@ -6,14 +6,19 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package html_char_filter
|
||||
package regexp_char_filter
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestHtmlCharFilter(t *testing.T) {
|
||||
func TestRegexpCharFilter(t *testing.T) {
|
||||
|
||||
htmlTagPattern := `</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`
|
||||
htmlRegex := regexp.MustCompile(htmlTagPattern)
|
||||
|
||||
tests := []struct {
|
||||
input []byte
|
||||
output []byte
|
||||
|
@ -43,7 +48,7 @@ func TestHtmlCharFilter(t *testing.T) {
|
|||
}
|
||||
|
||||
for _, test := range tests {
|
||||
filter := NewHtmlCharFilter()
|
||||
filter := NewRegexpCharFilter(htmlRegex, []byte{' '})
|
||||
output := filter.Filter(test.input)
|
||||
if !reflect.DeepEqual(output, test.output) {
|
||||
t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input))
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package analysis
|
||||
|
||||
import (
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
# cld2 token filter
|
||||
|
||||
A bleve token filter which passes the text of each token and passes it to the cld2 library. The library determines what it thinks the language most likely is. The ISO-639 language code replaces the token term.
|
||||
|
||||
In normal usage, you use this with the "single" tokenizer, so there is only one input token. Further, you should precede it with the "to_lower" filter so that the input term is in all lower-case unicode characters.
|
||||
|
||||
# Building
|
||||
|
||||
1. Acquire the source to cld2 in this directory.
|
||||
|
||||
$ svn checkout http://cld2.googlecode.com/svn/trunk/ cld2-read-only
|
||||
|
||||
2. Build cld2
|
||||
|
||||
$ cd cld2-read-only/internal/
|
||||
$ ./compile_libs.sh
|
||||
|
||||
|
||||
3. Put the resulting libraries somewhere your dynamic linker can find.
|
||||
|
||||
$ cp *.so /usr/local/lib
|
||||
|
||||
4. Run the unit tests
|
||||
|
||||
$ cd ../..
|
||||
$ go test -v
|
||||
=== RUN TestCld2Filter
|
||||
--- PASS: TestCld2Filter (0.00 seconds)
|
||||
PASS
|
||||
ok github.com/couchbaselabs/bleve/analysis/token_filters/cld2 0.033s
|
|
@ -1,9 +1,16 @@
|
|||
|
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
#include <cstddef>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include "cld2_tokenizer.h"
|
||||
#include "cld2_filter.h"
|
||||
#include "cld2-read-only/public/compact_lang_det.h"
|
||||
|
||||
const char* DetectLang(const char *buffer) {
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package cld2
|
||||
|
||||
// #cgo LDFLAGS: -lcld2_full
|
||||
// #include "cld2_filter.h"
|
||||
// #include <string.h>
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
type Cld2Filter struct {
|
||||
}
|
||||
|
||||
func NewCld2Filter() *Cld2Filter {
|
||||
return &Cld2Filter{}
|
||||
}
|
||||
|
||||
func (f *Cld2Filter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
rv := make(analysis.TokenStream, 0)
|
||||
|
||||
offset := 0
|
||||
for _, token := range input {
|
||||
var err error
|
||||
token.Term, err = f.detectLanguage(token.Term)
|
||||
if err != nil {
|
||||
token.Term = []byte("error")
|
||||
}
|
||||
token.Start = offset
|
||||
token.End = token.Start + len(token.Term)
|
||||
rv = append(rv, token)
|
||||
offset = token.End + 1
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func (f *Cld2Filter) detectLanguage(input []byte) ([]byte, error) {
|
||||
cstr := C.CString(string(input))
|
||||
res := C.DetectLang(cstr)
|
||||
return C.GoBytes(unsafe.Pointer(res), C.int(C.strlen(res))), nil
|
||||
}
|
|
@ -6,12 +6,13 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package shredder
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
)
|
||||
const char* DetectLang(const char *buffer);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
type Shredder interface {
|
||||
Shred(id string, body []byte) (document.Document, error)
|
||||
}
|
|
@ -0,0 +1,112 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package cld2
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
func TestCld2Filter(t *testing.T) {
|
||||
tests := []struct {
|
||||
input analysis.TokenStream
|
||||
output analysis.TokenStream
|
||||
}{
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("the quick brown fox"),
|
||||
Start: 0,
|
||||
End: 19,
|
||||
Position: 1,
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("en"),
|
||||
Start: 0,
|
||||
End: 2,
|
||||
Position: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("こんにちは世界"),
|
||||
Start: 0,
|
||||
End: 21,
|
||||
Position: 1,
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ja"),
|
||||
Start: 0,
|
||||
End: 2,
|
||||
Position: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"),
|
||||
Start: 0,
|
||||
End: 72,
|
||||
Position: 1,
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("th"),
|
||||
Start: 0,
|
||||
End: 2,
|
||||
Position: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("مرحبا، العالم!"),
|
||||
Start: 0,
|
||||
End: 26,
|
||||
Position: 1,
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ar"),
|
||||
Start: 0,
|
||||
End: 2,
|
||||
Position: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
filter := NewCld2Filter()
|
||||
for _, test := range tests {
|
||||
res := filter.Filter(test.input)
|
||||
if !reflect.DeepEqual(res, test.output) {
|
||||
t.Errorf("expected:")
|
||||
for _, token := range test.output {
|
||||
t.Errorf("%#v - %s", token, token.Term)
|
||||
}
|
||||
t.Errorf("got:")
|
||||
for _, token := range res {
|
||||
t.Errorf("%#v - %s", token, token.Term)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -19,11 +19,11 @@ type LengthFilter struct {
|
|||
max int
|
||||
}
|
||||
|
||||
func NewLengthFilter(min, max int) (*LengthFilter, error) {
|
||||
func NewLengthFilter(min, max int) *LengthFilter {
|
||||
return &LengthFilter{
|
||||
min: min,
|
||||
max: max,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
func (f *LengthFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
|
|
|
@ -28,10 +28,7 @@ func TestLengthFilter(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
lengthFilter, err := NewLengthFilter(3, 4)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
lengthFilter := NewLengthFilter(3, 4)
|
||||
ouputTokenStream := lengthFilter.Filter(inputTokenStream)
|
||||
if len(ouputTokenStream) != 1 {
|
||||
t.Fatalf("expected 1 output token")
|
||||
|
@ -55,10 +52,7 @@ func TestLengthFilterNoMax(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
lengthFilter, err := NewLengthFilter(3, -1)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
lengthFilter := NewLengthFilter(3, -1)
|
||||
ouputTokenStream := lengthFilter.Filter(inputTokenStream)
|
||||
if len(ouputTokenStream) != 2 {
|
||||
t.Fatalf("expected 2 output token")
|
||||
|
@ -85,10 +79,7 @@ func TestLengthFilterNoMin(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
lengthFilter, err := NewLengthFilter(-1, 4)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
lengthFilter := NewLengthFilter(-1, 4)
|
||||
ouputTokenStream := lengthFilter.Filter(inputTokenStream)
|
||||
if len(ouputTokenStream) != 2 {
|
||||
t.Fatalf("expected 2 output token")
|
||||
|
|
|
@ -17,8 +17,8 @@ import (
|
|||
type LowerCaseFilter struct {
|
||||
}
|
||||
|
||||
func NewLowerCaseFilter() (*LowerCaseFilter, error) {
|
||||
return &LowerCaseFilter{}, nil
|
||||
func NewLowerCaseFilter() *LowerCaseFilter {
|
||||
return &LowerCaseFilter{}
|
||||
}
|
||||
|
||||
func (f *LowerCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
|
|
|
@ -41,10 +41,7 @@ func TestLowerCaseFilter(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
filter, err := NewLowerCaseFilter()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
filter := NewLowerCaseFilter()
|
||||
ouputTokenStream := filter.Filter(inputTokenStream)
|
||||
if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) {
|
||||
t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
## Languages supported
|
||||
|
||||
"danish",
|
||||
"dutch",
|
||||
"english",
|
||||
"finnish",
|
||||
"french",
|
||||
"german",
|
||||
"hungarian",
|
||||
"italian",
|
||||
"norwegian",
|
||||
"porter",
|
||||
"portuguese",
|
||||
"romanian",
|
||||
"russian",
|
||||
"spanish",
|
||||
"swedish",
|
||||
"turkish"
|
|
@ -29,6 +29,14 @@ func NewStemmerFilter(lang string) (*StemmerFilter, error) {
|
|||
}, nil
|
||||
}
|
||||
|
||||
func MustNewStemmerFilter(lang string) *StemmerFilter {
|
||||
sf, err := NewStemmerFilter(lang)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return sf
|
||||
}
|
||||
|
||||
func (s *StemmerFilter) List() []string {
|
||||
return snowball.LangList()
|
||||
}
|
||||
|
|
|
@ -24,10 +24,10 @@ type StopWordsFilter struct {
|
|||
stopWords map[string]bool
|
||||
}
|
||||
|
||||
func NewStopWordsFilter() (*StopWordsFilter, error) {
|
||||
func NewStopWordsFilter() *StopWordsFilter {
|
||||
return &StopWordsFilter{
|
||||
stopWords: buildStopWordMap(DEFAULT_STOP_WORDS),
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
func (f *StopWordsFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
|
|
|
@ -44,10 +44,7 @@ func TestStopWordsFilter(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
filter, err := NewStopWordsFilter()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
filter := NewStopWordsFilter()
|
||||
ouputTokenStream := filter.Filter(inputTokenStream)
|
||||
if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) {
|
||||
t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)
|
||||
|
|
|
@ -1,28 +0,0 @@
|
|||
# cld2 tokenizer
|
||||
|
||||
A bleve tokenizer which passes the input text to the cld2 library. The library determines what it thinks the language most likely is. The ISO-639 language code is returned as the single token resulting from the analysis.
|
||||
|
||||
# Building
|
||||
|
||||
1. Acquire the source to cld2 in this directory.
|
||||
|
||||
$ svn checkout http://cld2.googlecode.com/svn/trunk/ cld2-read-only
|
||||
|
||||
2. Build cld2
|
||||
|
||||
$ cd cld2-read-only/internal/
|
||||
$ ./compile_libs.sh
|
||||
|
||||
|
||||
3. Put the resulting libraries somewhere your dynamic linker can find.
|
||||
|
||||
$ cp *.so /usr/local/lib
|
||||
|
||||
4. Run the unit tests
|
||||
|
||||
$ cd ../..
|
||||
$ go test -v
|
||||
=== RUN TestCld2Tokenizer
|
||||
--- PASS: TestCld2Tokenizer (0.03 seconds)
|
||||
PASS
|
||||
ok github.com/couchbaselabs/bleve/analysis/tokenizers/cld2 0.067s
|
|
@ -1,41 +0,0 @@
|
|||
package cld2
|
||||
|
||||
// #cgo LDFLAGS: -Lcld2-read-only/internal/ -lcld2_full
|
||||
// #include "cld2_tokenizer.h"
|
||||
// #include <string.h>
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
type Cld2Tokenizer struct {
|
||||
}
|
||||
|
||||
func NewCld2Tokenizer() *Cld2Tokenizer {
|
||||
return &Cld2Tokenizer{}
|
||||
}
|
||||
|
||||
func (rt *Cld2Tokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||
rv := make(analysis.TokenStream, 0)
|
||||
lang, err := rt.detectLanguage(input)
|
||||
if err != nil {
|
||||
return rv
|
||||
}
|
||||
token := analysis.Token{
|
||||
Term: lang,
|
||||
Start: 0,
|
||||
End: len(lang),
|
||||
Position: 1,
|
||||
}
|
||||
rv = append(rv, &token)
|
||||
return rv
|
||||
}
|
||||
|
||||
func (rt *Cld2Tokenizer) detectLanguage(input []byte) ([]byte, error) {
|
||||
cstr := C.CString(string(input))
|
||||
res := C.DetectLang(cstr)
|
||||
return C.GoBytes(unsafe.Pointer(res), C.int(C.strlen(res))), nil
|
||||
}
|
|
@ -1,10 +0,0 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
const char* DetectLang(const char *buffer);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
package cld2
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
func TestCld2Tokenizer(t *testing.T) {
|
||||
tests := []struct {
|
||||
input []byte
|
||||
output analysis.TokenStream
|
||||
}{
|
||||
{
|
||||
input: []byte("the quick brown fox"),
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("en"),
|
||||
Start: 0,
|
||||
End: 2,
|
||||
Position: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: []byte("こんにちは世界"),
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ja"),
|
||||
Start: 0,
|
||||
End: 2,
|
||||
Position: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: []byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"),
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("th"),
|
||||
Start: 0,
|
||||
End: 2,
|
||||
Position: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: []byte("مرحبا، العالم!"),
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ar"),
|
||||
Start: 0,
|
||||
End: 2,
|
||||
Position: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
tokenizer := NewCld2Tokenizer()
|
||||
for _, test := range tests {
|
||||
res := tokenizer.Tokenize(test.input)
|
||||
if !reflect.DeepEqual(res, test.output) {
|
||||
t.Errorf("expected:")
|
||||
for _, token := range test.output {
|
||||
t.Errorf("%#v - %s", token, token.Term)
|
||||
}
|
||||
t.Errorf("got:")
|
||||
for _, token := range res {
|
||||
t.Errorf("%#v - %s", token, token.Term)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,172 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/char_filters/regexp_char_filter"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/single_token"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/cld2"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/length_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stemmer_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_words_filter"
|
||||
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type AnalysisConfig struct {
|
||||
CharFilters map[string]analysis.CharFilter
|
||||
Tokenizers map[string]analysis.Tokenizer
|
||||
TokenFilters map[string]analysis.TokenFilter
|
||||
Analyzers map[string]*analysis.Analyzer
|
||||
}
|
||||
|
||||
type HighlightConfig struct {
|
||||
Highlighters map[string]search.Highlighter
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
Analysis *AnalysisConfig
|
||||
DefaultAnalyzer *string
|
||||
Highlight *HighlightConfig
|
||||
DefaultHighlighter *string
|
||||
}
|
||||
|
||||
func (c *Config) BuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) (*analysis.Analyzer, error) {
|
||||
rv := analysis.Analyzer{}
|
||||
if len(charFilterNames) > 0 {
|
||||
rv.CharFilters = make([]analysis.CharFilter, len(charFilterNames))
|
||||
for i, charFilterName := range charFilterNames {
|
||||
charFilter := c.Analysis.CharFilters[charFilterName]
|
||||
if charFilter == nil {
|
||||
return nil, fmt.Errorf("no character filter named `%s` registered", charFilterName)
|
||||
}
|
||||
rv.CharFilters[i] = charFilter
|
||||
}
|
||||
}
|
||||
rv.Tokenizer = c.Analysis.Tokenizers[tokenizerName]
|
||||
if rv.Tokenizer == nil {
|
||||
return nil, fmt.Errorf("no tokenizer named `%s` registered", tokenizerName)
|
||||
}
|
||||
if len(tokenFilterNames) > 0 {
|
||||
rv.TokenFilters = make([]analysis.TokenFilter, len(tokenFilterNames))
|
||||
for i, tokenFilterName := range tokenFilterNames {
|
||||
tokenFilter := c.Analysis.TokenFilters[tokenFilterName]
|
||||
if tokenFilter == nil {
|
||||
return nil, fmt.Errorf("no token filter named `%s` registered", tokenFilterName)
|
||||
}
|
||||
rv.TokenFilters[i] = tokenFilter
|
||||
}
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func (c *Config) MustBuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) *analysis.Analyzer {
|
||||
analyzer, err := c.BuildNewAnalyzer(charFilterNames, tokenizerName, tokenFilterNames)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return analyzer
|
||||
}
|
||||
|
||||
func NewConfig() *Config {
|
||||
return &Config{
|
||||
Analysis: &AnalysisConfig{
|
||||
CharFilters: make(map[string]analysis.CharFilter),
|
||||
Tokenizers: make(map[string]analysis.Tokenizer),
|
||||
TokenFilters: make(map[string]analysis.TokenFilter),
|
||||
Analyzers: make(map[string]*analysis.Analyzer),
|
||||
},
|
||||
Highlight: &HighlightConfig{
|
||||
Highlighters: make(map[string]search.Highlighter),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
var config *Config
|
||||
|
||||
func init() {
|
||||
|
||||
// build the default configuration
|
||||
config = NewConfig()
|
||||
|
||||
// register char filters
|
||||
htmlCharFilterRegexp := regexp.MustCompile(`</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`)
|
||||
htmlCharFilter := regexp_char_filter.NewRegexpCharFilter(htmlCharFilterRegexp, []byte{' '})
|
||||
config.Analysis.CharFilters["html"] = htmlCharFilter
|
||||
|
||||
// register tokenizers
|
||||
whitespaceTokenizerRegexp := regexp.MustCompile(`\w+`)
|
||||
config.Analysis.Tokenizers["single"] = single_token.NewSingleTokenTokenizer()
|
||||
config.Analysis.Tokenizers["unicode"] = unicode_word_boundary.NewUnicodeWordBoundaryTokenizer()
|
||||
config.Analysis.Tokenizers["unicode_th"] = unicode_word_boundary.NewUnicodeWordBoundaryCustomLocaleTokenizer("th_TH")
|
||||
config.Analysis.Tokenizers["whitespace"] = regexp_tokenizer.NewRegexpTokenizer(whitespaceTokenizerRegexp)
|
||||
|
||||
// register token filters
|
||||
config.Analysis.TokenFilters["detect_lang"] = cld2.NewCld2Filter()
|
||||
config.Analysis.TokenFilters["short"] = length_filter.NewLengthFilter(3, -1)
|
||||
config.Analysis.TokenFilters["long"] = length_filter.NewLengthFilter(-1, 255)
|
||||
config.Analysis.TokenFilters["to_lower"] = lower_case_filter.NewLowerCaseFilter()
|
||||
config.Analysis.TokenFilters["stemmer_da"] = stemmer_filter.MustNewStemmerFilter("danish")
|
||||
config.Analysis.TokenFilters["stemmer_nl"] = stemmer_filter.MustNewStemmerFilter("dutch")
|
||||
config.Analysis.TokenFilters["stemmer_en"] = stemmer_filter.MustNewStemmerFilter("english")
|
||||
config.Analysis.TokenFilters["stemmer_fi"] = stemmer_filter.MustNewStemmerFilter("finnish")
|
||||
config.Analysis.TokenFilters["stemmer_fr"] = stemmer_filter.MustNewStemmerFilter("french")
|
||||
config.Analysis.TokenFilters["stemmer_de"] = stemmer_filter.MustNewStemmerFilter("german")
|
||||
config.Analysis.TokenFilters["stemmer_hu"] = stemmer_filter.MustNewStemmerFilter("hungarian")
|
||||
config.Analysis.TokenFilters["stemmer_it"] = stemmer_filter.MustNewStemmerFilter("italian")
|
||||
config.Analysis.TokenFilters["stemmer_no"] = stemmer_filter.MustNewStemmerFilter("norwegian")
|
||||
config.Analysis.TokenFilters["stemmer_porter"] = stemmer_filter.MustNewStemmerFilter("porter")
|
||||
config.Analysis.TokenFilters["stemmer_pt"] = stemmer_filter.MustNewStemmerFilter("portuguese")
|
||||
config.Analysis.TokenFilters["stemmer_ro"] = stemmer_filter.MustNewStemmerFilter("romanian")
|
||||
config.Analysis.TokenFilters["stemmer_ru"] = stemmer_filter.MustNewStemmerFilter("russian")
|
||||
config.Analysis.TokenFilters["stemmer_es"] = stemmer_filter.MustNewStemmerFilter("spanish")
|
||||
config.Analysis.TokenFilters["stemmer_sv"] = stemmer_filter.MustNewStemmerFilter("swedish")
|
||||
config.Analysis.TokenFilters["stemmer_tr"] = stemmer_filter.MustNewStemmerFilter("turkish")
|
||||
config.Analysis.TokenFilters["stop_token"] = stop_words_filter.NewStopWordsFilter()
|
||||
|
||||
// register analyzers
|
||||
keywordAnalyzer := config.MustBuildNewAnalyzer([]string{}, "single", []string{})
|
||||
config.Analysis.Analyzers["keyword"] = keywordAnalyzer
|
||||
simpleAnalyzer := config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower"})
|
||||
config.Analysis.Analyzers["simple"] = simpleAnalyzer
|
||||
standardAnalyzer := config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower", "stop_token"})
|
||||
config.Analysis.Analyzers["standard"] = standardAnalyzer
|
||||
englishAnalyzer := config.MustBuildNewAnalyzer([]string{}, "unicode", []string{"to_lower", "stemmer_en", "stop_token"})
|
||||
config.Analysis.Analyzers["english"] = englishAnalyzer
|
||||
detectLangAnalyzer := config.MustBuildNewAnalyzer([]string{}, "single", []string{"to_lower", "detect_lang"})
|
||||
config.Analysis.Analyzers["detect_lang"] = detectLangAnalyzer
|
||||
|
||||
// register ansi highlighter
|
||||
config.Highlight.Highlighters["ansi"] = search.NewSimpleHighlighter()
|
||||
|
||||
// register html highlighter
|
||||
htmlFormatter := search.NewHTMLFragmentFormatterCustom(`<span class="highlight">`, `</span>`)
|
||||
htmlHighlighter := search.NewSimpleHighlighter()
|
||||
htmlHighlighter.SetFragmentFormatter(htmlFormatter)
|
||||
config.Highlight.Highlighters["html"] = htmlHighlighter
|
||||
|
||||
// set the default analyzer
|
||||
simpleAnalyzerName := "simple"
|
||||
config.DefaultAnalyzer = &simpleAnalyzerName
|
||||
|
||||
// set the default highlighter
|
||||
htmlHighlighterName := "html"
|
||||
config.DefaultHighlighter = &htmlHighlighterName
|
||||
|
||||
}
|
|
@ -9,7 +9,8 @@
|
|||
package document
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
)
|
||||
|
||||
type Document struct {
|
||||
|
@ -36,7 +37,21 @@ func (d *Document) AddField(f Field) *Document {
|
|||
return d
|
||||
}
|
||||
|
||||
func (d *Document) String() string {
|
||||
bytes, _ := json.MarshalIndent(d, "", " ")
|
||||
return string(bytes)
|
||||
func (d *Document) GoString() string {
|
||||
fields := ""
|
||||
for i, field := range d.Fields {
|
||||
if i != 0 {
|
||||
fields += ", "
|
||||
}
|
||||
fields += fmt.Sprintf("%#v", field)
|
||||
}
|
||||
compositeFields := ""
|
||||
for i, field := range d.CompositeFields {
|
||||
log.Printf("see composite field")
|
||||
if i != 0 {
|
||||
compositeFields += ", "
|
||||
}
|
||||
compositeFields += fmt.Sprintf("%#v", field)
|
||||
}
|
||||
return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID, fields, compositeFields)
|
||||
}
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package document
|
||||
|
||||
import (
|
||||
|
|
|
@ -9,22 +9,11 @@
|
|||
package document
|
||||
|
||||
import (
|
||||
"log"
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
|
||||
)
|
||||
|
||||
var standardAnalyzer *analysis.Analyzer
|
||||
|
||||
func init() {
|
||||
var err error
|
||||
standardAnalyzer, err = standard_analyzer.NewStandardAnalyzer()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
const DEFAULT_TEXT_INDEXING_OPTIONS = INDEX_FIELD
|
||||
|
||||
type TextField struct {
|
||||
|
@ -43,7 +32,19 @@ func (t *TextField) Options() IndexingOptions {
|
|||
}
|
||||
|
||||
func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
|
||||
tokens := t.analyzer.Analyze(t.Value())
|
||||
var tokens analysis.TokenStream
|
||||
if t.analyzer != nil {
|
||||
tokens = t.analyzer.Analyze(t.Value())
|
||||
} else {
|
||||
tokens = analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: len(t.value),
|
||||
Term: t.value,
|
||||
Position: 1,
|
||||
},
|
||||
}
|
||||
}
|
||||
fieldLength := len(tokens) // number of tokens in this doc field
|
||||
tokenFreqs := analysis.TokenFrequency(tokens)
|
||||
return fieldLength, tokenFreqs
|
||||
|
@ -53,15 +54,27 @@ func (t *TextField) Value() []byte {
|
|||
return t.value
|
||||
}
|
||||
|
||||
func (t *TextField) GoString() string {
|
||||
return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %s, Value: %s}", t.name, t.options, t.analyzer, t.value)
|
||||
}
|
||||
|
||||
func NewTextField(name string, value []byte) *TextField {
|
||||
return NewTextFieldWithIndexingOptions(name, value, DEFAULT_TEXT_INDEXING_OPTIONS)
|
||||
}
|
||||
|
||||
func NewTextFieldWithIndexingOptions(name string, value []byte, options IndexingOptions) *TextField {
|
||||
return &TextField{
|
||||
name: name,
|
||||
options: options,
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
|
||||
func NewTextFieldWithAnalyzer(name string, value []byte, analyzer *analysis.Analyzer) *TextField {
|
||||
return &TextField{
|
||||
name: name,
|
||||
options: options,
|
||||
analyzer: standardAnalyzer,
|
||||
options: DEFAULT_TEXT_INDEXING_OPTIONS,
|
||||
analyzer: analyzer,
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,3 +27,23 @@ func (o IndexingOptions) IsStored() bool {
|
|||
func (o IndexingOptions) IncludeTermVectors() bool {
|
||||
return o&INCLUDE_TERM_VECTORS != 0
|
||||
}
|
||||
|
||||
func (o IndexingOptions) String() string {
|
||||
rv := ""
|
||||
if o.IsIndexed() {
|
||||
rv += "INDEXED"
|
||||
}
|
||||
if o.IsStored() {
|
||||
if rv != "" {
|
||||
rv += ", "
|
||||
}
|
||||
rv += "STORE"
|
||||
}
|
||||
if o.IncludeTermVectors() {
|
||||
if rv != "" {
|
||||
rv += ", "
|
||||
}
|
||||
rv += "TV"
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
package document
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
type FieldMapping struct {
|
||||
Name string
|
||||
Options IndexingOptions
|
||||
Analyzer *analysis.Analyzer
|
||||
}
|
||||
|
||||
type Mapping map[string]*FieldMapping
|
|
@ -6,19 +6,19 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package keyword_analyzer
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/single_token"
|
||||
const (
|
||||
ERROR_NO_ID Error = iota
|
||||
ERROR_NO_TYPE
|
||||
)
|
||||
|
||||
func NewKeywordAnalyzer() (*analysis.Analyzer, error) {
|
||||
keyword := analysis.Analyzer{
|
||||
CharFilters: []analysis.CharFilter{},
|
||||
Tokenizer: single_token.NewSingleTokenTokenizer(),
|
||||
TokenFilters: []analysis.TokenFilter{},
|
||||
}
|
||||
type Error int
|
||||
|
||||
return &keyword, nil
|
||||
func (e Error) Error() string {
|
||||
return errorMessages[int(e)]
|
||||
}
|
||||
|
||||
var errorMessages = map[int]string{
|
||||
0: "unable to determine document id",
|
||||
}
|
|
@ -13,67 +13,64 @@ import (
|
|||
"io/ioutil"
|
||||
"log"
|
||||
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index/store/leveldb"
|
||||
"github.com/couchbaselabs/bleve/index/upside_down"
|
||||
"github.com/couchbaselabs/bleve/shredder"
|
||||
"github.com/couchbaselabs/bleve"
|
||||
)
|
||||
|
||||
var jsonDir = flag.String("jsonDir", "json", "json directory")
|
||||
var indexDir = flag.String("indexDir", "index", "index directory")
|
||||
var storeFields = flag.Bool("storeFields", false, "store field data")
|
||||
var includeTermVectors = flag.Bool("includeTermVectors", false, "include term vectors")
|
||||
|
||||
func main() {
|
||||
|
||||
flag.Parse()
|
||||
|
||||
indexOptions := document.INDEX_FIELD
|
||||
if *storeFields {
|
||||
indexOptions |= document.STORE_FIELD
|
||||
}
|
||||
if *includeTermVectors {
|
||||
indexOptions |= document.INCLUDE_TERM_VECTORS
|
||||
}
|
||||
// create a new default mapping
|
||||
mapping := bleve.NewIndexMapping()
|
||||
|
||||
// create a automatic JSON document shredder
|
||||
jsonShredder := shredder.NewAutoJsonShredderWithOptions(indexOptions)
|
||||
|
||||
// create a new index
|
||||
store, err := leveldb.Open(*indexDir)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
index := upside_down.NewUpsideDownCouch(store)
|
||||
err = index.Open()
|
||||
// open the index
|
||||
index, err := bleve.Open(*indexDir, mapping)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
defer index.Close()
|
||||
|
||||
// open the directory
|
||||
dirEntries, err := ioutil.ReadDir(*jsonDir)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// walk the directory entries
|
||||
for _, dirEntry := range dirEntries {
|
||||
// read the bytes
|
||||
jsonBytes, err := ioutil.ReadFile(*jsonDir + "/" + dirEntry.Name())
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
// shred them into a document
|
||||
doc, err := jsonShredder.Shred(dirEntry.Name(), jsonBytes)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
//log.Printf("%+v", doc)
|
||||
// update the index
|
||||
err = index.Update(doc)
|
||||
for jsonFile := range walkDirectory(*jsonDir) {
|
||||
// index the json files
|
||||
err = index.IndexJSONID(jsonFile.filename, jsonFile.contents)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type jsonFile struct {
|
||||
filename string
|
||||
contents []byte
|
||||
}
|
||||
|
||||
func walkDirectory(dir string) chan jsonFile {
|
||||
rv := make(chan jsonFile)
|
||||
go func() {
|
||||
defer close(rv)
|
||||
|
||||
// open the directory
|
||||
dirEntries, err := ioutil.ReadDir(*jsonDir)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// walk the directory entries
|
||||
for _, dirEntry := range dirEntries {
|
||||
// read the bytes
|
||||
jsonBytes, err := ioutil.ReadFile(*jsonDir + "/" + dirEntry.Name())
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
rv <- jsonFile{
|
||||
filename: dirEntry.Name(),
|
||||
contents: jsonBytes,
|
||||
}
|
||||
}
|
||||
}()
|
||||
return rv
|
||||
}
|
||||
|
|
|
@ -12,85 +12,50 @@ import (
|
|||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index/store/leveldb"
|
||||
"github.com/couchbaselabs/bleve/index/upside_down"
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
"github.com/couchbaselabs/bleve"
|
||||
)
|
||||
|
||||
var field = flag.String("field", "description", "field to query")
|
||||
var field = flag.String("field", "_all", "field to query")
|
||||
var indexDir = flag.String("indexDir", "index", "index directory")
|
||||
var limit = flag.Int("limit", 10, "limit to first N results")
|
||||
var includeHighlights = flag.Bool("highlight", false, "highlight matches")
|
||||
var skip = flag.Int("skip", 0, "skip the first N results")
|
||||
var explain = flag.Bool("explain", false, "explain scores")
|
||||
var includeHighlights = flag.Bool("highlight", true, "highlight matches")
|
||||
|
||||
func main() {
|
||||
|
||||
flag.Parse()
|
||||
|
||||
if flag.NArg() < 1 {
|
||||
log.Fatal("Specify search term")
|
||||
log.Fatal("Specify search query")
|
||||
}
|
||||
|
||||
// create a new default mapping
|
||||
mapping := bleve.NewIndexMapping()
|
||||
|
||||
// open index
|
||||
store, err := leveldb.Open(*indexDir)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
index := upside_down.NewUpsideDownCouch(store)
|
||||
err = index.Open()
|
||||
index, err := bleve.Open(*indexDir, mapping)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
defer index.Close()
|
||||
|
||||
tq := search.TermQuery{
|
||||
Term: flag.Arg(0),
|
||||
Field: *field,
|
||||
BoostVal: 1.0,
|
||||
Explain: true,
|
||||
// build a search with the provided parameters
|
||||
queryString := strings.Join(flag.Args(), " ")
|
||||
query := bleve.NewSyntaxQuery(queryString)
|
||||
searchRequest := bleve.NewSearchRequest(query, *limit, *skip, *explain)
|
||||
|
||||
// enable highlights if requested
|
||||
if *includeHighlights {
|
||||
searchRequest.Highlight = bleve.NewHighlightWithStyle("ansi")
|
||||
}
|
||||
collector := search.NewTopScorerCollector(*limit)
|
||||
searcher, err := tq.Searcher(index)
|
||||
if err != nil {
|
||||
log.Fatalf("searcher error: %v", err)
|
||||
return
|
||||
}
|
||||
err = collector.Collect(searcher)
|
||||
|
||||
// execute the search
|
||||
searchResult, err := index.Search(searchRequest)
|
||||
if err != nil {
|
||||
log.Fatalf("search error: %v", err)
|
||||
return
|
||||
}
|
||||
results := collector.Results()
|
||||
if len(results) == 0 {
|
||||
fmt.Printf("No matches\n")
|
||||
} else {
|
||||
last := uint64(*limit)
|
||||
if searcher.Count() < last {
|
||||
last = searcher.Count()
|
||||
}
|
||||
fmt.Printf("%d matches, showing %d through %d\n", searcher.Count(), 1, last)
|
||||
for i, result := range results {
|
||||
fmt.Printf("%2d. %s (%f)\n", i+1, result.ID, result.Score)
|
||||
if *includeHighlights {
|
||||
highlighter := search.NewSimpleHighlighter()
|
||||
|
||||
doc, err := index.Document(result.ID)
|
||||
if err != nil {
|
||||
fmt.Print(err)
|
||||
return
|
||||
}
|
||||
|
||||
fragments := highlighter.BestFragmentsInField(result, doc, *field, 5)
|
||||
for _, fragment := range fragments {
|
||||
fmt.Printf("\t%s\n", fragment)
|
||||
}
|
||||
if len(fragments) == 0 {
|
||||
for _, f := range doc.Fields {
|
||||
fmt.Printf("\tfield: %s\n", f)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Println(searchResult)
|
||||
}
|
||||
|
|
|
@ -4,5 +4,3 @@ echo Running nex...
|
|||
nex query_syntax.nex
|
||||
echo Running goyacc...
|
||||
go tool yacc query_syntax.y
|
||||
echo Running go build...
|
||||
go build
|
|
@ -0,0 +1,67 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package http
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index/upside_down"
|
||||
"github.com/gorilla/mux"
|
||||
)
|
||||
|
||||
// DebugDocumentHandler allows you to debug the index content
|
||||
// for a given document id. the document ID should be mapped
|
||||
// to the mux router URL with name "docId"
|
||||
type DebugDocumentHandler struct {
|
||||
defaultIndexName string
|
||||
}
|
||||
|
||||
func NewDebugDocumentHandler(defaultIndexName string) *DebugDocumentHandler {
|
||||
return &DebugDocumentHandler{
|
||||
defaultIndexName: defaultIndexName,
|
||||
}
|
||||
}
|
||||
|
||||
func (h *DebugDocumentHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
|
||||
// find the index to operate on
|
||||
indexName := mux.Vars(req)["indexName"]
|
||||
if indexName == "" {
|
||||
indexName = h.defaultIndexName
|
||||
}
|
||||
index := IndexByName(indexName)
|
||||
if index == nil {
|
||||
showError(w, req, fmt.Sprintf("no such index '%s'", indexName), 404)
|
||||
return
|
||||
}
|
||||
|
||||
// find the docID
|
||||
docID := mux.Vars(req)["docID"]
|
||||
rows, err := index.DumpDoc(docID)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error debugging document: %v", err), 500)
|
||||
return
|
||||
}
|
||||
rv := make([]interface{}, 0)
|
||||
for _, row := range rows {
|
||||
udcRow, ok := row.(upside_down.UpsideDownCouchRow)
|
||||
if ok {
|
||||
tmp := struct {
|
||||
Key []byte `json:"key"`
|
||||
Val []byte `json:"val"`
|
||||
}{
|
||||
Key: udcRow.Key(),
|
||||
Val: udcRow.Value(),
|
||||
}
|
||||
rv = append(rv, tmp)
|
||||
}
|
||||
}
|
||||
mustEncode(w, rv)
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package http
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/couchbaselabs/bleve"
|
||||
)
|
||||
|
||||
var indexNameMapping map[string]bleve.Index
|
||||
var indexNameMappingLock sync.RWMutex
|
||||
|
||||
func RegisterIndexName(name string, index bleve.Index) {
|
||||
indexNameMappingLock.Lock()
|
||||
defer indexNameMappingLock.Unlock()
|
||||
|
||||
if indexNameMapping == nil {
|
||||
indexNameMapping = make(map[string]bleve.Index)
|
||||
}
|
||||
indexNameMapping[name] = index
|
||||
}
|
||||
|
||||
func IndexByName(name string) bleve.Index {
|
||||
indexNameMappingLock.RLock()
|
||||
defer indexNameMappingLock.RUnlock()
|
||||
|
||||
return indexNameMapping[name]
|
||||
}
|
|
@ -0,0 +1,85 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package http
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net/http"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
|
||||
"github.com/couchbaselabs/bleve"
|
||||
)
|
||||
|
||||
// SearchHandler can handle search requests sent over HTTP
|
||||
// the index name can be selected in the URL by mapping a
|
||||
// gorilla mux var, or it can be set manually with by
|
||||
// setting the defaultIndex value
|
||||
type SearchHandler struct {
|
||||
defaultIndexName string
|
||||
}
|
||||
|
||||
func NewSearchHandler(defaultIndexName string) *SearchHandler {
|
||||
return &SearchHandler{
|
||||
defaultIndexName: defaultIndexName,
|
||||
}
|
||||
}
|
||||
|
||||
func (h *SearchHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
|
||||
// find the index to operate on
|
||||
indexName := mux.Vars(req)["indexName"]
|
||||
if indexName == "" {
|
||||
indexName = h.defaultIndexName
|
||||
}
|
||||
index := IndexByName(indexName)
|
||||
if index == nil {
|
||||
showError(w, req, fmt.Sprintf("no such index '%s'", indexName), 404)
|
||||
return
|
||||
}
|
||||
|
||||
// read the request body
|
||||
requestBody, err := ioutil.ReadAll(req.Body)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error reading request body: %v", err), 400)
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("request body: %s", requestBody)
|
||||
|
||||
// parse the request
|
||||
var searchRequest bleve.SearchRequest
|
||||
err = json.Unmarshal(requestBody, &searchRequest)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error parsing query: %v", err), 400)
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("parsed request %#v", searchRequest)
|
||||
|
||||
// varlidate the query
|
||||
err = searchRequest.Query.Validate()
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error validating query: %v", err), 400)
|
||||
return
|
||||
}
|
||||
|
||||
// execute the query
|
||||
searchResponse, err := index.Search(&searchRequest)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error executing query: %v", err), 500)
|
||||
return
|
||||
}
|
||||
|
||||
// encode the response
|
||||
mustEncode(w, searchResponse)
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package http
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func showError(w http.ResponseWriter, r *http.Request,
|
||||
msg string, code int) {
|
||||
log.Printf("Reporting error %v/%v", code, msg)
|
||||
http.Error(w, msg, code)
|
||||
}
|
||||
|
||||
func mustEncode(w io.Writer, i interface{}) {
|
||||
if headered, ok := w.(http.ResponseWriter); ok {
|
||||
headered.Header().Set("Cache-Control", "no-cache")
|
||||
headered.Header().Set("Content-type", "application/json")
|
||||
}
|
||||
|
||||
e := json.NewEncoder(w)
|
||||
if err := e.Encode(i); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
)
|
||||
|
||||
type Identifier interface {
|
||||
ID() string
|
||||
}
|
||||
|
||||
type Classifier interface {
|
||||
Type() string
|
||||
}
|
||||
|
||||
type Index interface {
|
||||
Index(data interface{}) error
|
||||
IndexID(id string, data interface{}) error
|
||||
|
||||
IndexJSON(data []byte) error
|
||||
IndexJSONID(id string, data []byte) error
|
||||
|
||||
Delete(data interface{}) error
|
||||
DeleteID(id string) error
|
||||
|
||||
Document(id string) (*document.Document, error)
|
||||
DocCount() uint64
|
||||
|
||||
Search(req *SearchRequest) (*SearchResult, error)
|
||||
|
||||
DumpDoc(id string) ([]interface{}, error)
|
||||
|
||||
Close()
|
||||
}
|
||||
|
||||
// Open the index at the specified path, and create it if it does not exist.
|
||||
// The provided mapping will be used for all Index/Search operations.
|
||||
func Open(path string, mapping *IndexMapping) (Index, error) {
|
||||
return newIndex(path, mapping)
|
||||
}
|
|
@ -1,3 +1,12 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
// +build forestdb
|
||||
|
||||
package goforestdb
|
||||
|
|
|
@ -1,3 +1,12 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
// +build forestdb
|
||||
|
||||
package goforestdb
|
||||
|
|
|
@ -1,3 +1,12 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
// +build forestdb
|
||||
|
||||
package goforestdb
|
||||
|
|
|
@ -1,3 +1,12 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
// +build forestdb
|
||||
|
||||
package goforestdb
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package gouchstore
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package gouchstore
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package gouchstore
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package gouchstore
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package gouchstore
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package inmem
|
||||
|
||||
type InMemBatch struct {
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package inmem
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package inmem
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package inmem
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package store
|
||||
|
||||
type KVBatch interface {
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package leveldb
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package leveldb
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package leveldb
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package leveldb
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package leveldb
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package store_test
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package upside_down
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,12 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
// +build forestdb
|
||||
|
||||
package upside_down
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package upside_down
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package upside_down
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package upside_down
|
||||
|
||||
import (
|
||||
|
|
|
@ -13,7 +13,6 @@ import (
|
|||
"reflect"
|
||||
"testing"
|
||||
|
||||
_ "github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
"github.com/couchbaselabs/bleve/index/store/gouchstore"
|
||||
|
@ -40,8 +39,8 @@ func TestIndexReader(t *testing.T) {
|
|||
expectedCount += 1
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextField("name", []byte("test test test")))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
doc.AddField(document.NewTextFieldWithAnalyzer("name", []byte("test test test"), testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldCustom("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS, testAnalyzer))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
|
|
@ -486,6 +486,30 @@ func (udc *UpsideDownCouch) Dump() {
|
|||
}
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) DumpFields() {
|
||||
it := udc.store.Iterator([]byte{'f'})
|
||||
defer it.Close()
|
||||
key, val, valid := it.Current()
|
||||
for valid {
|
||||
if !bytes.HasPrefix(key, []byte{'f'}) {
|
||||
break
|
||||
}
|
||||
|
||||
row, err := ParseFromKeyValue(key, val)
|
||||
if err != nil {
|
||||
fmt.Printf("error parsing key/value: %v", err)
|
||||
return
|
||||
}
|
||||
if row != nil {
|
||||
fmt.Printf("%v\n", row)
|
||||
fmt.Printf("Key: % -100x\nValue: % -100x\n\n", key, val)
|
||||
}
|
||||
|
||||
it.Next()
|
||||
key, val, valid = it.Current()
|
||||
}
|
||||
}
|
||||
|
||||
type keyset [][]byte
|
||||
|
||||
func (k keyset) Len() int { return len(k) }
|
||||
|
|
|
@ -10,13 +10,19 @@ package upside_down
|
|||
|
||||
import (
|
||||
"os"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
_ "github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer"
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index/store/gouchstore"
|
||||
)
|
||||
|
||||
var testAnalyzer = &analysis.Analyzer{
|
||||
Tokenizer: regexp_tokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)),
|
||||
}
|
||||
|
||||
func TestIndexOpenReopen(t *testing.T) {
|
||||
defer os.RemoveAll("test")
|
||||
|
||||
|
@ -180,7 +186,7 @@ func TestIndexInsertThenUpdate(t *testing.T) {
|
|||
|
||||
// this update should overwrite one term, and introduce one new one
|
||||
doc = document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test fail")))
|
||||
doc.AddField(document.NewTextFieldWithAnalyzer("name", []byte("test fail"), testAnalyzer))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error deleting entry from index: %v", err)
|
||||
|
|
|
@ -0,0 +1,187 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
"github.com/couchbaselabs/bleve/index/store"
|
||||
"github.com/couchbaselabs/bleve/index/store/leveldb"
|
||||
"github.com/couchbaselabs/bleve/index/upside_down"
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type indexImpl struct {
|
||||
s store.KVStore
|
||||
i index.Index
|
||||
m *IndexMapping
|
||||
}
|
||||
|
||||
func newIndex(path string, mapping *IndexMapping) (*indexImpl, error) {
|
||||
store, err := leveldb.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
idx := upside_down.NewUpsideDownCouch(store)
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &indexImpl{
|
||||
s: store,
|
||||
i: idx,
|
||||
m: mapping,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Index the provided data.
|
||||
func (i *indexImpl) Index(data interface{}) error {
|
||||
id, ok := i.determineID(data)
|
||||
if ok {
|
||||
return i.IndexID(id, data)
|
||||
}
|
||||
|
||||
return ERROR_NO_ID
|
||||
}
|
||||
|
||||
func (i *indexImpl) IndexID(id string, data interface{}) error {
|
||||
doc := document.NewDocument(id)
|
||||
err := i.m.MapDocument(doc, data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = i.i.Update(doc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *indexImpl) IndexJSON(data []byte) error {
|
||||
var obj interface{}
|
||||
err := json.Unmarshal(data, &obj)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return i.Index(obj)
|
||||
}
|
||||
|
||||
func (i *indexImpl) IndexJSONID(id string, data []byte) error {
|
||||
var obj interface{}
|
||||
err := json.Unmarshal(data, &obj)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return i.IndexID(id, obj)
|
||||
}
|
||||
|
||||
func (i *indexImpl) Delete(data interface{}) error {
|
||||
id, ok := i.determineID(data)
|
||||
if ok {
|
||||
return i.DeleteID(id)
|
||||
}
|
||||
|
||||
return ERROR_NO_ID
|
||||
}
|
||||
|
||||
func (i *indexImpl) DeleteID(id string) error {
|
||||
err := i.i.Delete(id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *indexImpl) Document(id string) (*document.Document, error) {
|
||||
return i.i.Document(id)
|
||||
}
|
||||
|
||||
func (i *indexImpl) DocCount() uint64 {
|
||||
return i.i.DocCount()
|
||||
}
|
||||
|
||||
func (i *indexImpl) Search(req *SearchRequest) (*SearchResult, error) {
|
||||
collector := search.NewTopScorerSkipCollector(req.Size, req.From)
|
||||
searcher, err := req.Query.Searcher(i, req.Explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = collector.Collect(searcher)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
hits := collector.Results()
|
||||
|
||||
if req.Highlight != nil {
|
||||
// get the right highlighter
|
||||
highlighter := config.Highlight.Highlighters[*config.DefaultHighlighter]
|
||||
if req.Highlight.Style != nil {
|
||||
highlighter = config.Highlight.Highlighters[*req.Highlight.Style]
|
||||
if highlighter == nil {
|
||||
return nil, fmt.Errorf("no highlighter named `%s` registered", req.Highlight.Style)
|
||||
}
|
||||
}
|
||||
|
||||
for _, hit := range hits {
|
||||
doc, err := i.Document(hit.ID)
|
||||
if err == nil {
|
||||
highlightFields := req.Highlight.Fields
|
||||
if highlightFields == nil {
|
||||
// add all fields with matches
|
||||
highlightFields = make([]string, 0, len(hit.Locations))
|
||||
for k, _ := range hit.Locations {
|
||||
highlightFields = append(highlightFields, k)
|
||||
}
|
||||
}
|
||||
|
||||
for _, hf := range highlightFields {
|
||||
highlighter.BestFragmentsInField(hit, doc, hf, 3)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return &SearchResult{
|
||||
Request: req,
|
||||
Hits: hits,
|
||||
Total: collector.Total(),
|
||||
MaxScore: collector.MaxScore(),
|
||||
Took: collector.Took(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *indexImpl) DumpDoc(id string) ([]interface{}, error) {
|
||||
return i.i.DumpDoc(id)
|
||||
}
|
||||
|
||||
func (i *indexImpl) Close() {
|
||||
i.i.Close()
|
||||
}
|
||||
|
||||
func (i *indexImpl) determineID(data interface{}) (string, bool) {
|
||||
// first see if the object implements Identifier
|
||||
identifier, ok := data.(Identifier)
|
||||
if ok {
|
||||
return identifier.ID(), true
|
||||
}
|
||||
|
||||
// now see if we can find an ID using the mapping
|
||||
if i.m.IdField != nil {
|
||||
id, ok := mustString(lookupPropertyPath(data, *i.m.IdField))
|
||||
if ok {
|
||||
return id, true
|
||||
}
|
||||
}
|
||||
|
||||
return "", false
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Address is a test fixture used to exercise nested document mapping.
type Address struct {
	Street string `json:"street"`
	City   string `json:"city"`
	State  string `json:"state"`
	Zip    string `json:"zip"`
}

// Person is a test fixture; its ID and Type methods satisfy the
// Identifier and Classifier interfaces respectively.
type Person struct {
	Identifier string     `json:"id"`
	Name       string     `json:"name"`
	Address    *Address   `json:"address"`
	Hideouts   []*Address `json:"hideouts"`
	Tags       []string   `json:"tags"`
}

// ID returns the document identifier for this person.
func (p *Person) ID() string {
	return p.Identifier
}

// Type returns the document type used to select a mapping.
func (p *Person) Type() string {
	return "person"
}
|
||||
|
||||
// FIXME needs more assertions
|
||||
func TestIndex(t *testing.T) {
|
||||
defer os.RemoveAll("testidx")
|
||||
|
||||
nameMapping := NewDocumentMapping().
|
||||
AddFieldMapping(NewFieldMapping("", "text", "standard", true, true, true, true))
|
||||
|
||||
tagsMapping := NewDocumentMapping().
|
||||
AddFieldMapping(NewFieldMapping("", "text", "standard", true, true, true, false))
|
||||
personMapping := NewDocumentMapping().
|
||||
AddSubDocumentMapping("name", nameMapping).
|
||||
AddSubDocumentMapping("id", NewDocumentDisabledMapping()).
|
||||
AddSubDocumentMapping("tags", tagsMapping)
|
||||
|
||||
mapping := NewIndexMapping().
|
||||
AddDocumentMapping("person", personMapping)
|
||||
index, err := Open("testidx", mapping)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
obj := Person{
|
||||
Identifier: "a",
|
||||
Name: "marty",
|
||||
Address: &Address{
|
||||
Street: "123 Sesame St.",
|
||||
City: "Garden",
|
||||
State: "MIND",
|
||||
Zip: "12345",
|
||||
},
|
||||
Hideouts: []*Address{
|
||||
&Address{
|
||||
Street: "999 Gopher St.",
|
||||
City: "Denver",
|
||||
State: "CO",
|
||||
Zip: "86753",
|
||||
},
|
||||
&Address{
|
||||
Street: "88 Rusty Ln.",
|
||||
City: "Amsterdam",
|
||||
State: "CA",
|
||||
Zip: "09090",
|
||||
},
|
||||
},
|
||||
Tags: []string{"amped", "bogus", "gnarley", "tubed"},
|
||||
}
|
||||
|
||||
err = index.Index(&obj)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,131 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
type DocumentMapping struct {
|
||||
Enabled *bool `json:"enabled"`
|
||||
Dynamic *bool `json:"dynamic"`
|
||||
Properties map[string]*DocumentMapping `json:"properties"`
|
||||
Fields []*FieldMapping `json:"fields"`
|
||||
DefaultAnalyzer *string `json:"default_analyzer"`
|
||||
}
|
||||
|
||||
func (dm *DocumentMapping) GoString() string {
|
||||
return fmt.Sprintf(" &bleve.DocumentMapping{Enabled:%t, Dynamic:%t, Properties:%#v, Fields:%#v}", *dm.Enabled, *dm.Dynamic, dm.Properties, dm.Fields)
|
||||
}
|
||||
|
||||
func (dm *DocumentMapping) DocumentMappingForPath(path string) *DocumentMapping {
|
||||
pathElements := decodePath(path)
|
||||
current := dm
|
||||
for _, pathElement := range pathElements {
|
||||
var ok bool
|
||||
current, ok = current.Properties[pathElement]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return current
|
||||
}
|
||||
|
||||
func NewDocumentMapping() *DocumentMapping {
|
||||
return &DocumentMapping{
|
||||
Enabled: &tRUE,
|
||||
Dynamic: &tRUE,
|
||||
}
|
||||
}
|
||||
|
||||
func NewDocumentStaticMapping() *DocumentMapping {
|
||||
return &DocumentMapping{
|
||||
Enabled: &tRUE,
|
||||
Dynamic: &fALSE,
|
||||
}
|
||||
}
|
||||
|
||||
func NewDocumentDisabledMapping() *DocumentMapping {
|
||||
return &DocumentMapping{
|
||||
Enabled: &fALSE,
|
||||
}
|
||||
}
|
||||
|
||||
func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) *DocumentMapping {
|
||||
if dm.Properties == nil {
|
||||
dm.Properties = make(map[string]*DocumentMapping)
|
||||
}
|
||||
dm.Properties[property] = sdm
|
||||
return dm
|
||||
}
|
||||
|
||||
func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) *DocumentMapping {
|
||||
if dm.Fields == nil {
|
||||
dm.Fields = make([]*FieldMapping, 0)
|
||||
}
|
||||
dm.Fields = append(dm.Fields, fm)
|
||||
return dm
|
||||
}
|
||||
|
||||
func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
|
||||
var tmp struct {
|
||||
Enabled *bool `json:"enabled"`
|
||||
Dynamic *bool `json:"dynamic"`
|
||||
Properties map[string]*DocumentMapping `json:"properties"`
|
||||
Fields []*FieldMapping `json:"fields"`
|
||||
DefaultAnalyzer *string `json:"default_analyzer"`
|
||||
}
|
||||
err := json.Unmarshal(data, &tmp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dm.Enabled = &tRUE
|
||||
if tmp.Enabled != nil {
|
||||
dm.Enabled = tmp.Enabled
|
||||
}
|
||||
dm.Dynamic = &tRUE
|
||||
if tmp.Dynamic != nil {
|
||||
dm.Dynamic = tmp.Dynamic
|
||||
}
|
||||
if tmp.DefaultAnalyzer != nil {
|
||||
dm.DefaultAnalyzer = tmp.DefaultAnalyzer
|
||||
}
|
||||
if tmp.Properties != nil {
|
||||
dm.Properties = make(map[string]*DocumentMapping, len(tmp.Properties))
|
||||
}
|
||||
for propName, propMapping := range tmp.Properties {
|
||||
dm.Properties[propName] = propMapping
|
||||
}
|
||||
if tmp.Fields != nil {
|
||||
dm.Fields = make([]*FieldMapping, len(tmp.Fields))
|
||||
}
|
||||
for i, field := range tmp.Fields {
|
||||
dm.Fields[i] = field
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (dm *DocumentMapping) defaultAnalyzer(path []string) *analysis.Analyzer {
|
||||
var rv *analysis.Analyzer
|
||||
current := dm
|
||||
for _, pathElement := range path {
|
||||
var ok bool
|
||||
current, ok = current.Properties[pathElement]
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
if current.DefaultAnalyzer != nil {
|
||||
rv = config.Analysis.Analyzers[*current.DefaultAnalyzer]
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
)
|
||||
|
||||
// FieldMapping describes how a single field is indexed. All members
// are pointers so that absent JSON values are distinguishable from
// explicit false/empty values.
type FieldMapping struct {
	Name               *string `json:"name"`                 // overrides the field name derived from the path
	Type               *string `json:"type"`                 // field type, e.g. "text"
	Analyzer           *string `json:"analyzer"`             // analyzer name to look up in the config
	Store              *bool   `json:"store"`                // store the original value
	Index              *bool   `json:"index"`                // make the field searchable
	IncludeTermVectors *bool   `json:"include_term_vectors"` // record term positions
	IncludeInAll       *bool   `json:"include_in_all"`       // contribute to the _all field
}

// NewFieldMapping builds a fully specified field mapping from plain
// values, taking their addresses for the optional-pointer fields.
func NewFieldMapping(name, typ, analyzer string, store, index bool, includeTermVectors bool, includeInAll bool) *FieldMapping {
	return &FieldMapping{
		Name:               &name,
		Type:               &typ,
		Analyzer:           &analyzer,
		Store:              &store,
		Index:              &index,
		IncludeTermVectors: &includeTermVectors,
		IncludeInAll:       &includeInAll,
	}
}
|
||||
|
||||
func (fm *FieldMapping) Options() document.IndexingOptions {
|
||||
var rv document.IndexingOptions
|
||||
if *fm.Store {
|
||||
rv |= document.STORE_FIELD
|
||||
}
|
||||
if *fm.Index {
|
||||
rv |= document.INDEX_FIELD
|
||||
}
|
||||
if *fm.IncludeTermVectors {
|
||||
rv |= document.INCLUDE_TERM_VECTORS
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (fm *FieldMapping) GoString() string {
|
||||
return fmt.Sprintf("&bleve.FieldMapping{Name:%s, Type:%s, Analyzer:%s, Store:%t, Index:%t}", *fm.Name, *fm.Type, *fm.Analyzer, *fm.Store, *fm.Index)
|
||||
}
|
|
@ -0,0 +1,316 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
)
|
||||
|
||||
// Addressable canonical booleans, so optional *bool mapping fields can
// point at them.
var tRUE = true
var fALSE = false

// Defaults used when a mapping does not specify the id field, type
// field, or document type.
var DEFAULT_ID_FIELD = "_id"
var DEFAULT_TYPE_FIELD = "_type"
var DEFAULT_TYPE = "_default"
|
||||
|
||||
type IndexMapping struct {
|
||||
TypeMapping map[string]*DocumentMapping `json:"types"`
|
||||
DefaultMapping *DocumentMapping `json:"default_mapping"`
|
||||
IdField *string `json:"id_field"`
|
||||
TypeField *string `json:"type_field"`
|
||||
DefaultType *string `json:"default_type"`
|
||||
DefaultAnalyzer *string `json:"default_analyzer"`
|
||||
}
|
||||
|
||||
func (im *IndexMapping) GoString() string {
|
||||
return fmt.Sprintf("&bleve.IndexMapping{TypeMapping:%#v, TypeField:%s, DefaultType:%s}", im.TypeMapping, *im.TypeField, *im.DefaultType)
|
||||
}
|
||||
|
||||
func NewIndexMapping() *IndexMapping {
|
||||
return &IndexMapping{
|
||||
TypeMapping: make(map[string]*DocumentMapping),
|
||||
DefaultMapping: NewDocumentMapping(),
|
||||
IdField: &DEFAULT_ID_FIELD,
|
||||
TypeField: &DEFAULT_TYPE_FIELD,
|
||||
DefaultType: &DEFAULT_TYPE,
|
||||
}
|
||||
}
|
||||
|
||||
func (im *IndexMapping) AddDocumentMapping(doctype string, dm *DocumentMapping) *IndexMapping {
|
||||
im.TypeMapping[doctype] = dm
|
||||
return im
|
||||
}
|
||||
|
||||
func (im *IndexMapping) SetTypeField(typeField string) *IndexMapping {
|
||||
im.TypeField = &typeField
|
||||
return im
|
||||
}
|
||||
|
||||
func (im *IndexMapping) SetDefaultAnalyzer(analyzer string) *IndexMapping {
|
||||
im.DefaultAnalyzer = &analyzer
|
||||
return im
|
||||
}
|
||||
|
||||
func (im *IndexMapping) MappingForType(docType string) *DocumentMapping {
|
||||
docMapping := im.TypeMapping[docType]
|
||||
if docMapping == nil {
|
||||
docMapping = im.DefaultMapping
|
||||
}
|
||||
return docMapping
|
||||
}
|
||||
|
||||
func (im *IndexMapping) UnmarshalJSON(data []byte) error {
|
||||
var tmp struct {
|
||||
TypeMapping map[string]*DocumentMapping `json:"types"`
|
||||
DefaultMapping *DocumentMapping `json:"default_mapping"`
|
||||
IdField *string `json:"id_field"`
|
||||
TypeField *string `json:"type_field"`
|
||||
DefaultType *string `json:"default_type"`
|
||||
DefaultAnalyzer *string `json:"default_analyzer"`
|
||||
}
|
||||
err := json.Unmarshal(data, &tmp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
im.IdField = &DEFAULT_ID_FIELD
|
||||
if tmp.IdField != nil {
|
||||
im.IdField = tmp.IdField
|
||||
}
|
||||
|
||||
im.TypeField = &DEFAULT_TYPE_FIELD
|
||||
if tmp.TypeField != nil {
|
||||
im.TypeField = tmp.TypeField
|
||||
}
|
||||
|
||||
im.DefaultType = &DEFAULT_TYPE
|
||||
if tmp.DefaultType != nil {
|
||||
im.DefaultType = tmp.DefaultType
|
||||
}
|
||||
|
||||
im.DefaultMapping = NewDocumentMapping()
|
||||
if tmp.DefaultMapping != nil {
|
||||
im.DefaultMapping = tmp.DefaultMapping
|
||||
}
|
||||
|
||||
if tmp.DefaultAnalyzer != nil {
|
||||
im.DefaultAnalyzer = tmp.DefaultAnalyzer
|
||||
}
|
||||
|
||||
im.TypeMapping = make(map[string]*DocumentMapping, len(tmp.TypeMapping))
|
||||
for typeName, typeDocMapping := range tmp.TypeMapping {
|
||||
im.TypeMapping[typeName] = typeDocMapping
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (im *IndexMapping) determineType(data interface{}) (string, bool) {
|
||||
// first see if the object implements Identifier
|
||||
classifier, ok := data.(Classifier)
|
||||
if ok {
|
||||
return classifier.Type(), true
|
||||
}
|
||||
|
||||
// now see if we can find type using the mapping
|
||||
if im.TypeField != nil {
|
||||
typ, ok := mustString(lookupPropertyPath(data, *im.TypeField))
|
||||
if ok {
|
||||
return typ, true
|
||||
}
|
||||
}
|
||||
|
||||
// fall back to default type if there was one
|
||||
if im.DefaultType != nil {
|
||||
return *im.DefaultType, true
|
||||
}
|
||||
|
||||
return "", false
|
||||
}
|
||||
|
||||
func (im *IndexMapping) MapDocument(doc *document.Document, data interface{}) error {
|
||||
docType, ok := im.determineType(data)
|
||||
if !ok {
|
||||
return ERROR_NO_TYPE
|
||||
}
|
||||
docMapping := im.MappingForType(docType)
|
||||
walkContext := newWalkContext(doc, docMapping)
|
||||
im.walkDocument(data, []string{}, walkContext)
|
||||
|
||||
// see if the _all field was disabled
|
||||
allMapping := docMapping.DocumentMappingForPath("_all")
|
||||
if allMapping == nil || (allMapping.Enabled != nil && *allMapping.Enabled != false) {
|
||||
field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS)
|
||||
doc.AddField(field)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type walkContext struct {
|
||||
doc *document.Document
|
||||
dm *DocumentMapping
|
||||
excludedFromAll []string
|
||||
}
|
||||
|
||||
func newWalkContext(doc *document.Document, dm *DocumentMapping) *walkContext {
|
||||
return &walkContext{
|
||||
doc: doc,
|
||||
dm: dm,
|
||||
excludedFromAll: []string{},
|
||||
}
|
||||
}
|
||||
|
||||
func (im *IndexMapping) walkDocument(data interface{}, path []string, context *walkContext) {
|
||||
val := reflect.ValueOf(data)
|
||||
typ := val.Type()
|
||||
switch typ.Kind() {
|
||||
case reflect.Map:
|
||||
// FIXME can add support for other map keys in the future
|
||||
if typ.Key().Kind() == reflect.String {
|
||||
for _, key := range val.MapKeys() {
|
||||
fieldName := key.String()
|
||||
fieldVal := val.MapIndex(key).Interface()
|
||||
im.processProperty(fieldVal, append(path, fieldName), context)
|
||||
}
|
||||
}
|
||||
case reflect.Struct:
|
||||
for i := 0; i < val.NumField(); i++ {
|
||||
field := typ.Field(i)
|
||||
fieldName := field.Name
|
||||
|
||||
// if the field has a JSON name, prefer that
|
||||
jsonTag := field.Tag.Get("json")
|
||||
jsonFieldName := parseJSONTagName(jsonTag)
|
||||
if jsonFieldName != "" {
|
||||
fieldName = jsonFieldName
|
||||
}
|
||||
|
||||
if val.Field(i).CanInterface() {
|
||||
fieldVal := val.Field(i).Interface()
|
||||
im.processProperty(fieldVal, append(path, fieldName), context)
|
||||
}
|
||||
}
|
||||
case reflect.Slice, reflect.Array:
|
||||
for i := 0; i < val.Len(); i++ {
|
||||
if val.Index(i).CanInterface() {
|
||||
fieldVal := val.Index(i).Interface()
|
||||
im.processProperty(fieldVal, path, context)
|
||||
}
|
||||
}
|
||||
case reflect.Ptr:
|
||||
ptrElem := val.Elem()
|
||||
if ptrElem.CanInterface() {
|
||||
im.walkDocument(ptrElem.Interface(), path, context)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (im *IndexMapping) processProperty(property interface{}, path []string, context *walkContext) {
|
||||
pathString := encodePath(path)
|
||||
// look to see if there is a mapping for this field
|
||||
subDocMapping := context.dm.DocumentMappingForPath(pathString)
|
||||
|
||||
// check tos see if we even need to do further processing
|
||||
if subDocMapping != nil && subDocMapping.Enabled != nil && !*subDocMapping.Enabled {
|
||||
return
|
||||
}
|
||||
|
||||
propertyValue := reflect.ValueOf(property)
|
||||
propertyType := propertyValue.Type()
|
||||
switch propertyType.Kind() {
|
||||
case reflect.String:
|
||||
propertyValueString := propertyValue.String()
|
||||
if subDocMapping != nil {
|
||||
// index by explicit mapping
|
||||
|
||||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
if *fieldMapping.Type == "text" {
|
||||
|
||||
fieldName := pathString
|
||||
if fieldMapping.Name != nil && *fieldMapping.Name != "" {
|
||||
parentName := ""
|
||||
if len(path) > 1 {
|
||||
parentName = encodePath(path[:len(path)-1]) + PATH_SEPARATOR
|
||||
}
|
||||
fieldName = parentName + *fieldMapping.Name
|
||||
}
|
||||
options := fieldMapping.Options()
|
||||
analyzer := config.Analysis.Analyzers[*fieldMapping.Analyzer]
|
||||
if analyzer != nil {
|
||||
field := document.NewTextFieldCustom(fieldName, []byte(propertyValueString), options, analyzer)
|
||||
context.doc.AddField(field)
|
||||
|
||||
if fieldMapping.IncludeInAll != nil && !*fieldMapping.IncludeInAll {
|
||||
context.excludedFromAll = append(context.excludedFromAll, fieldName)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// automatic indexing behavior
|
||||
options := document.STORE_FIELD | document.INDEX_FIELD | document.INCLUDE_TERM_VECTORS
|
||||
analyzer := im.defaultAnalyzer(context.dm, path)
|
||||
field := document.NewTextFieldCustom(pathString, []byte(propertyValueString), options, analyzer)
|
||||
context.doc.AddField(field)
|
||||
}
|
||||
default:
|
||||
im.walkDocument(property, path, context)
|
||||
}
|
||||
}
|
||||
|
||||
func (im *IndexMapping) defaultAnalyzer(dm *DocumentMapping, path []string) *analysis.Analyzer {
|
||||
// first see if the document mapping has an analyzer
|
||||
rv := dm.defaultAnalyzer(path)
|
||||
if rv == nil {
|
||||
if im.DefaultAnalyzer != nil {
|
||||
rv = config.Analysis.Analyzers[*im.DefaultAnalyzer]
|
||||
} else if config.DefaultAnalyzer != nil {
|
||||
rv = config.Analysis.Analyzers[*config.DefaultAnalyzer]
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// attempts to find the best analyzer to use with only a field name
|
||||
// will walk all the document types, look for field mappings at the
|
||||
// provided path, if one exists and it has an explicit analyzer
|
||||
// that is returned
|
||||
// nil should be an acceptable return value meaning we don't know
|
||||
func (im *IndexMapping) analyzerForPath(path string) *analysis.Analyzer {
|
||||
|
||||
// first we look for explicit mapping on the field
|
||||
for _, docMapping := range im.TypeMapping {
|
||||
pathMapping := docMapping.DocumentMappingForPath(path)
|
||||
if pathMapping != nil {
|
||||
if len(pathMapping.Fields) > 0 {
|
||||
if pathMapping.Fields[0].Analyzer != nil {
|
||||
return config.Analysis.Analyzers[*pathMapping.Fields[0].Analyzer]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// next we will try default analyzers for the path
|
||||
for _, docMapping := range im.TypeMapping {
|
||||
rv := im.defaultAnalyzer(docMapping, decodePath(path))
|
||||
if rv != nil {
|
||||
return rv
|
||||
}
|
||||
}
|
||||
|
||||
// finally just return the system-wide default analyzer
|
||||
return config.Analysis.Analyzers[*config.DefaultAnalyzer]
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var mappingSource = []byte(`{
|
||||
"types": {
|
||||
"beer": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"fields": [
|
||||
{
|
||||
"name": "name",
|
||||
"type": "text",
|
||||
"analyzer": "standard",
|
||||
"store": true,
|
||||
"index": true,
|
||||
"include_term_vectors": true,
|
||||
"include_in_all": true
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"brewery": {
|
||||
}
|
||||
},
|
||||
"type_field": "_type",
|
||||
"default_type": "_default"
|
||||
}`)
|
||||
|
||||
var nameField = NewFieldMapping("name", "text", "standard", true, true, true, true)
|
||||
var nameMapping = NewDocumentMapping().AddFieldMapping(nameField)
|
||||
var beerMapping = NewDocumentMapping().AddSubDocumentMapping("name", nameMapping)
|
||||
var breweryMapping = NewDocumentMapping()
|
||||
var mappingObject = NewIndexMapping().
|
||||
AddDocumentMapping("beer", beerMapping).
|
||||
AddDocumentMapping("brewery", breweryMapping)
|
||||
|
||||
func TestUnmarshalMappingJSON(t *testing.T) {
|
||||
var indexMapping IndexMapping
|
||||
err := json.Unmarshal(mappingSource, &indexMapping)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !reflect.DeepEqual(&indexMapping, mappingObject) {
|
||||
t.Errorf("expected %#v,\n got %#v", mappingObject, &indexMapping)
|
||||
}
|
||||
}
|
|
@ -6,24 +6,23 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type Query interface {
|
||||
Boost() float64
|
||||
Searcher(index index.Index) (Searcher, error)
|
||||
Searcher(i *indexImpl, explain bool) (search.Searcher, error)
|
||||
Validate() error
|
||||
}
|
||||
|
||||
func ParseQuery(input []byte, mapping document.Mapping) (Query, error) {
|
||||
func ParseQuery(input []byte) (Query, error) {
|
||||
var tmp map[string]interface{}
|
||||
err := json.Unmarshal(input, &tmp)
|
||||
if err != nil {
|
||||
|
@ -42,7 +41,6 @@ func ParseQuery(input []byte, mapping document.Mapping) (Query, error) {
|
|||
if isMatchQuery {
|
||||
log.Printf("detected match query")
|
||||
var rv MatchQuery
|
||||
rv.mapping = mapping
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -53,7 +51,6 @@ func ParseQuery(input []byte, mapping document.Mapping) (Query, error) {
|
|||
if isMatchPhraseQuery {
|
||||
log.Printf("detected match phrase query")
|
||||
var rv MatchPhraseQuery
|
||||
rv.mapping = mapping
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -64,8 +61,7 @@ func ParseQuery(input []byte, mapping document.Mapping) (Query, error) {
|
|||
_, hasShould := tmp["should"]
|
||||
_, hasMustNot := tmp["must_not"]
|
||||
if hasMust || hasShould || hasMustNot {
|
||||
var rv TermBooleanQuery
|
||||
rv.mapping = mapping
|
||||
var rv BooleanQuery
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -84,7 +80,6 @@ func ParseQuery(input []byte, mapping document.Mapping) (Query, error) {
|
|||
_, hasSyntaxQuery := tmp["query"]
|
||||
if hasSyntaxQuery {
|
||||
var rv SyntaxQuery
|
||||
rv.mapping = mapping
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
|
@ -0,0 +1,80 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type BooleanQuery struct {
|
||||
Must *ConjunctionQuery `json:"must,omitempty"`
|
||||
Should *DisjunctionQuery `json:"should,omitempty"`
|
||||
MustNot *DisjunctionQuery `json:"must_not,omitempty"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
}
|
||||
|
||||
func NewBooleanQuery(must *ConjunctionQuery, should *DisjunctionQuery, mustNot *DisjunctionQuery) *BooleanQuery {
|
||||
return &BooleanQuery{
|
||||
Must: must,
|
||||
Should: should,
|
||||
MustNot: mustNot,
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *BooleanQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *BooleanQuery) SetBoost(b float64) *BooleanQuery {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *BooleanQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
|
||||
|
||||
var err error
|
||||
var mustSearcher *search.TermConjunctionSearcher
|
||||
if q.Must != nil {
|
||||
mustSearcher, err = q.Must.Searcher(i, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var shouldSearcher *search.TermDisjunctionSearcher
|
||||
if q.Should != nil {
|
||||
shouldSearcher, err = q.Should.Searcher(i, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var mustNotSearcher *search.TermDisjunctionSearcher
|
||||
if q.MustNot != nil {
|
||||
mustNotSearcher, err = q.MustNot.Searcher(i, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return search.NewTermBooleanSearcher(i.i, mustSearcher, shouldSearcher, mustNotSearcher, explain)
|
||||
}
|
||||
|
||||
func (q *BooleanQuery) Validate() error {
|
||||
if q.Must == nil && q.Should == nil {
|
||||
return fmt.Errorf("Boolean query must contain at least one MUST or SHOULD clause")
|
||||
}
|
||||
if q.Must != nil && len(q.Must.Conjuncts) == 0 && q.Should != nil && len(q.Should.Disjuncts) == 0 {
|
||||
return fmt.Errorf("Boolean query must contain at least one MUST or SHOULD clause")
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type ConjunctionQuery struct {
|
||||
Conjuncts []Query `json:"terms"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
}
|
||||
|
||||
func NewConjunctionQuery(conjuncts []Query) *ConjunctionQuery {
|
||||
return &ConjunctionQuery{
|
||||
Conjuncts: conjuncts,
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *ConjunctionQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *ConjunctionQuery) SetBoost(b float64) *ConjunctionQuery {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *ConjunctionQuery) AddQuery(aq Query) *ConjunctionQuery {
|
||||
q.Conjuncts = append(q.Conjuncts, aq)
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *ConjunctionQuery) Searcher(i *indexImpl, explain bool) (*search.TermConjunctionSearcher, error) {
|
||||
searchers := make([]search.Searcher, len(q.Conjuncts))
|
||||
for in, conjunct := range q.Conjuncts {
|
||||
var err error
|
||||
searchers[in], err = conjunct.Searcher(i, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return search.NewTermConjunctionSearcher(i.i, searchers, explain)
|
||||
}
|
||||
|
||||
func (q *ConjunctionQuery) Validate() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (q *ConjunctionQuery) UnmarshalJSON(data []byte) error {
|
||||
tmp := struct {
|
||||
Conjuncts []json.RawMessage `json:"terms"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
}{}
|
||||
err := json.Unmarshal(data, &tmp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
q.Conjuncts = make([]Query, len(tmp.Conjuncts))
|
||||
for i, term := range tmp.Conjuncts {
|
||||
query, err := ParseQuery(term)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
q.Conjuncts[i] = query
|
||||
}
|
||||
q.BoostVal = tmp.BoostVal
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,94 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type DisjunctionQuery struct {
|
||||
Disjuncts []Query `json:"terms"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
MinVal float64 `json:"min"`
|
||||
}
|
||||
|
||||
func NewDisjunctionQuery(disjuncts []Query) *DisjunctionQuery {
|
||||
return &DisjunctionQuery{
|
||||
Disjuncts: disjuncts,
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *DisjunctionQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *DisjunctionQuery) SetBoost(b float64) *DisjunctionQuery {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *DisjunctionQuery) AddQuery(aq Query) *DisjunctionQuery {
|
||||
q.Disjuncts = append(q.Disjuncts, aq)
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *DisjunctionQuery) Min() float64 {
|
||||
return q.MinVal
|
||||
}
|
||||
|
||||
func (q *DisjunctionQuery) SetMin(m float64) *DisjunctionQuery {
|
||||
q.MinVal = m
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *DisjunctionQuery) Searcher(i *indexImpl, explain bool) (*search.TermDisjunctionSearcher, error) {
|
||||
searchers := make([]search.Searcher, len(q.Disjuncts))
|
||||
for in, disjunct := range q.Disjuncts {
|
||||
var err error
|
||||
searchers[in], err = disjunct.Searcher(i, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return search.NewTermDisjunctionSearcher(i.i, searchers, q.MinVal, explain)
|
||||
}
|
||||
|
||||
func (q *DisjunctionQuery) Validate() error {
|
||||
if int(q.MinVal) > len(q.Disjuncts) {
|
||||
return fmt.Errorf("Minimum clauses in disjunction exceeds total number of clauses")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (q *DisjunctionQuery) UnmarshalJSON(data []byte) error {
|
||||
tmp := struct {
|
||||
Disjuncts []json.RawMessage `json:"terms"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
MinVal float64 `json:"min"`
|
||||
}{}
|
||||
err := json.Unmarshal(data, &tmp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
q.Disjuncts = make([]Query, len(tmp.Disjuncts))
|
||||
for i, term := range tmp.Disjuncts {
|
||||
query, err := ParseQuery(term)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
q.Disjuncts[i] = query
|
||||
}
|
||||
q.BoostVal = tmp.BoostVal
|
||||
q.MinVal = tmp.MinVal
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,85 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type MatchQuery struct {
|
||||
Match string `json:"match"`
|
||||
FieldVal string `json:"field,omitempty"`
|
||||
Analyzer string `json:"analyzer,omitempty"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
}
|
||||
|
||||
func NewMatchQuery(match string) *MatchQuery {
|
||||
return &MatchQuery{
|
||||
Match: match,
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *MatchQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *MatchQuery) SetBoost(b float64) *MatchQuery {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *MatchQuery) Field() string {
|
||||
return q.FieldVal
|
||||
}
|
||||
|
||||
func (q *MatchQuery) SetField(f string) *MatchQuery {
|
||||
q.FieldVal = f
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *MatchQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
|
||||
|
||||
var analyzer *analysis.Analyzer
|
||||
if q.Analyzer != "" {
|
||||
analyzer = config.Analysis.Analyzers[q.Analyzer]
|
||||
} else {
|
||||
analyzer = i.m.analyzerForPath(q.FieldVal)
|
||||
}
|
||||
if analyzer == nil {
|
||||
return nil, fmt.Errorf("no analyzer named '%s' registered", q.Analyzer)
|
||||
}
|
||||
|
||||
tokens := analyzer.Analyze([]byte(q.Match))
|
||||
if len(tokens) > 0 {
|
||||
|
||||
tqs := make([]Query, len(tokens))
|
||||
for i, token := range tokens {
|
||||
tqs[i] = NewTermQuery(string(token.Term)).
|
||||
SetField(q.FieldVal).
|
||||
SetBoost(q.BoostVal)
|
||||
}
|
||||
|
||||
shouldQuery := NewDisjunctionQuery(tqs).
|
||||
SetBoost(q.BoostVal).
|
||||
SetMin(1)
|
||||
|
||||
return shouldQuery.Searcher(i, explain)
|
||||
} else {
|
||||
noneQuery := NewMatchNoneQuery()
|
||||
return noneQuery.Searcher(i, explain)
|
||||
}
|
||||
}
|
||||
|
||||
func (q *MatchQuery) Validate() error {
|
||||
return nil
|
||||
}
|
|
@ -6,23 +6,33 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type MatchAllQuery struct {
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
Explain bool `json:"explain,omitempty"`
|
||||
}
|
||||
|
||||
func NewMatchAllQuery() *MatchAllQuery {
|
||||
return &MatchAllQuery{
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *MatchAllQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *MatchAllQuery) Searcher(index index.Index) (Searcher, error) {
|
||||
return NewMatchAllSearcher(index, q)
|
||||
func (q *MatchAllQuery) SetBoost(b float64) *MatchAllQuery {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *MatchAllQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
|
||||
return search.NewMatchAllSearcher(i.i, q.BoostVal, explain)
|
||||
}
|
||||
|
||||
func (q *MatchAllQuery) Validate() error {
|
|
@ -6,23 +6,33 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type MatchNoneQuery struct {
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
Explain bool `json:"explain,omitempty"`
|
||||
}
|
||||
|
||||
func NewMatchNoneQuery() *MatchNoneQuery {
|
||||
return &MatchNoneQuery{
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *MatchNoneQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *MatchNoneQuery) Searcher(index index.Index) (Searcher, error) {
|
||||
return NewMatchNoneSearcher(index, q)
|
||||
func (q *MatchNoneQuery) SetBoost(b float64) *MatchNoneQuery {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *MatchNoneQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
|
||||
return search.NewMatchNoneSearcher(i.i)
|
||||
}
|
||||
|
||||
func (q *MatchNoneQuery) Validate() error {
|
|
@ -0,0 +1,82 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type MatchPhraseQuery struct {
|
||||
MatchPhrase string `json:"match_phrase"`
|
||||
FieldVal string `json:"field,omitempty"`
|
||||
Analyzer string `json:"analyzer,omitempty"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
}
|
||||
|
||||
func NewMatchPhraseQuery(matchPhrase string) *MatchPhraseQuery {
|
||||
return &MatchPhraseQuery{
|
||||
MatchPhrase: matchPhrase,
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *MatchPhraseQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *MatchPhraseQuery) SetBoost(b float64) *MatchPhraseQuery {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *MatchPhraseQuery) Field() string {
|
||||
return q.FieldVal
|
||||
}
|
||||
|
||||
func (q *MatchPhraseQuery) SetField(f string) *MatchPhraseQuery {
|
||||
q.FieldVal = f
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *MatchPhraseQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
|
||||
|
||||
var analyzer *analysis.Analyzer
|
||||
if q.Analyzer != "" {
|
||||
analyzer = config.Analysis.Analyzers[q.Analyzer]
|
||||
} else {
|
||||
analyzer = i.m.analyzerForPath(q.FieldVal)
|
||||
}
|
||||
if analyzer == nil {
|
||||
return nil, fmt.Errorf("no analyzer named '%s' registered", q.Analyzer)
|
||||
}
|
||||
|
||||
tokens := analyzer.Analyze([]byte(q.MatchPhrase))
|
||||
if len(tokens) > 0 {
|
||||
tqs := make([]*TermQuery, len(tokens))
|
||||
for i, token := range tokens {
|
||||
tqs[i] = NewTermQuery(string(token.Term)).
|
||||
SetField(q.FieldVal).
|
||||
SetBoost(q.BoostVal)
|
||||
}
|
||||
|
||||
phraseQuery := NewPhraseQuery(tqs)
|
||||
|
||||
return phraseQuery.Searcher(i, explain)
|
||||
} else {
|
||||
noneQuery := NewMatchNoneQuery()
|
||||
return noneQuery.Searcher(i, explain)
|
||||
}
|
||||
}
|
||||
|
||||
func (q *MatchPhraseQuery) Validate() error {
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type PhraseQuery struct {
|
||||
Terms []*TermQuery `json:"terms"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
}
|
||||
|
||||
func NewPhraseQuery(terms []*TermQuery) *PhraseQuery {
|
||||
return &PhraseQuery{
|
||||
Terms: terms,
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *PhraseQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *PhraseQuery) SetBoost(b float64) *PhraseQuery {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *PhraseQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
|
||||
|
||||
terms := make([]string, len(q.Terms))
|
||||
conjuncts := make([]Query, len(q.Terms))
|
||||
for i, term := range q.Terms {
|
||||
conjuncts[i] = term
|
||||
terms[i] = term.Term
|
||||
}
|
||||
|
||||
conjunctionQuery := NewConjunctionQuery(conjuncts)
|
||||
conjunctionSearcher, err := conjunctionQuery.Searcher(i, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return search.NewPhraseSearcher(i.i, conjunctionSearcher, terms)
|
||||
}
|
||||
|
||||
func (q *PhraseQuery) Validate() error {
|
||||
if q.Terms == nil {
|
||||
return fmt.Errorf("Phrase query must contain at least one term")
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -6,31 +6,49 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type SyntaxQuery struct {
|
||||
Query string `json:"query"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
Explain bool `json:"explain,omitempty"`
|
||||
DefaultField string `json:"default_field,omitemtpy"`
|
||||
mapping document.Mapping
|
||||
Query string `json:"query"`
|
||||
DefaultFieldVal string `json:"default_field,omitempty"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
}
|
||||
|
||||
func NewSyntaxQuery(query string) *SyntaxQuery {
|
||||
return &SyntaxQuery{
|
||||
Query: query,
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *SyntaxQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *SyntaxQuery) Searcher(index index.Index) (Searcher, error) {
|
||||
newQuery, err := ParseQuerySyntax(q.Query, q.mapping, q.DefaultField)
|
||||
func (q *SyntaxQuery) SetBoost(b float64) *SyntaxQuery {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *SyntaxQuery) DefaultField() string {
|
||||
return q.DefaultFieldVal
|
||||
}
|
||||
|
||||
func (q *SyntaxQuery) SetField(f string) *SyntaxQuery {
|
||||
q.DefaultFieldVal = f
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *SyntaxQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
|
||||
newQuery, err := ParseQuerySyntax(q.Query, i.m, q.DefaultFieldVal)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return newQuery.Searcher(index)
|
||||
return newQuery.Searcher(i, explain)
|
||||
}
|
||||
|
||||
func (q *SyntaxQuery) Validate() error {
|
|
@ -22,7 +22,7 @@
|
|||
return STRING
|
||||
}
|
||||
//
|
||||
package search
|
||||
package bleve
|
||||
|
||||
import("log")
|
||||
import("strconv")
|
|
@ -1,4 +1,4 @@
|
|||
package search
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
@ -23,382 +23,382 @@ a = make([]family, 1)
|
|||
{
|
||||
var acc [18]bool
|
||||
var fun [18]func(rune) int
|
||||
fun[1] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 34: return 2
|
||||
case 98: return 3
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
case 116: return 3
|
||||
case 114: return 3
|
||||
case 117: return 3
|
||||
case 102: return 3
|
||||
case 47: return 3
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 3
|
||||
case 65 <= r && r <= 70: return 3
|
||||
case 97 <= r && r <= 102: return 3
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[14] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 117: return 3
|
||||
case 102: return 15
|
||||
case 47: return 3
|
||||
case 34: return 2
|
||||
case 98: return 15
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
case 116: return 3
|
||||
case 114: return 3
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 15
|
||||
case 65 <= r && r <= 70: return 15
|
||||
case 97 <= r && r <= 102: return 15
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[10] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 117: return 3
|
||||
case 102: return 3
|
||||
case 47: return 3
|
||||
case 34: return 2
|
||||
case 98: return 3
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
case 116: return 3
|
||||
case 114: return 3
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 3
|
||||
case 65 <= r && r <= 70: return 3
|
||||
case 97 <= r && r <= 102: return 3
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[3] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 34: return 2
|
||||
case 98: return 3
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
case 116: return 3
|
||||
case 114: return 3
|
||||
case 117: return 3
|
||||
case 102: return 3
|
||||
case 47: return 3
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 3
|
||||
case 65 <= r && r <= 70: return 3
|
||||
case 97 <= r && r <= 102: return 3
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[4] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 117: return 5
|
||||
case 102: return 6
|
||||
case 47: return 7
|
||||
case 34: return 8
|
||||
case 98: return 9
|
||||
case 110: return 10
|
||||
case 92: return 11
|
||||
case 116: return 12
|
||||
case 114: return 13
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 3
|
||||
case 65 <= r && r <= 70: return 3
|
||||
case 97 <= r && r <= 102: return 3
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[16] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 34: return 2
|
||||
case 98: return 17
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
case 116: return 3
|
||||
case 114: return 3
|
||||
case 117: return 3
|
||||
case 102: return 17
|
||||
case 47: return 3
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 17
|
||||
case 65 <= r && r <= 70: return 17
|
||||
case 97 <= r && r <= 102: return 17
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[15] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 114: return 3
|
||||
case 117: return 3
|
||||
case 102: return 16
|
||||
case 47: return 3
|
||||
case 34: return 2
|
||||
case 98: return 16
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
case 116: return 3
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 16
|
||||
case 65 <= r && r <= 70: return 16
|
||||
case 97 <= r && r <= 102: return 16
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[17] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 117: return 3
|
||||
case 102: return 3
|
||||
case 47: return 3
|
||||
case 34: return 2
|
||||
case 98: return 3
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
case 116: return 3
|
||||
case 114: return 3
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 3
|
||||
case 65 <= r && r <= 70: return 3
|
||||
case 97 <= r && r <= 102: return 3
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[11] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 117: return 5
|
||||
case 102: return 6
|
||||
case 47: return 7
|
||||
case 34: return 8
|
||||
case 98: return 9
|
||||
case 110: return 10
|
||||
case 92: return 11
|
||||
case 116: return 12
|
||||
case 98: return 12
|
||||
case 114: return 13
|
||||
case 34: return 5
|
||||
case 117: return 6
|
||||
case 47: return 7
|
||||
case 116: return 8
|
||||
case 110: return 9
|
||||
case 102: return 10
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 3
|
||||
case 65 <= r && r <= 70: return 3
|
||||
case 97 <= r && r <= 102: return 3
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[12] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 116: return 3
|
||||
case 114: return 3
|
||||
case 117: return 3
|
||||
case 102: return 3
|
||||
case 47: return 3
|
||||
case 34: return 2
|
||||
case 98: return 3
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 3
|
||||
case 65 <= r && r <= 70: return 3
|
||||
case 97 <= r && r <= 102: return 3
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
acc[8] = true
|
||||
fun[8] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 47: return 3
|
||||
case 34: return 2
|
||||
case 98: return 3
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
case 116: return 3
|
||||
case 114: return 3
|
||||
case 117: return 3
|
||||
case 102: return 3
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 3
|
||||
case 65 <= r && r <= 70: return 3
|
||||
case 97 <= r && r <= 102: return 3
|
||||
default: return 3
|
||||
case 48 <= r && r <= 57: return 2
|
||||
case 65 <= r && r <= 70: return 2
|
||||
case 97 <= r && r <= 102: return 2
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[6] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 47: return 3
|
||||
case 34: return 2
|
||||
case 98: return 3
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
case 116: return 3
|
||||
case 114: return 3
|
||||
case 117: return 3
|
||||
case 102: return 3
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 3
|
||||
case 65 <= r && r <= 70: return 3
|
||||
case 97 <= r && r <= 102: return 3
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[9] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 117: return 3
|
||||
case 102: return 3
|
||||
case 47: return 3
|
||||
case 34: return 2
|
||||
case 98: return 3
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
case 116: return 3
|
||||
case 114: return 3
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 3
|
||||
case 65 <= r && r <= 70: return 3
|
||||
case 97 <= r && r <= 102: return 3
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[7] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 47: return 3
|
||||
case 34: return 2
|
||||
case 98: return 3
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
case 116: return 3
|
||||
case 114: return 3
|
||||
case 117: return 3
|
||||
case 102: return 3
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 3
|
||||
case 65 <= r && r <= 70: return 3
|
||||
case 97 <= r && r <= 102: return 3
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
acc[2] = true
|
||||
fun[2] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 110: return -1
|
||||
case 92: return -1
|
||||
case 116: return -1
|
||||
case 114: return -1
|
||||
case 117: return -1
|
||||
case 102: return -1
|
||||
case 47: return -1
|
||||
case 34: return -1
|
||||
case 98: return -1
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return -1
|
||||
case 65 <= r && r <= 70: return -1
|
||||
case 97 <= r && r <= 102: return -1
|
||||
default: return -1
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[13] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 117: return 3
|
||||
case 102: return 3
|
||||
case 47: return 3
|
||||
case 34: return 2
|
||||
case 98: return 3
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
case 116: return 3
|
||||
case 114: return 3
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 3
|
||||
case 65 <= r && r <= 70: return 3
|
||||
case 97 <= r && r <= 102: return 3
|
||||
default: return 3
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[0] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 117: return -1
|
||||
case 102: return -1
|
||||
case 47: return -1
|
||||
case 34: return 1
|
||||
case 98: return -1
|
||||
case 110: return -1
|
||||
case 92: return -1
|
||||
case 116: return -1
|
||||
case 114: return -1
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return -1
|
||||
case 65 <= r && r <= 70: return -1
|
||||
case 97 <= r && r <= 102: return -1
|
||||
default: return -1
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[5] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 110: return 3
|
||||
case 92: return 4
|
||||
case 116: return 3
|
||||
case 114: return 3
|
||||
case 117: return 3
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
case 117: return 2
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
case 110: return 2
|
||||
case 102: return 14
|
||||
case 47: return 3
|
||||
case 34: return 2
|
||||
case 92: return 3
|
||||
case 98: return 14
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 14
|
||||
case 65 <= r && r <= 70: return 14
|
||||
case 97 <= r && r <= 102: return 14
|
||||
default: return 3
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
acc[4] = true
|
||||
fun[4] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 92: return -1
|
||||
case 98: return -1
|
||||
case 114: return -1
|
||||
case 34: return -1
|
||||
case 117: return -1
|
||||
case 47: return -1
|
||||
case 116: return -1
|
||||
case 110: return -1
|
||||
case 102: return -1
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return -1
|
||||
case 65 <= r && r <= 70: return -1
|
||||
case 97 <= r && r <= 102: return -1
|
||||
default: return -1
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[17] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 92: return 3
|
||||
case 98: return 2
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
case 117: return 2
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
case 110: return 2
|
||||
case 102: return 2
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 2
|
||||
case 65 <= r && r <= 70: return 2
|
||||
case 97 <= r && r <= 102: return 2
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
acc[5] = true
|
||||
fun[5] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 110: return 2
|
||||
case 102: return 2
|
||||
case 92: return 3
|
||||
case 98: return 2
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
case 117: return 2
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 2
|
||||
case 65 <= r && r <= 70: return 2
|
||||
case 97 <= r && r <= 102: return 2
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[0] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 114: return -1
|
||||
case 34: return 1
|
||||
case 117: return -1
|
||||
case 47: return -1
|
||||
case 116: return -1
|
||||
case 110: return -1
|
||||
case 102: return -1
|
||||
case 92: return -1
|
||||
case 98: return -1
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return -1
|
||||
case 65 <= r && r <= 70: return -1
|
||||
case 97 <= r && r <= 102: return -1
|
||||
default: return -1
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[10] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
case 117: return 2
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
case 110: return 2
|
||||
case 102: return 2
|
||||
case 92: return 3
|
||||
case 98: return 2
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 2
|
||||
case 65 <= r && r <= 70: return 2
|
||||
case 97 <= r && r <= 102: return 2
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[16] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 102: return 17
|
||||
case 92: return 3
|
||||
case 98: return 17
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
case 117: return 2
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
case 110: return 2
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 17
|
||||
case 65 <= r && r <= 70: return 17
|
||||
case 97 <= r && r <= 102: return 17
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[13] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 117: return 2
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
case 110: return 2
|
||||
case 102: return 2
|
||||
case 92: return 3
|
||||
case 98: return 2
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 2
|
||||
case 65 <= r && r <= 70: return 2
|
||||
case 97 <= r && r <= 102: return 2
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[7] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 92: return 3
|
||||
case 98: return 2
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
case 117: return 2
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
case 110: return 2
|
||||
case 102: return 2
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 2
|
||||
case 65 <= r && r <= 70: return 2
|
||||
case 97 <= r && r <= 102: return 2
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[2] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
case 110: return 2
|
||||
case 102: return 2
|
||||
case 92: return 3
|
||||
case 98: return 2
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
case 117: return 2
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 2
|
||||
case 65 <= r && r <= 70: return 2
|
||||
case 97 <= r && r <= 102: return 2
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[8] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 92: return 3
|
||||
case 98: return 2
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
case 117: return 2
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
case 110: return 2
|
||||
case 102: return 2
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 2
|
||||
case 65 <= r && r <= 70: return 2
|
||||
case 97 <= r && r <= 102: return 2
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[3] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 34: return 5
|
||||
case 117: return 6
|
||||
case 47: return 7
|
||||
case 116: return 8
|
||||
case 110: return 9
|
||||
case 102: return 10
|
||||
case 92: return 11
|
||||
case 98: return 12
|
||||
case 114: return 13
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 2
|
||||
case 65 <= r && r <= 70: return 2
|
||||
case 97 <= r && r <= 102: return 2
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[12] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
case 110: return 2
|
||||
case 102: return 2
|
||||
case 92: return 3
|
||||
case 98: return 2
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
case 117: return 2
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 2
|
||||
case 65 <= r && r <= 70: return 2
|
||||
case 97 <= r && r <= 102: return 2
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[14] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 117: return 2
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
case 110: return 2
|
||||
case 102: return 15
|
||||
case 92: return 3
|
||||
case 98: return 15
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 15
|
||||
case 65 <= r && r <= 70: return 15
|
||||
case 97 <= r && r <= 102: return 15
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[15] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
case 110: return 2
|
||||
case 102: return 16
|
||||
case 92: return 3
|
||||
case 98: return 16
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
case 117: return 2
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 16
|
||||
case 65 <= r && r <= 70: return 16
|
||||
case 97 <= r && r <= 102: return 16
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[1] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
case 110: return 2
|
||||
case 102: return 2
|
||||
case 92: return 3
|
||||
case 98: return 2
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
case 117: return 2
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 2
|
||||
case 65 <= r && r <= 70: return 2
|
||||
case 97 <= r && r <= 102: return 2
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[9] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 92: return 3
|
||||
case 98: return 2
|
||||
case 114: return 2
|
||||
case 34: return 4
|
||||
case 117: return 2
|
||||
case 47: return 2
|
||||
case 116: return 2
|
||||
case 110: return 2
|
||||
case 102: return 2
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 57: return 2
|
||||
case 65 <= r && r <= 70: return 2
|
||||
case 97 <= r && r <= 102: return 2
|
||||
default: return 2
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
|
@ -410,9 +410,10 @@ a0[0].id = 0
|
|||
{
|
||||
var acc [2]bool
|
||||
var fun [2]func(rune) int
|
||||
fun[0] = func(r rune) int {
|
||||
acc[1] = true
|
||||
fun[1] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 43: return 1
|
||||
case 43: return -1
|
||||
default:
|
||||
switch {
|
||||
default: return -1
|
||||
|
@ -420,10 +421,9 @@ fun[0] = func(r rune) int {
|
|||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
acc[1] = true
|
||||
fun[1] = func(r rune) int {
|
||||
fun[0] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 43: return -1
|
||||
case 43: return 1
|
||||
default:
|
||||
switch {
|
||||
default: return -1
|
||||
|
@ -466,9 +466,10 @@ a0[2].id = 2
|
|||
{
|
||||
var acc [2]bool
|
||||
var fun [2]func(rune) int
|
||||
fun[0] = func(r rune) int {
|
||||
acc[1] = true
|
||||
fun[1] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 58: return 1
|
||||
case 58: return -1
|
||||
default:
|
||||
switch {
|
||||
default: return -1
|
||||
|
@ -476,10 +477,9 @@ fun[0] = func(r rune) int {
|
|||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
acc[1] = true
|
||||
fun[1] = func(r rune) int {
|
||||
fun[0] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 58: return -1
|
||||
case 58: return 1
|
||||
default:
|
||||
switch {
|
||||
default: return -1
|
||||
|
@ -494,9 +494,10 @@ a0[3].id = 3
|
|||
{
|
||||
var acc [2]bool
|
||||
var fun [2]func(rune) int
|
||||
fun[0] = func(r rune) int {
|
||||
acc[1] = true
|
||||
fun[1] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 94: return 1
|
||||
case 94: return -1
|
||||
default:
|
||||
switch {
|
||||
default: return -1
|
||||
|
@ -504,10 +505,9 @@ fun[0] = func(r rune) int {
|
|||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
acc[1] = true
|
||||
fun[1] = func(r rune) int {
|
||||
fun[0] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 94: return -1
|
||||
case 94: return 1
|
||||
default:
|
||||
switch {
|
||||
default: return -1
|
||||
|
@ -578,18 +578,6 @@ a0[6].id = 6
|
|||
{
|
||||
var acc [5]bool
|
||||
var fun [5]func(rune) int
|
||||
fun[1] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 45: return -1
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 48: return 2
|
||||
case 49 <= r && r <= 57: return 2
|
||||
default: return -1
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
acc[2] = true
|
||||
fun[2] = func(r rune) int {
|
||||
switch(r) {
|
||||
|
@ -603,31 +591,6 @@ fun[2] = func(r rune) int {
|
|||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
acc[4] = true
|
||||
fun[4] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 45: return -1
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 48: return 4
|
||||
case 49 <= r && r <= 57: return 4
|
||||
default: return -1
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[0] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 45: return 1
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 48: return 2
|
||||
case 49 <= r && r <= 57: return 3
|
||||
default: return -1
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
acc[3] = true
|
||||
fun[3] = func(r rune) int {
|
||||
switch(r) {
|
||||
|
@ -641,6 +604,43 @@ fun[3] = func(r rune) int {
|
|||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
acc[4] = true
|
||||
fun[4] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 45: return -1
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 48: return 4
|
||||
case 49 <= r && r <= 57: return 4
|
||||
default: return -1
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[1] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 45: return -1
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 48: return 2
|
||||
case 49 <= r && r <= 57: return 2
|
||||
default: return -1
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[0] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 45: return 1
|
||||
default:
|
||||
switch {
|
||||
case 48 <= r && r <= 48: return 2
|
||||
case 49 <= r && r <= 57: return 3
|
||||
default: return -1
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
a0[7].acc = acc[:]
|
||||
a0[7].f = fun[:]
|
||||
a0[7].id = 7
|
||||
|
@ -651,9 +651,9 @@ var fun [2]func(rune) int
|
|||
acc[1] = true
|
||||
fun[1] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 9: return 1
|
||||
case 10: return 1
|
||||
case 32: return 1
|
||||
case 10: return 1
|
||||
case 9: return 1
|
||||
default:
|
||||
switch {
|
||||
default: return -1
|
||||
|
@ -664,8 +664,8 @@ fun[1] = func(r rune) int {
|
|||
fun[0] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 9: return 1
|
||||
case 10: return 1
|
||||
case 32: return 1
|
||||
case 10: return 1
|
||||
default:
|
||||
switch {
|
||||
default: return -1
|
||||
|
@ -680,18 +680,17 @@ a0[8].id = 8
|
|||
{
|
||||
var acc [2]bool
|
||||
var fun [2]func(rune) int
|
||||
acc[1] = true
|
||||
fun[1] = func(r rune) int {
|
||||
fun[0] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 45: return -1
|
||||
case 32: return -1
|
||||
case 13: return -1
|
||||
case 94: return -1
|
||||
case 58: return -1
|
||||
case 9: return -1
|
||||
case 12: return -1
|
||||
case 10: return -1
|
||||
case 43: return -1
|
||||
case 9: return -1
|
||||
case 32: return -1
|
||||
case 12: return -1
|
||||
case 13: return -1
|
||||
case 58: return -1
|
||||
case 94: return -1
|
||||
case 45: return -1
|
||||
default:
|
||||
switch {
|
||||
default: return 1
|
||||
|
@ -699,17 +698,18 @@ fun[1] = func(r rune) int {
|
|||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
fun[0] = func(r rune) int {
|
||||
acc[1] = true
|
||||
fun[1] = func(r rune) int {
|
||||
switch(r) {
|
||||
case 9: return -1
|
||||
case 32: return -1
|
||||
case 12: return -1
|
||||
case 13: return -1
|
||||
case 58: return -1
|
||||
case 94: return -1
|
||||
case 45: return -1
|
||||
case 58: return -1
|
||||
case 9: return -1
|
||||
case 12: return -1
|
||||
case 10: return -1
|
||||
case 43: return -1
|
||||
case 45: return -1
|
||||
case 32: return -1
|
||||
case 13: return -1
|
||||
default:
|
||||
switch {
|
||||
default: return 1
|
|
@ -1,5 +1,5 @@
|
|||
%{
|
||||
package search
|
||||
package bleve
|
||||
import "log"
|
||||
|
||||
func logDebugGrammar(format string, v ...interface{}) {
|
||||
|
@ -62,46 +62,30 @@ searchBase:
|
|||
STRING {
|
||||
str := $1.s
|
||||
logDebugGrammar("STRING - %s", str)
|
||||
q := &MatchQuery{
|
||||
Match: str,
|
||||
Field: parsingDefaultField,
|
||||
BoostVal: 1.0,
|
||||
Explain: true,
|
||||
}
|
||||
if parsingMapping[parsingDefaultField] != nil {
|
||||
q.Analyzer = parsingMapping[parsingDefaultField].Analyzer
|
||||
}
|
||||
q := NewMatchQuery(str).SetField(parsingDefaultField)
|
||||
if parsingMust {
|
||||
parsingMustList.Terms = append(parsingMustList.Terms, q)
|
||||
parsingMustList.AddQuery(q)
|
||||
parsingMust = false
|
||||
} else if parsingMustNot {
|
||||
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
|
||||
parsingMustNotList.AddQuery(q)
|
||||
parsingMustNot = false
|
||||
} else {
|
||||
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
|
||||
parsingShouldList.AddQuery(q)
|
||||
}
|
||||
}
|
||||
|
|
||||
PHRASE {
|
||||
phrase := $1.s
|
||||
logDebugGrammar("PHRASE - %s", phrase)
|
||||
q := &MatchPhraseQuery{
|
||||
MatchPhrase: phrase,
|
||||
Field: parsingDefaultField,
|
||||
BoostVal: 1.0,
|
||||
Explain: true,
|
||||
}
|
||||
if parsingMapping[parsingDefaultField] != nil {
|
||||
q.Analyzer = parsingMapping[parsingDefaultField].Analyzer
|
||||
}
|
||||
q := NewMatchPhraseQuery(phrase).SetField(parsingDefaultField)
|
||||
if parsingMust {
|
||||
parsingMustList.Terms = append(parsingMustList.Terms, q)
|
||||
parsingMustList.AddQuery(q)
|
||||
parsingMust = false
|
||||
} else if parsingMustNot {
|
||||
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
|
||||
parsingMustNotList.AddQuery(q)
|
||||
parsingMustNot = false
|
||||
} else {
|
||||
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
|
||||
parsingShouldList.AddQuery(q)
|
||||
}
|
||||
}
|
||||
|
|
||||
|
@ -109,23 +93,15 @@ STRING COLON STRING {
|
|||
field := $1.s
|
||||
str := $3.s
|
||||
logDebugGrammar("FIELD - %s STRING - %s", field, str)
|
||||
q := &MatchQuery{
|
||||
Match: str,
|
||||
Field: field,
|
||||
BoostVal: 1.0,
|
||||
Explain: true,
|
||||
}
|
||||
if parsingMapping[field] != nil {
|
||||
q.Analyzer = parsingMapping[field].Analyzer
|
||||
}
|
||||
q := NewMatchQuery(str).SetField(field)
|
||||
if parsingMust {
|
||||
parsingMustList.Terms = append(parsingMustList.Terms, q)
|
||||
parsingMustList.AddQuery(q)
|
||||
parsingMust = false
|
||||
} else if parsingMustNot {
|
||||
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
|
||||
parsingMustNotList.AddQuery(q)
|
||||
parsingMustNot = false
|
||||
} else {
|
||||
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
|
||||
parsingShouldList.AddQuery(q)
|
||||
}
|
||||
}
|
||||
|
|
||||
|
@ -133,23 +109,15 @@ STRING COLON PHRASE {
|
|||
field := $1.s
|
||||
phrase := $3.s
|
||||
logDebugGrammar("FIELD - %s PHRASE - %s", field, phrase)
|
||||
q := &MatchPhraseQuery{
|
||||
MatchPhrase: phrase,
|
||||
Field: field,
|
||||
BoostVal: 1.0,
|
||||
Explain: true,
|
||||
}
|
||||
if parsingMapping[field] != nil {
|
||||
q.Analyzer = parsingMapping[field].Analyzer
|
||||
}
|
||||
q := NewMatchPhraseQuery(phrase).SetField(field)
|
||||
if parsingMust {
|
||||
parsingMustList.Terms = append(parsingMustList.Terms, q)
|
||||
parsingMustList.AddQuery(q)
|
||||
parsingMust = false
|
||||
} else if parsingMustNot {
|
||||
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
|
||||
parsingMustNotList.AddQuery(q)
|
||||
parsingMustNot = false
|
||||
} else {
|
||||
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
|
||||
parsingShouldList.AddQuery(q)
|
||||
}
|
||||
};
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var crashHard = false
|
||||
var parserMutex sync.Mutex
|
||||
var parsingDefaultField string
|
||||
var parsingMust bool
|
||||
var parsingMustNot bool
|
||||
var debugParser bool
|
||||
var debugLexer bool
|
||||
|
||||
var parsingMustList *ConjunctionQuery
|
||||
var parsingMustNotList *DisjunctionQuery
|
||||
var parsingShouldList *DisjunctionQuery
|
||||
var parsingIndexMapping *IndexMapping
|
||||
|
||||
func ParseQuerySyntax(query string, mapping *IndexMapping, defaultField string) (rq Query, err error) {
|
||||
parserMutex.Lock()
|
||||
defer parserMutex.Unlock()
|
||||
|
||||
parsingIndexMapping = mapping
|
||||
parsingDefaultField = defaultField
|
||||
parsingMustList = NewConjunctionQuery([]Query{})
|
||||
parsingMustNotList = NewDisjunctionQuery([]Query{})
|
||||
parsingShouldList = NewDisjunctionQuery([]Query{})
|
||||
|
||||
defer func() {
|
||||
r := recover()
|
||||
if r != nil && r == "syntax error" {
|
||||
// if we're panicing over a syntax error, chill
|
||||
err = fmt.Errorf("Parse Error - %v", r)
|
||||
} else if r != nil {
|
||||
// otherise continue to panic
|
||||
if crashHard {
|
||||
panic(r)
|
||||
} else {
|
||||
err = fmt.Errorf("Other Error - %v", r)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
yyParse(NewLexer(strings.NewReader(query)))
|
||||
parsingQuery := NewBooleanQuery(nil, nil, nil)
|
||||
if len(parsingMustList.Conjuncts) > 0 {
|
||||
parsingQuery.Must = parsingMustList
|
||||
}
|
||||
if len(parsingMustNotList.Disjuncts) > 0 {
|
||||
parsingQuery.MustNot = parsingMustNotList
|
||||
}
|
||||
if len(parsingShouldList.Disjuncts) > 0 {
|
||||
parsingQuery.Should = parsingShouldList
|
||||
}
|
||||
rq = parsingQuery
|
||||
return rq, err
|
||||
}
|
|
@ -6,25 +6,45 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type TermQuery struct {
|
||||
Term string `json:"term"`
|
||||
Field string `json:"field,omitempty"`
|
||||
FieldVal string `json:"field,omitempty"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
Explain bool `json:"explain,omitempty"`
|
||||
}
|
||||
|
||||
func NewTermQuery(term string) *TermQuery {
|
||||
return &TermQuery{
|
||||
Term: term,
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *TermQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *TermQuery) Searcher(index index.Index) (Searcher, error) {
|
||||
return NewTermSearcher(index, q)
|
||||
func (q *TermQuery) SetBoost(b float64) *TermQuery {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *TermQuery) Field() string {
|
||||
return q.FieldVal
|
||||
}
|
||||
|
||||
func (q *TermQuery) SetField(f string) *TermQuery {
|
||||
q.FieldVal = f
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *TermQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
|
||||
return search.NewTermSearcher(i.i, q.Term, q.FieldVal, q.BoostVal, explain)
|
||||
}
|
||||
|
||||
func (q *TermQuery) Validate() error {
|
|
@ -0,0 +1,78 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func lookupPropertyPath(data interface{}, path string) interface{} {
|
||||
pathParts := decodePath(path)
|
||||
|
||||
current := data
|
||||
for _, part := range pathParts {
|
||||
current = lookupProptyPathPart(current, part)
|
||||
if current == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return current
|
||||
}
|
||||
|
||||
func lookupProptyPathPart(data interface{}, part string) interface{} {
|
||||
val := reflect.ValueOf(data)
|
||||
typ := val.Type()
|
||||
switch typ.Kind() {
|
||||
case reflect.Map:
|
||||
// FIXME can add support for other map keys in the future
|
||||
if typ.Key().Kind() == reflect.String {
|
||||
key := reflect.ValueOf(part)
|
||||
entry := val.MapIndex(key)
|
||||
if entry.IsValid() {
|
||||
return entry.Interface()
|
||||
}
|
||||
}
|
||||
case reflect.Struct:
|
||||
field := val.FieldByName(part)
|
||||
if field.IsValid() && field.CanInterface() {
|
||||
return field.Interface()
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
const PATH_SEPARATOR = "."
|
||||
|
||||
func decodePath(path string) []string {
|
||||
return strings.Split(path, PATH_SEPARATOR)
|
||||
}
|
||||
|
||||
func encodePath(pathElements []string) string {
|
||||
return strings.Join(pathElements, PATH_SEPARATOR)
|
||||
}
|
||||
|
||||
func mustString(data interface{}) (string, bool) {
|
||||
if data != nil {
|
||||
str, ok := data.(string)
|
||||
if ok {
|
||||
return str, true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// parseJSONTagName extracts the JSON field name from a struct tag
|
||||
func parseJSONTagName(tag string) string {
|
||||
if idx := strings.Index(tag, ","); idx != -1 {
|
||||
return tag[:idx]
|
||||
}
|
||||
return tag
|
||||
}
|
|
@ -0,0 +1,110 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type HighlightRequest struct {
|
||||
Style *string `json:"style"`
|
||||
Fields []string `json:"fields"`
|
||||
}
|
||||
|
||||
func NewHighlight() *HighlightRequest {
|
||||
return &HighlightRequest{}
|
||||
}
|
||||
|
||||
func NewHighlightWithStyle(style string) *HighlightRequest {
|
||||
return &HighlightRequest{
|
||||
Style: &style,
|
||||
}
|
||||
}
|
||||
|
||||
type SearchRequest struct {
|
||||
Query Query `json:"query"`
|
||||
Size int `json:"size"`
|
||||
From int `json:"from"`
|
||||
Highlight *HighlightRequest `json:"highlight"`
|
||||
Explain bool `json:"explain"`
|
||||
}
|
||||
|
||||
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
|
||||
var temp struct {
|
||||
Q json.RawMessage `json:"query"`
|
||||
Size int `json:"size"`
|
||||
From int `json:"from"`
|
||||
Highlight *HighlightRequest `json:"highlight"`
|
||||
Explain bool `json:"explain"`
|
||||
}
|
||||
|
||||
err := json.Unmarshal(input, &temp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
r.Size = temp.Size
|
||||
r.From = temp.From
|
||||
r.Explain = temp.Explain
|
||||
r.Highlight = temp.Highlight
|
||||
r.Query, err = ParseQuery(temp.Q)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if r.Size <= 0 {
|
||||
r.Size = 10
|
||||
}
|
||||
if r.From <= 0 {
|
||||
r.From = 0
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
func NewSearchRequest(q Query, size, from int, explain bool) *SearchRequest {
|
||||
return &SearchRequest{
|
||||
Query: q,
|
||||
Size: size,
|
||||
From: from,
|
||||
Explain: explain,
|
||||
}
|
||||
}
|
||||
|
||||
type SearchResult struct {
|
||||
Request *SearchRequest `json:"request"`
|
||||
Hits search.DocumentMatchCollection `json:"hits"`
|
||||
Total uint64 `json:"total_hits"`
|
||||
MaxScore float64 `json:"max_score"`
|
||||
Took time.Duration `json:"took"`
|
||||
}
|
||||
|
||||
func (sr *SearchResult) String() string {
|
||||
rv := ""
|
||||
if len(sr.Hits) > 0 {
|
||||
rv = fmt.Sprintf("%d matches, showing %d through %d, took %s\n", sr.Total, sr.Request.From+1, sr.Request.From+len(sr.Hits), sr.Took)
|
||||
for i, hit := range sr.Hits {
|
||||
rv += fmt.Sprintf("%5d. %s (%f)\n", i+sr.Request.From+1, hit.ID, hit.Score)
|
||||
for fragmentField, fragments := range hit.Fragments {
|
||||
rv += fmt.Sprintf("\t%s\n", fragmentField)
|
||||
for _, fragment := range fragments {
|
||||
rv += fmt.Sprintf("\t\t%s\n", fragment)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
rv = "No matches"
|
||||
}
|
||||
return rv
|
||||
}
|
|
@ -10,7 +10,10 @@ package search
|
|||
|
||||
import (
|
||||
"math"
|
||||
"regexp"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer"
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
"github.com/couchbaselabs/bleve/index/store/inmem"
|
||||
|
@ -27,6 +30,11 @@ func init() {
|
|||
}
|
||||
}
|
||||
|
||||
// create a simpler analyzer which will support these tests
|
||||
var testAnalyzer = &analysis.Analyzer{
|
||||
Tokenizer: regexp_tokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)),
|
||||
}
|
||||
|
||||
// sets up some mock data used in many tests in this package
|
||||
var twoDocIndexDescIndexingOptions = document.DEFAULT_TEXT_INDEXING_OPTIONS | document.INCLUDE_TERM_VECTORS
|
||||
|
||||
|
@ -34,28 +42,28 @@ var twoDocIndexDocs = []*document.Document{
|
|||
// must have 4/4 beer
|
||||
document.NewDocument("1").
|
||||
AddField(document.NewTextField("name", []byte("marty"))).
|
||||
AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions)).
|
||||
AddField(document.NewTextField("street", []byte("couchbase way"))),
|
||||
AddField(document.NewTextFieldCustom("desc", []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("street", []byte("couchbase way"), testAnalyzer)),
|
||||
// must have 1/4 beer
|
||||
document.NewDocument("2").
|
||||
AddField(document.NewTextField("name", []byte("steve"))).
|
||||
AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("angst beer couch database"), twoDocIndexDescIndexingOptions)).
|
||||
AddField(document.NewTextField("street", []byte("couchbase way"))).
|
||||
AddField(document.NewTextField("title", []byte("mister"))),
|
||||
AddField(document.NewTextFieldCustom("desc", []byte("angst beer couch database"), twoDocIndexDescIndexingOptions, testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("street", []byte("couchbase way"), testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("title", []byte("mister"), testAnalyzer)),
|
||||
// must have 1/4 beer
|
||||
document.NewDocument("3").
|
||||
AddField(document.NewTextField("name", []byte("dustin"))).
|
||||
AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("apple beer column dank"), twoDocIndexDescIndexingOptions)).
|
||||
AddField(document.NewTextField("title", []byte("mister"))),
|
||||
AddField(document.NewTextFieldCustom("desc", []byte("apple beer column dank"), twoDocIndexDescIndexingOptions, testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("title", []byte("mister"), testAnalyzer)),
|
||||
// must have 65/65 beer
|
||||
document.NewDocument("4").
|
||||
AddField(document.NewTextField("name", []byte("ravi"))).
|
||||
AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer"), twoDocIndexDescIndexingOptions)),
|
||||
AddField(document.NewTextFieldCustom("desc", []byte("beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)),
|
||||
// must have 0/x beer
|
||||
document.NewDocument("5").
|
||||
AddField(document.NewTextField("name", []byte("bobert"))).
|
||||
AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("water"), twoDocIndexDescIndexingOptions)).
|
||||
AddField(document.NewTextField("title", []byte("mister"))),
|
||||
AddField(document.NewTextFieldCustom("desc", []byte("water"), twoDocIndexDescIndexingOptions, testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("title", []byte("mister"), testAnalyzer)),
|
||||
}
|
||||
|
||||
func scoresCloseEnough(a, b float64) bool {
|
||||
|
|
|
@ -15,6 +15,7 @@ import (
|
|||
|
||||
type TopScoreCollector struct {
|
||||
k int
|
||||
skip int
|
||||
results *list.List
|
||||
took time.Duration
|
||||
maxScore float64
|
||||
|
@ -24,6 +25,15 @@ type TopScoreCollector struct {
|
|||
func NewTopScorerCollector(k int) *TopScoreCollector {
|
||||
return &TopScoreCollector{
|
||||
k: k,
|
||||
skip: 0,
|
||||
results: list.New(),
|
||||
}
|
||||
}
|
||||
|
||||
func NewTopScorerSkipCollector(k, skip int) *TopScoreCollector {
|
||||
return &TopScoreCollector{
|
||||
k: k,
|
||||
skip: skip,
|
||||
results: list.New(),
|
||||
}
|
||||
}
|
||||
|
@ -70,7 +80,7 @@ func (tksc *TopScoreCollector) collectSingle(dm *DocumentMatch) {
|
|||
|
||||
tksc.results.InsertBefore(dm, e)
|
||||
// if we just made the list too long
|
||||
if tksc.results.Len() > tksc.k {
|
||||
if tksc.results.Len() > (tksc.k + tksc.skip) {
|
||||
// remove the head
|
||||
tksc.results.Remove(tksc.results.Front())
|
||||
}
|
||||
|
@ -79,18 +89,26 @@ func (tksc *TopScoreCollector) collectSingle(dm *DocumentMatch) {
|
|||
}
|
||||
// if we got to the end, we still have to add it
|
||||
tksc.results.PushBack(dm)
|
||||
if tksc.results.Len() > tksc.k {
|
||||
if tksc.results.Len() > (tksc.k + tksc.skip) {
|
||||
// remove the head
|
||||
tksc.results.Remove(tksc.results.Front())
|
||||
}
|
||||
}
|
||||
|
||||
func (tksc *TopScoreCollector) Results() DocumentMatchCollection {
|
||||
rv := make(DocumentMatchCollection, tksc.results.Len())
|
||||
i := 0
|
||||
for e := tksc.results.Back(); e != nil; e = e.Prev() {
|
||||
rv[i] = e.Value.(*DocumentMatch)
|
||||
i++
|
||||
if tksc.results.Len()-tksc.skip > 0 {
|
||||
rv := make(DocumentMatchCollection, tksc.results.Len()-tksc.skip)
|
||||
i := 0
|
||||
skipped := 0
|
||||
for e := tksc.results.Back(); e != nil; e = e.Prev() {
|
||||
if skipped < tksc.skip {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
rv[i] = e.Value.(*DocumentMatch)
|
||||
i++
|
||||
}
|
||||
return rv
|
||||
}
|
||||
return rv
|
||||
return DocumentMatchCollection{}
|
||||
}
|
||||
|
|
|
@ -116,3 +116,97 @@ func TestTop10Scores(t *testing.T) {
|
|||
t.Errorf("expected minimum score to be higher than 10, got %f", minScore)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTop10ScoresSkip10(t *testing.T) {
|
||||
|
||||
// a stub search with more than 10 matches
|
||||
// the top-10 scores are > 10
|
||||
// everything else is less than 10
|
||||
searcher := &stubSearcher{
|
||||
matches: DocumentMatchCollection{
|
||||
&DocumentMatch{
|
||||
ID: "a",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "b",
|
||||
Score: 9.5,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "c",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "d",
|
||||
Score: 9,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "e",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "f",
|
||||
Score: 9,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "g",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "h",
|
||||
Score: 9,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "i",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "j",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "k",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "l",
|
||||
Score: 99,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "m",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "n",
|
||||
Score: 11,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
collector := NewTopScorerSkipCollector(10, 10)
|
||||
collector.Collect(searcher)
|
||||
|
||||
maxScore := collector.MaxScore()
|
||||
if maxScore != 99.0 {
|
||||
t.Errorf("expected max score 99.0, got %f", maxScore)
|
||||
}
|
||||
|
||||
total := collector.Total()
|
||||
if total != 14 {
|
||||
t.Errorf("expected 14 total results, got %d", total)
|
||||
}
|
||||
|
||||
results := collector.Results()
|
||||
|
||||
if len(results) != 4 {
|
||||
t.Fatalf("expected 4 results, got %d", len(results))
|
||||
}
|
||||
|
||||
if results[0].ID != "b" {
|
||||
t.Errorf("expected first result to have ID 'b', got %s", results[0].ID)
|
||||
}
|
||||
|
||||
if results[0].Score != 9.5 {
|
||||
t.Errorf("expected highest score to be 9.5ß, got %f", results[0].Score)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import ()
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import ()
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import ()
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import ()
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue