
major refactor, apologies for the large commit

removed analyzers (these are now built as needed through config)
removed html character filter (now built as needed through config)
added missing license header
changed constructor signature of filters that cannot return errors
filter constructors that can return errors now have a Must variant which panics
changed cld2 tokenizer into a filter (should only see lower-case input)
new top-level index api, closes #5
refactored index tests to not rely directly on analyzers
moved query objects to top-level
new top-level search api, closes #12
top score collector allows skipping results
index mapping supports _all by default, closes #3 and closes #6
index mapping supports disabled sections, closes #7
new http sub-package with reusable http.Handlers, closes #22
Marty Schoch 2014-07-30 12:30:38 -04:00
parent 8150146dc7
commit 2968d3538a
132 changed files with 4059 additions and 2821 deletions

3
.gitignore vendored
View File

@ -5,8 +5,9 @@
.project
.settings
.DS_Store
/analysis/tokenizers/cld2/cld2-read-only
/analysis/token_filters/cld2/cld2-read-only
/examples/bleve_index_json/bleve_index_json
/examples/bleve_index_json/index/
/examples/bleve_query/bleve_query
/utils/bleve_dump/bleve_dump
/y.output

View File

@ -1,39 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package standard_analyzer
import (
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_words_filter"
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
)
func NewStandardAnalyzer() (*analysis.Analyzer, error) {
lower_case_filter, err := lower_case_filter.NewLowerCaseFilter()
if err != nil {
return nil, err
}
stop_words_filter, err := stop_words_filter.NewStopWordsFilter()
if err != nil {
return nil, err
}
standard := analysis.Analyzer{
CharFilters: []analysis.CharFilter{},
Tokenizer: unicode_word_boundary.NewUnicodeWordBoundaryTokenizer(),
TokenFilters: []analysis.TokenFilter{
lower_case_filter,
stop_words_filter,
},
}
return &standard, nil
}

View File

@ -1,32 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package html_char_filter
import (
"regexp"
"github.com/couchbaselabs/bleve/analysis/char_filters/regexp_char_filter"
)
// the origin of this regex is here:
// http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/
// slightly modified by me to also match the DOCTYPE
const htmlTagPattern = `</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`
var htmlRegex = regexp.MustCompile(htmlTagPattern)
type HtmlCharFilter struct {
*regexp_char_filter.RegexpCharFilter
}
func NewHtmlCharFilter() *HtmlCharFilter {
return &HtmlCharFilter{
regexp_char_filter.NewRegexpCharFilter(htmlRegex, []byte{' '}),
}
}

View File

@ -6,14 +6,19 @@
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package html_char_filter
package regexp_char_filter
import (
"reflect"
"regexp"
"testing"
)
func TestHtmlCharFilter(t *testing.T) {
func TestRegexpCharFilter(t *testing.T) {
htmlTagPattern := `</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`
htmlRegex := regexp.MustCompile(htmlTagPattern)
tests := []struct {
input []byte
output []byte
@ -43,7 +48,7 @@ func TestHtmlCharFilter(t *testing.T) {
}
for _, test := range tests {
filter := NewHtmlCharFilter()
filter := NewRegexpCharFilter(htmlRegex, []byte{' '})
output := filter.Filter(test.input)
if !reflect.DeepEqual(output, test.output) {
t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input))

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package analysis
import (

View File

@ -0,0 +1,30 @@
# cld2 token filter
A bleve token filter which passes the text of each token to the cld2 library. The library determines what it thinks the language most likely is. The ISO-639 language code replaces the token term.
In normal usage, you use this with the "single" tokenizer, so there is only one input token. Further, you should precede it with the "to_lower" filter so that the input term is in all lower-case unicode characters.
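
A minimal sketch of that recommended setup, built directly with the analysis types (the Analyzer fields and its Analyze signature are taken from elsewhere in this commit; the sample input is illustrative):

```go
package main

import (
	"fmt"

	"github.com/couchbaselabs/bleve/analysis"
	"github.com/couchbaselabs/bleve/analysis/token_filters/cld2"
	"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
	"github.com/couchbaselabs/bleve/analysis/tokenizers/single_token"
)

func main() {
	// the whole input becomes a single token, is lower-cased,
	// and the cld2 filter then replaces the term with a language code
	analyzer := analysis.Analyzer{
		Tokenizer: single_token.NewSingleTokenTokenizer(),
		TokenFilters: []analysis.TokenFilter{
			lower_case_filter.NewLowerCaseFilter(),
			cld2.NewCld2Filter(),
		},
	}
	for _, token := range analyzer.Analyze([]byte("The quick brown fox")) {
		fmt.Println(string(token.Term)) // expected: en
	}
}
```

This is the same chain the default configuration registers under the "detect_lang" analyzer name.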
# Building
1. Acquire the source to cld2 in this directory.
$ svn checkout http://cld2.googlecode.com/svn/trunk/ cld2-read-only
2. Build cld2
$ cd cld2-read-only/internal/
$ ./compile_libs.sh
3. Put the resulting libraries somewhere your dynamic linker can find.
$ cp *.so /usr/local/lib
4. Run the unit tests
$ cd ../..
$ go test -v
=== RUN TestCld2Filter
--- PASS: TestCld2Filter (0.00 seconds)
PASS
ok github.com/couchbaselabs/bleve/analysis/token_filters/cld2 0.033s

View File

@ -1,9 +1,16 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
#include <cstddef>
#include <string.h>
#include <stdio.h>
#include <string>
#include "cld2_tokenizer.h"
#include "cld2_filter.h"
#include "cld2-read-only/public/compact_lang_det.h"
const char* DetectLang(const char *buffer) {

View File

@ -0,0 +1,52 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package cld2
// #cgo LDFLAGS: -lcld2_full
// #include "cld2_filter.h"
// #include <string.h>
import "C"
import (
"unsafe"
"github.com/couchbaselabs/bleve/analysis"
)
type Cld2Filter struct {
}
func NewCld2Filter() *Cld2Filter {
return &Cld2Filter{}
}
func (f *Cld2Filter) Filter(input analysis.TokenStream) analysis.TokenStream {
rv := make(analysis.TokenStream, 0)
offset := 0
for _, token := range input {
var err error
token.Term, err = f.detectLanguage(token.Term)
if err != nil {
token.Term = []byte("error")
}
token.Start = offset
token.End = token.Start + len(token.Term)
rv = append(rv, token)
offset = token.End + 1
}
return rv
}
func (f *Cld2Filter) detectLanguage(input []byte) ([]byte, error) {
cstr := C.CString(string(input))
res := C.DetectLang(cstr)
return C.GoBytes(unsafe.Pointer(res), C.int(C.strlen(res))), nil
}

View File

@ -6,12 +6,13 @@
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package shredder
#ifdef __cplusplus
extern "C" {
#endif
import (
"github.com/couchbaselabs/bleve/document"
)
const char* DetectLang(const char *buffer);
#ifdef __cplusplus
} /* extern "C" */
#endif
type Shredder interface {
Shred(id string, body []byte) (document.Document, error)
}

View File

@ -0,0 +1,112 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package cld2
import (
"reflect"
"testing"
"github.com/couchbaselabs/bleve/analysis"
)
func TestCld2Filter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("the quick brown fox"),
Start: 0,
End: 19,
Position: 1,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("en"),
Start: 0,
End: 2,
Position: 1,
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("こんにちは世界"),
Start: 0,
End: 21,
Position: 1,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ja"),
Start: 0,
End: 2,
Position: 1,
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"),
Start: 0,
End: 72,
Position: 1,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("th"),
Start: 0,
End: 2,
Position: 1,
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("مرحبا، العالم!"),
Start: 0,
End: 26,
Position: 1,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ar"),
Start: 0,
End: 2,
Position: 1,
},
},
},
}
filter := NewCld2Filter()
for _, test := range tests {
res := filter.Filter(test.input)
if !reflect.DeepEqual(res, test.output) {
t.Errorf("expected:")
for _, token := range test.output {
t.Errorf("%#v - %s", token, token.Term)
}
t.Errorf("got:")
for _, token := range res {
t.Errorf("%#v - %s", token, token.Term)
}
}
}
}

View File

@ -19,11 +19,11 @@ type LengthFilter struct {
max int
}
func NewLengthFilter(min, max int) (*LengthFilter, error) {
func NewLengthFilter(min, max int) *LengthFilter {
return &LengthFilter{
min: min,
max: max,
}, nil
}
}
func (f *LengthFilter) Filter(input analysis.TokenStream) analysis.TokenStream {

View File

@ -28,10 +28,7 @@ func TestLengthFilter(t *testing.T) {
},
}
lengthFilter, err := NewLengthFilter(3, 4)
if err != nil {
t.Fatal(err)
}
lengthFilter := NewLengthFilter(3, 4)
ouputTokenStream := lengthFilter.Filter(inputTokenStream)
if len(ouputTokenStream) != 1 {
t.Fatalf("expected 1 output token")
@ -55,10 +52,7 @@ func TestLengthFilterNoMax(t *testing.T) {
},
}
lengthFilter, err := NewLengthFilter(3, -1)
if err != nil {
t.Fatal(err)
}
lengthFilter := NewLengthFilter(3, -1)
ouputTokenStream := lengthFilter.Filter(inputTokenStream)
if len(ouputTokenStream) != 2 {
t.Fatalf("expected 2 output token")
@ -85,10 +79,7 @@ func TestLengthFilterNoMin(t *testing.T) {
},
}
lengthFilter, err := NewLengthFilter(-1, 4)
if err != nil {
t.Fatal(err)
}
lengthFilter := NewLengthFilter(-1, 4)
ouputTokenStream := lengthFilter.Filter(inputTokenStream)
if len(ouputTokenStream) != 2 {
t.Fatalf("expected 2 output token")

View File

@ -17,8 +17,8 @@ import (
type LowerCaseFilter struct {
}
func NewLowerCaseFilter() (*LowerCaseFilter, error) {
return &LowerCaseFilter{}, nil
func NewLowerCaseFilter() *LowerCaseFilter {
return &LowerCaseFilter{}
}
func (f *LowerCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {

View File

@ -41,10 +41,7 @@ func TestLowerCaseFilter(t *testing.T) {
},
}
filter, err := NewLowerCaseFilter()
if err != nil {
t.Fatal(err)
}
filter := NewLowerCaseFilter()
ouputTokenStream := filter.Filter(inputTokenStream)
if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) {
t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)

View File

@ -0,0 +1,18 @@
## Languages supported
"danish",
"dutch",
"english",
"finnish",
"french",
"german",
"hungarian",
"italian",
"norwegian",
"porter",
"portuguese",
"romanian",
"russian",
"spanish",
"swedish",
"turkish"

View File

@ -29,6 +29,14 @@ func NewStemmerFilter(lang string) (*StemmerFilter, error) {
}, nil
}
func MustNewStemmerFilter(lang string) *StemmerFilter {
sf, err := NewStemmerFilter(lang)
if err != nil {
panic(err)
}
return sf
}
func (s *StemmerFilter) List() []string {
return snowball.LangList()
}

View File

@ -24,10 +24,10 @@ type StopWordsFilter struct {
stopWords map[string]bool
}
func NewStopWordsFilter() (*StopWordsFilter, error) {
func NewStopWordsFilter() *StopWordsFilter {
return &StopWordsFilter{
stopWords: buildStopWordMap(DEFAULT_STOP_WORDS),
}, nil
}
}
func (f *StopWordsFilter) Filter(input analysis.TokenStream) analysis.TokenStream {

View File

@ -44,10 +44,7 @@ func TestStopWordsFilter(t *testing.T) {
},
}
filter, err := NewStopWordsFilter()
if err != nil {
t.Fatal(err)
}
filter := NewStopWordsFilter()
ouputTokenStream := filter.Filter(inputTokenStream)
if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) {
t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)

View File

@ -1,28 +0,0 @@
# cld2 tokenizer
A bleve tokenizer which passes the input text to the cld2 library. The library determines what it thinks the language most likely is. The ISO-639 language code is returned as the single token resulting from the analysis.
# Building
1. Acquire the source to cld2 in this directory.
$ svn checkout http://cld2.googlecode.com/svn/trunk/ cld2-read-only
2. Build cld2
$ cd cld2-read-only/internal/
$ ./compile_libs.sh
3. Put the resulting libraries somewhere your dynamic linker can find.
$ cp *.so /usr/local/lib
4. Run the unit tests
$ cd ../..
$ go test -v
=== RUN TestCld2Tokenizer
--- PASS: TestCld2Tokenizer (0.03 seconds)
PASS
ok github.com/couchbaselabs/bleve/analysis/tokenizers/cld2 0.067s

View File

@ -1,41 +0,0 @@
package cld2
// #cgo LDFLAGS: -Lcld2-read-only/internal/ -lcld2_full
// #include "cld2_tokenizer.h"
// #include <string.h>
import "C"
import (
"unsafe"
"github.com/couchbaselabs/bleve/analysis"
)
type Cld2Tokenizer struct {
}
func NewCld2Tokenizer() *Cld2Tokenizer {
return &Cld2Tokenizer{}
}
func (rt *Cld2Tokenizer) Tokenize(input []byte) analysis.TokenStream {
rv := make(analysis.TokenStream, 0)
lang, err := rt.detectLanguage(input)
if err != nil {
return rv
}
token := analysis.Token{
Term: lang,
Start: 0,
End: len(lang),
Position: 1,
}
rv = append(rv, &token)
return rv
}
func (rt *Cld2Tokenizer) detectLanguage(input []byte) ([]byte, error) {
cstr := C.CString(string(input))
res := C.DetectLang(cstr)
return C.GoBytes(unsafe.Pointer(res), C.int(C.strlen(res))), nil
}

View File

@ -1,10 +0,0 @@
#ifdef __cplusplus
extern "C" {
#endif
const char* DetectLang(const char *buffer);
#ifdef __cplusplus
} /* extern "C" */
#endif

View File

@ -1,76 +0,0 @@
package cld2
import (
"reflect"
"testing"
"github.com/couchbaselabs/bleve/analysis"
)
func TestCld2Tokenizer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
input: []byte("the quick brown fox"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("en"),
Start: 0,
End: 2,
Position: 1,
},
},
},
{
input: []byte("こんにちは世界"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ja"),
Start: 0,
End: 2,
Position: 1,
},
},
},
{
input: []byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("th"),
Start: 0,
End: 2,
Position: 1,
},
},
},
{
input: []byte("مرحبا، العالم!"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ar"),
Start: 0,
End: 2,
Position: 1,
},
},
},
}
tokenizer := NewCld2Tokenizer()
for _, test := range tests {
res := tokenizer.Tokenize(test.input)
if !reflect.DeepEqual(res, test.output) {
t.Errorf("expected:")
for _, token := range test.output {
t.Errorf("%#v - %s", token, token.Term)
}
t.Errorf("got:")
for _, token := range res {
t.Errorf("%#v - %s", token, token.Term)
}
}
}
}

172
config.go Normal file
View File

@ -0,0 +1,172 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"fmt"
"regexp"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/analysis/char_filters/regexp_char_filter"
"github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer"
"github.com/couchbaselabs/bleve/analysis/tokenizers/single_token"
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
"github.com/couchbaselabs/bleve/analysis/token_filters/cld2"
"github.com/couchbaselabs/bleve/analysis/token_filters/length_filter"
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
"github.com/couchbaselabs/bleve/analysis/token_filters/stemmer_filter"
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_words_filter"
"github.com/couchbaselabs/bleve/search"
)
type AnalysisConfig struct {
CharFilters map[string]analysis.CharFilter
Tokenizers map[string]analysis.Tokenizer
TokenFilters map[string]analysis.TokenFilter
Analyzers map[string]*analysis.Analyzer
}
type HighlightConfig struct {
Highlighters map[string]search.Highlighter
}
type Config struct {
Analysis *AnalysisConfig
DefaultAnalyzer *string
Highlight *HighlightConfig
DefaultHighlighter *string
}
func (c *Config) BuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) (*analysis.Analyzer, error) {
rv := analysis.Analyzer{}
if len(charFilterNames) > 0 {
rv.CharFilters = make([]analysis.CharFilter, len(charFilterNames))
for i, charFilterName := range charFilterNames {
charFilter := c.Analysis.CharFilters[charFilterName]
if charFilter == nil {
return nil, fmt.Errorf("no character filter named `%s` registered", charFilterName)
}
rv.CharFilters[i] = charFilter
}
}
rv.Tokenizer = c.Analysis.Tokenizers[tokenizerName]
if rv.Tokenizer == nil {
return nil, fmt.Errorf("no tokenizer named `%s` registered", tokenizerName)
}
if len(tokenFilterNames) > 0 {
rv.TokenFilters = make([]analysis.TokenFilter, len(tokenFilterNames))
for i, tokenFilterName := range tokenFilterNames {
tokenFilter := c.Analysis.TokenFilters[tokenFilterName]
if tokenFilter == nil {
return nil, fmt.Errorf("no token filter named `%s` registered", tokenFilterName)
}
rv.TokenFilters[i] = tokenFilter
}
}
return &rv, nil
}
func (c *Config) MustBuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) *analysis.Analyzer {
analyzer, err := c.BuildNewAnalyzer(charFilterNames, tokenizerName, tokenFilterNames)
if err != nil {
panic(err)
}
return analyzer
}
func NewConfig() *Config {
return &Config{
Analysis: &AnalysisConfig{
CharFilters: make(map[string]analysis.CharFilter),
Tokenizers: make(map[string]analysis.Tokenizer),
TokenFilters: make(map[string]analysis.TokenFilter),
Analyzers: make(map[string]*analysis.Analyzer),
},
Highlight: &HighlightConfig{
Highlighters: make(map[string]search.Highlighter),
},
}
}
var config *Config
func init() {
// build the default configuration
config = NewConfig()
// register char filters
htmlCharFilterRegexp := regexp.MustCompile(`</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`)
htmlCharFilter := regexp_char_filter.NewRegexpCharFilter(htmlCharFilterRegexp, []byte{' '})
config.Analysis.CharFilters["html"] = htmlCharFilter
// register tokenizers
whitespaceTokenizerRegexp := regexp.MustCompile(`\w+`)
config.Analysis.Tokenizers["single"] = single_token.NewSingleTokenTokenizer()
config.Analysis.Tokenizers["unicode"] = unicode_word_boundary.NewUnicodeWordBoundaryTokenizer()
config.Analysis.Tokenizers["unicode_th"] = unicode_word_boundary.NewUnicodeWordBoundaryCustomLocaleTokenizer("th_TH")
config.Analysis.Tokenizers["whitespace"] = regexp_tokenizer.NewRegexpTokenizer(whitespaceTokenizerRegexp)
// register token filters
config.Analysis.TokenFilters["detect_lang"] = cld2.NewCld2Filter()
config.Analysis.TokenFilters["short"] = length_filter.NewLengthFilter(3, -1)
config.Analysis.TokenFilters["long"] = length_filter.NewLengthFilter(-1, 255)
config.Analysis.TokenFilters["to_lower"] = lower_case_filter.NewLowerCaseFilter()
config.Analysis.TokenFilters["stemmer_da"] = stemmer_filter.MustNewStemmerFilter("danish")
config.Analysis.TokenFilters["stemmer_nl"] = stemmer_filter.MustNewStemmerFilter("dutch")
config.Analysis.TokenFilters["stemmer_en"] = stemmer_filter.MustNewStemmerFilter("english")
config.Analysis.TokenFilters["stemmer_fi"] = stemmer_filter.MustNewStemmerFilter("finnish")
config.Analysis.TokenFilters["stemmer_fr"] = stemmer_filter.MustNewStemmerFilter("french")
config.Analysis.TokenFilters["stemmer_de"] = stemmer_filter.MustNewStemmerFilter("german")
config.Analysis.TokenFilters["stemmer_hu"] = stemmer_filter.MustNewStemmerFilter("hungarian")
config.Analysis.TokenFilters["stemmer_it"] = stemmer_filter.MustNewStemmerFilter("italian")
config.Analysis.TokenFilters["stemmer_no"] = stemmer_filter.MustNewStemmerFilter("norwegian")
config.Analysis.TokenFilters["stemmer_porter"] = stemmer_filter.MustNewStemmerFilter("porter")
config.Analysis.TokenFilters["stemmer_pt"] = stemmer_filter.MustNewStemmerFilter("portuguese")
config.Analysis.TokenFilters["stemmer_ro"] = stemmer_filter.MustNewStemmerFilter("romanian")
config.Analysis.TokenFilters["stemmer_ru"] = stemmer_filter.MustNewStemmerFilter("russian")
config.Analysis.TokenFilters["stemmer_es"] = stemmer_filter.MustNewStemmerFilter("spanish")
config.Analysis.TokenFilters["stemmer_sv"] = stemmer_filter.MustNewStemmerFilter("swedish")
config.Analysis.TokenFilters["stemmer_tr"] = stemmer_filter.MustNewStemmerFilter("turkish")
config.Analysis.TokenFilters["stop_token"] = stop_words_filter.NewStopWordsFilter()
// register analyzers
keywordAnalyzer := config.MustBuildNewAnalyzer([]string{}, "single", []string{})
config.Analysis.Analyzers["keyword"] = keywordAnalyzer
simpleAnalyzer := config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower"})
config.Analysis.Analyzers["simple"] = simpleAnalyzer
standardAnalyzer := config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower", "stop_token"})
config.Analysis.Analyzers["standard"] = standardAnalyzer
englishAnalyzer := config.MustBuildNewAnalyzer([]string{}, "unicode", []string{"to_lower", "stemmer_en", "stop_token"})
config.Analysis.Analyzers["english"] = englishAnalyzer
detectLangAnalyzer := config.MustBuildNewAnalyzer([]string{}, "single", []string{"to_lower", "detect_lang"})
config.Analysis.Analyzers["detect_lang"] = detectLangAnalyzer
// register ansi highlighter
config.Highlight.Highlighters["ansi"] = search.NewSimpleHighlighter()
// register html highlighter
htmlFormatter := search.NewHTMLFragmentFormatterCustom(`<span class="highlight">`, `</span>`)
htmlHighlighter := search.NewSimpleHighlighter()
htmlHighlighter.SetFragmentFormatter(htmlFormatter)
config.Highlight.Highlighters["html"] = htmlHighlighter
// set the default analyzer
simpleAnalyzerName := "simple"
config.DefaultAnalyzer = &simpleAnalyzerName
// set the default highlighter
htmlHighlighterName := "html"
config.DefaultHighlighter = &htmlHighlighterName
}
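
The default configuration above is built once in init() and is not exported, so the sketch below only illustrates the shape of the new config API: register components under names, then assemble an analyzer from those names. The component choices mirror the defaults registered above:

```go
package main

import (
	"regexp"

	"github.com/couchbaselabs/bleve"
	"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
	"github.com/couchbaselabs/bleve/analysis/token_filters/stop_words_filter"
	"github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer"
)

func main() {
	cfg := bleve.NewConfig()

	// register building blocks under names of our choosing
	cfg.Analysis.Tokenizers["whitespace"] =
		regexp_tokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`))
	cfg.Analysis.TokenFilters["to_lower"] = lower_case_filter.NewLowerCaseFilter()
	cfg.Analysis.TokenFilters["stop_token"] = stop_words_filter.NewStopWordsFilter()

	// assemble an analyzer by name; the Must variant panics on unknown names
	cfg.Analysis.Analyzers["standard"] = cfg.MustBuildNewAnalyzer(
		nil,                                // no char filters
		"whitespace",                       // tokenizer
		[]string{"to_lower", "stop_token"}, // token filters
	)
}
```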

View File

@ -9,7 +9,8 @@
package document
import (
"encoding/json"
"fmt"
"log"
)
type Document struct {
@ -36,7 +37,21 @@ func (d *Document) AddField(f Field) *Document {
return d
}
func (d *Document) String() string {
bytes, _ := json.MarshalIndent(d, "", " ")
return string(bytes)
func (d *Document) GoString() string {
fields := ""
for i, field := range d.Fields {
if i != 0 {
fields += ", "
}
fields += fmt.Sprintf("%#v", field)
}
compositeFields := ""
for i, field := range d.CompositeFields {
log.Printf("see composite field")
if i != 0 {
compositeFields += ", "
}
compositeFields += fmt.Sprintf("%#v", field)
}
return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID, fields, compositeFields)
}

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package document
import (

View File

@ -9,22 +9,11 @@
package document
import (
"log"
"fmt"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
)
var standardAnalyzer *analysis.Analyzer
func init() {
var err error
standardAnalyzer, err = standard_analyzer.NewStandardAnalyzer()
if err != nil {
log.Fatal(err)
}
}
const DEFAULT_TEXT_INDEXING_OPTIONS = INDEX_FIELD
type TextField struct {
@ -43,7 +32,19 @@ func (t *TextField) Options() IndexingOptions {
}
func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
tokens := t.analyzer.Analyze(t.Value())
var tokens analysis.TokenStream
if t.analyzer != nil {
tokens = t.analyzer.Analyze(t.Value())
} else {
tokens = analysis.TokenStream{
&analysis.Token{
Start: 0,
End: len(t.value),
Term: t.value,
Position: 1,
},
}
}
fieldLength := len(tokens) // number of tokens in this doc field
tokenFreqs := analysis.TokenFrequency(tokens)
return fieldLength, tokenFreqs
@ -53,15 +54,27 @@ func (t *TextField) Value() []byte {
return t.value
}
func (t *TextField) GoString() string {
return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %s, Value: %s}", t.name, t.options, t.analyzer, t.value)
}
func NewTextField(name string, value []byte) *TextField {
return NewTextFieldWithIndexingOptions(name, value, DEFAULT_TEXT_INDEXING_OPTIONS)
}
func NewTextFieldWithIndexingOptions(name string, value []byte, options IndexingOptions) *TextField {
return &TextField{
name: name,
options: options,
value: value,
}
}
func NewTextFieldWithAnalyzer(name string, value []byte, analyzer *analysis.Analyzer) *TextField {
return &TextField{
name: name,
options: options,
analyzer: standardAnalyzer,
options: DEFAULT_TEXT_INDEXING_OPTIONS,
analyzer: analyzer,
value: value,
}
}
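
A short sketch of the resulting constructor split: one constructor per common case, with the analyzer supplied explicitly where needed rather than hard-wired to a standard analyzer. The field names and values here are illustrative:

```go
package example

import (
	"github.com/couchbaselabs/bleve/analysis"
	"github.com/couchbaselabs/bleve/document"
)

// buildDoc exercises the three TextField constructors shown above;
// the analyzer argument is whatever *analysis.Analyzer the caller built.
func buildDoc(a *analysis.Analyzer) {
	doc := document.NewDocument("doc1")

	// default indexing options, no analyzer attached
	doc.AddField(document.NewTextField("name", []byte("marty")))

	// explicit indexing options
	doc.AddField(document.NewTextFieldWithIndexingOptions(
		"desc", []byte("eat more rice"),
		document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS))

	// explicit analyzer, default indexing options
	doc.AddField(document.NewTextFieldWithAnalyzer("bio", []byte("gophers"), a))

	// doc can now be handed to an index, as in the upside_down tests below
}
```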

View File

@ -27,3 +27,23 @@ func (o IndexingOptions) IsStored() bool {
func (o IndexingOptions) IncludeTermVectors() bool {
return o&INCLUDE_TERM_VECTORS != 0
}
func (o IndexingOptions) String() string {
rv := ""
if o.IsIndexed() {
rv += "INDEXED"
}
if o.IsStored() {
if rv != "" {
rv += ", "
}
rv += "STORE"
}
if o.IncludeTermVectors() {
if rv != "" {
rv += ", "
}
rv += "TV"
}
return rv
}
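
A quick illustration of the new String form, assuming the option constants used by the examples in this commit; combining options with bitwise OR prints the comma-separated list:

```go
package main

import (
	"fmt"

	"github.com/couchbaselabs/bleve/document"
)

func main() {
	opts := document.INDEX_FIELD | document.STORE_FIELD | document.INCLUDE_TERM_VECTORS
	fmt.Println(opts) // prints: INDEXED, STORE, TV
}
```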

View File

@ -1,13 +0,0 @@
package document
import (
"github.com/couchbaselabs/bleve/analysis"
)
type FieldMapping struct {
Name string
Options IndexingOptions
Analyzer *analysis.Analyzer
}
type Mapping map[string]*FieldMapping

View File

@ -6,19 +6,19 @@
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package keyword_analyzer
package bleve
import (
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/analysis/tokenizers/single_token"
const (
ERROR_NO_ID Error = iota
ERROR_NO_TYPE
)
func NewKeywordAnalyzer() (*analysis.Analyzer, error) {
keyword := analysis.Analyzer{
CharFilters: []analysis.CharFilter{},
Tokenizer: single_token.NewSingleTokenTokenizer(),
TokenFilters: []analysis.TokenFilter{},
}
type Error int
return &keyword, nil
func (e Error) Error() string {
return errorMessages[int(e)]
}
var errorMessages = map[int]string{
0: "unable to determine document id",
}

View File

@ -13,67 +13,64 @@ import (
"io/ioutil"
"log"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index/store/leveldb"
"github.com/couchbaselabs/bleve/index/upside_down"
"github.com/couchbaselabs/bleve/shredder"
"github.com/couchbaselabs/bleve"
)
var jsonDir = flag.String("jsonDir", "json", "json directory")
var indexDir = flag.String("indexDir", "index", "index directory")
var storeFields = flag.Bool("storeFields", false, "store field data")
var includeTermVectors = flag.Bool("includeTermVectors", false, "include term vectors")
func main() {
flag.Parse()
indexOptions := document.INDEX_FIELD
if *storeFields {
indexOptions |= document.STORE_FIELD
}
if *includeTermVectors {
indexOptions |= document.INCLUDE_TERM_VECTORS
}
// create a new default mapping
mapping := bleve.NewIndexMapping()
// create a automatic JSON document shredder
jsonShredder := shredder.NewAutoJsonShredderWithOptions(indexOptions)
// create a new index
store, err := leveldb.Open(*indexDir)
if err != nil {
log.Fatal(err)
}
index := upside_down.NewUpsideDownCouch(store)
err = index.Open()
// open the index
index, err := bleve.Open(*indexDir, mapping)
if err != nil {
log.Fatal(err)
}
defer index.Close()
// open the directory
dirEntries, err := ioutil.ReadDir(*jsonDir)
if err != nil {
log.Fatal(err)
}
// walk the directory entries
for _, dirEntry := range dirEntries {
// read the bytes
jsonBytes, err := ioutil.ReadFile(*jsonDir + "/" + dirEntry.Name())
if err != nil {
log.Fatal(err)
}
// shred them into a document
doc, err := jsonShredder.Shred(dirEntry.Name(), jsonBytes)
if err != nil {
log.Fatal(err)
}
//log.Printf("%+v", doc)
// update the index
err = index.Update(doc)
for jsonFile := range walkDirectory(*jsonDir) {
// index the json files
err = index.IndexJSONID(jsonFile.filename, jsonFile.contents)
if err != nil {
log.Fatal(err)
}
}
}
type jsonFile struct {
filename string
contents []byte
}
func walkDirectory(dir string) chan jsonFile {
rv := make(chan jsonFile)
go func() {
defer close(rv)
// open the directory
dirEntries, err := ioutil.ReadDir(*jsonDir)
if err != nil {
log.Fatal(err)
}
// walk the directory entries
for _, dirEntry := range dirEntries {
// read the bytes
jsonBytes, err := ioutil.ReadFile(*jsonDir + "/" + dirEntry.Name())
if err != nil {
log.Fatal(err)
}
rv <- jsonFile{
filename: dirEntry.Name(),
contents: jsonBytes,
}
}
}()
return rv
}

View File

@ -12,85 +12,50 @@ import (
"flag"
"fmt"
"log"
"strings"
"github.com/couchbaselabs/bleve/index/store/leveldb"
"github.com/couchbaselabs/bleve/index/upside_down"
"github.com/couchbaselabs/bleve/search"
"github.com/couchbaselabs/bleve"
)
var field = flag.String("field", "description", "field to query")
var field = flag.String("field", "_all", "field to query")
var indexDir = flag.String("indexDir", "index", "index directory")
var limit = flag.Int("limit", 10, "limit to first N results")
var includeHighlights = flag.Bool("highlight", false, "highlight matches")
var skip = flag.Int("skip", 0, "skip the first N results")
var explain = flag.Bool("explain", false, "explain scores")
var includeHighlights = flag.Bool("highlight", true, "highlight matches")
func main() {
flag.Parse()
if flag.NArg() < 1 {
log.Fatal("Specify search term")
log.Fatal("Specify search query")
}
// create a new default mapping
mapping := bleve.NewIndexMapping()
// open index
store, err := leveldb.Open(*indexDir)
if err != nil {
log.Fatal(err)
}
index := upside_down.NewUpsideDownCouch(store)
err = index.Open()
index, err := bleve.Open(*indexDir, mapping)
if err != nil {
log.Fatal(err)
}
defer index.Close()
tq := search.TermQuery{
Term: flag.Arg(0),
Field: *field,
BoostVal: 1.0,
Explain: true,
// build a search with the provided parameters
queryString := strings.Join(flag.Args(), " ")
query := bleve.NewSyntaxQuery(queryString)
searchRequest := bleve.NewSearchRequest(query, *limit, *skip, *explain)
// enable highlights if requested
if *includeHighlights {
searchRequest.Highlight = bleve.NewHighlightWithStyle("ansi")
}
collector := search.NewTopScorerCollector(*limit)
searcher, err := tq.Searcher(index)
if err != nil {
log.Fatalf("searcher error: %v", err)
return
}
err = collector.Collect(searcher)
// execute the search
searchResult, err := index.Search(searchRequest)
if err != nil {
log.Fatalf("search error: %v", err)
return
}
results := collector.Results()
if len(results) == 0 {
fmt.Printf("No matches\n")
} else {
last := uint64(*limit)
if searcher.Count() < last {
last = searcher.Count()
}
fmt.Printf("%d matches, showing %d through %d\n", searcher.Count(), 1, last)
for i, result := range results {
fmt.Printf("%2d. %s (%f)\n", i+1, result.ID, result.Score)
if *includeHighlights {
highlighter := search.NewSimpleHighlighter()
doc, err := index.Document(result.ID)
if err != nil {
fmt.Print(err)
return
}
fragments := highlighter.BestFragmentsInField(result, doc, *field, 5)
for _, fragment := range fragments {
fmt.Printf("\t%s\n", fragment)
}
if len(fragments) == 0 {
for _, f := range doc.Fields {
fmt.Printf("\tfield: %s\n", f)
}
}
}
}
}
fmt.Println(searchResult)
}

View File

@ -4,5 +4,3 @@ echo Running nex...
nex query_syntax.nex
echo Running goyacc...
go tool yacc query_syntax.y
echo Running go build...
go build

67
http/debug.go Normal file
View File

@ -0,0 +1,67 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package http
import (
"fmt"
"net/http"
"github.com/couchbaselabs/bleve/index/upside_down"
"github.com/gorilla/mux"
)
// DebugDocumentHandler allows you to debug the index content
// for a given document ID. The document ID should be mapped
// to the mux router URL variable named "docID"
type DebugDocumentHandler struct {
defaultIndexName string
}
func NewDebugDocumentHandler(defaultIndexName string) *DebugDocumentHandler {
return &DebugDocumentHandler{
defaultIndexName: defaultIndexName,
}
}
func (h *DebugDocumentHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
// find the index to operate on
indexName := mux.Vars(req)["indexName"]
if indexName == "" {
indexName = h.defaultIndexName
}
index := IndexByName(indexName)
if index == nil {
showError(w, req, fmt.Sprintf("no such index '%s'", indexName), 404)
return
}
// find the docID
docID := mux.Vars(req)["docID"]
rows, err := index.DumpDoc(docID)
if err != nil {
showError(w, req, fmt.Sprintf("error debugging document: %v", err), 500)
return
}
rv := make([]interface{}, 0)
for _, row := range rows {
udcRow, ok := row.(upside_down.UpsideDownCouchRow)
if ok {
tmp := struct {
Key []byte `json:"key"`
Val []byte `json:"val"`
}{
Key: udcRow.Key(),
Val: udcRow.Value(),
}
rv = append(rv, tmp)
}
}
mustEncode(w, rv)
}

35
http/registry.go Normal file
View File

@ -0,0 +1,35 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package http
import (
"sync"
"github.com/couchbaselabs/bleve"
)
var indexNameMapping map[string]bleve.Index
var indexNameMappingLock sync.RWMutex
func RegisterIndexName(name string, index bleve.Index) {
indexNameMappingLock.Lock()
defer indexNameMappingLock.Unlock()
if indexNameMapping == nil {
indexNameMapping = make(map[string]bleve.Index)
}
indexNameMapping[name] = index
}
func IndexByName(name string) bleve.Index {
indexNameMappingLock.RLock()
defer indexNameMappingLock.RUnlock()
return indexNameMapping[name]
}

85
http/search.go Normal file
View File

@ -0,0 +1,85 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package http
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"github.com/gorilla/mux"
"github.com/couchbaselabs/bleve"
)
// SearchHandler can handle search requests sent over HTTP
// the index name can be selected in the URL by mapping a
// gorilla mux var, or it can be set manually by
// setting the defaultIndexName value
type SearchHandler struct {
defaultIndexName string
}
func NewSearchHandler(defaultIndexName string) *SearchHandler {
return &SearchHandler{
defaultIndexName: defaultIndexName,
}
}
func (h *SearchHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
// find the index to operate on
indexName := mux.Vars(req)["indexName"]
if indexName == "" {
indexName = h.defaultIndexName
}
index := IndexByName(indexName)
if index == nil {
showError(w, req, fmt.Sprintf("no such index '%s'", indexName), 404)
return
}
// read the request body
requestBody, err := ioutil.ReadAll(req.Body)
if err != nil {
showError(w, req, fmt.Sprintf("error reading request body: %v", err), 400)
return
}
log.Printf("request body: %s", requestBody)
// parse the request
var searchRequest bleve.SearchRequest
err = json.Unmarshal(requestBody, &searchRequest)
if err != nil {
showError(w, req, fmt.Sprintf("error parsing query: %v", err), 400)
return
}
log.Printf("parsed request %#v", searchRequest)
// validate the query
err = searchRequest.Query.Validate()
if err != nil {
showError(w, req, fmt.Sprintf("error validating query: %v", err), 400)
return
}
// execute the query
searchResponse, err := index.Search(&searchRequest)
if err != nil {
showError(w, req, fmt.Sprintf("error executing query: %v", err), 500)
return
}
// encode the response
mustEncode(w, searchResponse)
}
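
These handlers are plain http.Handlers, so mounting them is left to the application. A sketch of wiring the search and debug handlers into a gorilla/mux router (the index path and route patterns are illustrative; the mux vars must be named indexName and docID to match the handlers above):

```go
package main

import (
	"log"
	"net/http"

	"github.com/couchbaselabs/bleve"
	bleveHttp "github.com/couchbaselabs/bleve/http"
	"github.com/gorilla/mux"
)

func main() {
	mapping := bleve.NewIndexMapping()
	index, err := bleve.Open("example.bleve", mapping)
	if err != nil {
		log.Fatal(err)
	}
	defer index.Close()

	// make the index reachable by name for the handlers
	bleveHttp.RegisterIndexName("example", index)

	router := mux.NewRouter()
	router.Handle("/api/{indexName}/_search",
		bleveHttp.NewSearchHandler("example")).Methods("POST")
	router.Handle("/api/{indexName}/_debug/{docID}",
		bleveHttp.NewDebugDocumentHandler("example"))

	log.Fatal(http.ListenAndServe(":8080", router))
}
```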

34
http/util.go Normal file
View File

@ -0,0 +1,34 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package http
import (
"encoding/json"
"io"
"log"
"net/http"
)
func showError(w http.ResponseWriter, r *http.Request,
msg string, code int) {
log.Printf("Reporting error %v/%v", code, msg)
http.Error(w, msg, code)
}
func mustEncode(w io.Writer, i interface{}) {
if headered, ok := w.(http.ResponseWriter); ok {
headered.Header().Set("Cache-Control", "no-cache")
headered.Header().Set("Content-type", "application/json")
}
e := json.NewEncoder(w)
if err := e.Encode(i); err != nil {
panic(err)
}
}

47
index.go Normal file
View File

@ -0,0 +1,47 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"github.com/couchbaselabs/bleve/document"
)
type Identifier interface {
ID() string
}
type Classifier interface {
Type() string
}
type Index interface {
Index(data interface{}) error
IndexID(id string, data interface{}) error
IndexJSON(data []byte) error
IndexJSONID(id string, data []byte) error
Delete(data interface{}) error
DeleteID(id string) error
Document(id string) (*document.Document, error)
DocCount() uint64
Search(req *SearchRequest) (*SearchResult, error)
DumpDoc(id string) ([]interface{}, error)
Close()
}
// Open the index at the specified path, and create it if it does not exist.
// The provided mapping will be used for all Index/Search operations.
func Open(path string, mapping *IndexMapping) (Index, error) {
return newIndex(path, mapping)
}
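
Putting the new top-level API together (the same flow the refactored examples above use): open an index with a mapping, index JSON by ID, and run a query-syntax search against the default _all field. The file name and document content here are illustrative:

```go
package main

import (
	"fmt"
	"log"

	"github.com/couchbaselabs/bleve"
)

func main() {
	mapping := bleve.NewIndexMapping()
	index, err := bleve.Open("example.bleve", mapping)
	if err != nil {
		log.Fatal(err)
	}
	defer index.Close()

	// index a JSON document under an explicit ID
	err = index.IndexJSONID("doc1", []byte(`{"name": "bleve", "desc": "full text search"}`))
	if err != nil {
		log.Fatal(err)
	}

	// search the _all field using the query syntax parser
	query := bleve.NewSyntaxQuery("full text")
	request := bleve.NewSearchRequest(query, 10, 0, false)
	result, err := index.Search(request)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(result)
}
```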

View File

@ -1,3 +1,12 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
// +build forestdb
package goforestdb

View File

@ -1,3 +1,12 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
// +build forestdb
package goforestdb

View File

@ -1,3 +1,12 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
// +build forestdb
package goforestdb

View File

@ -1,3 +1,12 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
// +build forestdb
package goforestdb

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package gouchstore
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package gouchstore
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package gouchstore
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package gouchstore
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package gouchstore
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package inmem
type InMemBatch struct {

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package inmem
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package inmem
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package inmem
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package store
type KVBatch interface {

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package leveldb
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package leveldb
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package leveldb
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package leveldb
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package leveldb
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package store_test
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package upside_down
import (

View File

@ -1,3 +1,12 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
// +build forestdb
package upside_down

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package upside_down
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package upside_down
import (

View File

@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package upside_down
import (

View File

@ -13,7 +13,6 @@ import (
"reflect"
"testing"
_ "github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
"github.com/couchbaselabs/bleve/index/store/gouchstore"
@ -40,8 +39,8 @@ func TestIndexReader(t *testing.T) {
expectedCount += 1
doc = document.NewDocument("2")
doc.AddField(document.NewTextField("name", []byte("test test test")))
doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS))
doc.AddField(document.NewTextFieldWithAnalyzer("name", []byte("test test test"), testAnalyzer))
doc.AddField(document.NewTextFieldCustom("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS, testAnalyzer))
err = idx.Update(doc)
if err != nil {
t.Errorf("Error updating index: %v", err)

View File

@ -486,6 +486,30 @@ func (udc *UpsideDownCouch) Dump() {
}
}
func (udc *UpsideDownCouch) DumpFields() {
it := udc.store.Iterator([]byte{'f'})
defer it.Close()
key, val, valid := it.Current()
for valid {
if !bytes.HasPrefix(key, []byte{'f'}) {
break
}
row, err := ParseFromKeyValue(key, val)
if err != nil {
fmt.Printf("error parsing key/value: %v", err)
return
}
if row != nil {
fmt.Printf("%v\n", row)
fmt.Printf("Key: % -100x\nValue: % -100x\n\n", key, val)
}
it.Next()
key, val, valid = it.Current()
}
}
type keyset [][]byte
func (k keyset) Len() int { return len(k) }

View File

@ -10,13 +10,19 @@ package upside_down
import (
"os"
"regexp"
"testing"
_ "github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index/store/gouchstore"
)
var testAnalyzer = &analysis.Analyzer{
Tokenizer: regexp_tokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)),
}
func TestIndexOpenReopen(t *testing.T) {
defer os.RemoveAll("test")
@ -180,7 +186,7 @@ func TestIndexInsertThenUpdate(t *testing.T) {
// this update should overwrite one term, and introduce one new one
doc = document.NewDocument("1")
doc.AddField(document.NewTextField("name", []byte("test fail")))
doc.AddField(document.NewTextFieldWithAnalyzer("name", []byte("test fail"), testAnalyzer))
err = idx.Update(doc)
if err != nil {
t.Errorf("Error deleting entry from index: %v", err)

187
index_impl.go Normal file
View File

@ -0,0 +1,187 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"encoding/json"
"fmt"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
"github.com/couchbaselabs/bleve/index/store"
"github.com/couchbaselabs/bleve/index/store/leveldb"
"github.com/couchbaselabs/bleve/index/upside_down"
"github.com/couchbaselabs/bleve/search"
)
type indexImpl struct {
s store.KVStore
i index.Index
m *IndexMapping
}
func newIndex(path string, mapping *IndexMapping) (*indexImpl, error) {
store, err := leveldb.Open(path)
if err != nil {
return nil, err
}
idx := upside_down.NewUpsideDownCouch(store)
err = idx.Open()
if err != nil {
return nil, err
}
return &indexImpl{
s: store,
i: idx,
m: mapping,
}, nil
}
// Index the provided data.
func (i *indexImpl) Index(data interface{}) error {
id, ok := i.determineID(data)
if ok {
return i.IndexID(id, data)
}
return ERROR_NO_ID
}
func (i *indexImpl) IndexID(id string, data interface{}) error {
doc := document.NewDocument(id)
err := i.m.MapDocument(doc, data)
if err != nil {
return err
}
err = i.i.Update(doc)
if err != nil {
return err
}
return nil
}
func (i *indexImpl) IndexJSON(data []byte) error {
var obj interface{}
err := json.Unmarshal(data, &obj)
if err != nil {
return err
}
return i.Index(obj)
}
func (i *indexImpl) IndexJSONID(id string, data []byte) error {
var obj interface{}
err := json.Unmarshal(data, &obj)
if err != nil {
return err
}
return i.IndexID(id, obj)
}
func (i *indexImpl) Delete(data interface{}) error {
id, ok := i.determineID(data)
if ok {
return i.DeleteID(id)
}
return ERROR_NO_ID
}
func (i *indexImpl) DeleteID(id string) error {
err := i.i.Delete(id)
if err != nil {
return err
}
return nil
}
func (i *indexImpl) Document(id string) (*document.Document, error) {
return i.i.Document(id)
}
func (i *indexImpl) DocCount() uint64 {
return i.i.DocCount()
}
func (i *indexImpl) Search(req *SearchRequest) (*SearchResult, error) {
collector := search.NewTopScorerSkipCollector(req.Size, req.From)
searcher, err := req.Query.Searcher(i, req.Explain)
if err != nil {
return nil, err
}
err = collector.Collect(searcher)
if err != nil {
return nil, err
}
hits := collector.Results()
if req.Highlight != nil {
// get the right highlighter
highlighter := config.Highlight.Highlighters[*config.DefaultHighlighter]
if req.Highlight.Style != nil {
highlighter = config.Highlight.Highlighters[*req.Highlight.Style]
if highlighter == nil {
return nil, fmt.Errorf("no highlighter named `%s` registered", req.Highlight.Style)
}
}
for _, hit := range hits {
doc, err := i.Document(hit.ID)
if err == nil {
highlightFields := req.Highlight.Fields
if highlightFields == nil {
// add all fields with matches
highlightFields = make([]string, 0, len(hit.Locations))
for k := range hit.Locations {
highlightFields = append(highlightFields, k)
}
}
for _, hf := range highlightFields {
highlighter.BestFragmentsInField(hit, doc, hf, 3)
}
}
}
}
return &SearchResult{
Request: req,
Hits: hits,
Total: collector.Total(),
MaxScore: collector.MaxScore(),
Took: collector.Took(),
}, nil
}
func (i *indexImpl) DumpDoc(id string) ([]interface{}, error) {
return i.i.DumpDoc(id)
}
func (i *indexImpl) Close() {
i.i.Close()
}
func (i *indexImpl) determineID(data interface{}) (string, bool) {
// first see if the object implements Identifier
identifier, ok := data.(Identifier)
if ok {
return identifier.ID(), true
}
// now see if we can find an ID using the mapping
if i.m.IdField != nil {
id, ok := mustString(lookupPropertyPath(data, *i.m.IdField))
if ok {
return id, true
}
}
return "", false
}
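Not part of the commit, purely for orientation: a minimal sketch of driving the new top-level API above. It assumes the exported Open(path, mapping) exercised by index_test.go below, that its return value exposes the indexImpl methods shown here, and that SearchRequest is a plain struct with the Query/Size/From fields that Search() reads.
package main
import (
	"fmt"
	"github.com/couchbaselabs/bleve"
)
func main() {
	mapping := bleve.NewIndexMapping()
	idx, err := bleve.Open("example.bleve", mapping) // wires leveldb + upside_down as in newIndex above
	if err != nil {
		panic(err)
	}
	defer idx.Close()
	// the ID comes from the Identifier interface or the mapping's id_field (_id by default)
	if err := idx.IndexID("doc1", map[string]interface{}{"name": "marty"}); err != nil {
		panic(err)
	}
	req := &bleve.SearchRequest{Query: bleve.NewMatchQuery("marty"), Size: 10}
	res, err := idx.Search(req)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%d hits\n", res.Total)
}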

90
index_test.go Normal file
View File

@ -0,0 +1,90 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"os"
"testing"
)
type Address struct {
Street string `json:"street"`
City string `json:"city"`
State string `json:"state"`
Zip string `json:"zip"`
}
type Person struct {
Identifier string `json:"id"`
Name string `json:"name"`
Address *Address `json:"address"`
Hideouts []*Address `json:"hideouts"`
Tags []string `json:"tags"`
}
func (p *Person) ID() string {
return p.Identifier
}
func (p *Person) Type() string {
return "person"
}
// FIXME needs more assertions
func TestIndex(t *testing.T) {
defer os.RemoveAll("testidx")
nameMapping := NewDocumentMapping().
AddFieldMapping(NewFieldMapping("", "text", "standard", true, true, true, true))
tagsMapping := NewDocumentMapping().
AddFieldMapping(NewFieldMapping("", "text", "standard", true, true, true, false))
personMapping := NewDocumentMapping().
AddSubDocumentMapping("name", nameMapping).
AddSubDocumentMapping("id", NewDocumentDisabledMapping()).
AddSubDocumentMapping("tags", tagsMapping)
mapping := NewIndexMapping().
AddDocumentMapping("person", personMapping)
index, err := Open("testidx", mapping)
if err != nil {
t.Fatal(err)
}
obj := Person{
Identifier: "a",
Name: "marty",
Address: &Address{
Street: "123 Sesame St.",
City: "Garden",
State: "MIND",
Zip: "12345",
},
Hideouts: []*Address{
&Address{
Street: "999 Gopher St.",
City: "Denver",
State: "CO",
Zip: "86753",
},
&Address{
Street: "88 Rusty Ln.",
City: "Amsterdam",
State: "CA",
Zip: "09090",
},
},
Tags: []string{"amped", "bogus", "gnarley", "tubed"},
}
err = index.Index(&obj)
if err != nil {
t.Error(err)
}
}

131
mapping_document.go Normal file
View File

@ -0,0 +1,131 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"encoding/json"
"fmt"
"github.com/couchbaselabs/bleve/analysis"
)
type DocumentMapping struct {
Enabled *bool `json:"enabled"`
Dynamic *bool `json:"dynamic"`
Properties map[string]*DocumentMapping `json:"properties"`
Fields []*FieldMapping `json:"fields"`
DefaultAnalyzer *string `json:"default_analyzer"`
}
func (dm *DocumentMapping) GoString() string {
return fmt.Sprintf(" &bleve.DocumentMapping{Enabled:%t, Dynamic:%t, Properties:%#v, Fields:%#v}", *dm.Enabled, *dm.Dynamic, dm.Properties, dm.Fields)
}
func (dm *DocumentMapping) DocumentMappingForPath(path string) *DocumentMapping {
pathElements := decodePath(path)
current := dm
for _, pathElement := range pathElements {
var ok bool
current, ok = current.Properties[pathElement]
if !ok {
return nil
}
}
return current
}
func NewDocumentMapping() *DocumentMapping {
return &DocumentMapping{
Enabled: &tRUE,
Dynamic: &tRUE,
}
}
func NewDocumentStaticMapping() *DocumentMapping {
return &DocumentMapping{
Enabled: &tRUE,
Dynamic: &fALSE,
}
}
func NewDocumentDisabledMapping() *DocumentMapping {
return &DocumentMapping{
Enabled: &fALSE,
}
}
func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) *DocumentMapping {
if dm.Properties == nil {
dm.Properties = make(map[string]*DocumentMapping)
}
dm.Properties[property] = sdm
return dm
}
func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) *DocumentMapping {
if dm.Fields == nil {
dm.Fields = make([]*FieldMapping, 0)
}
dm.Fields = append(dm.Fields, fm)
return dm
}
func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
var tmp struct {
Enabled *bool `json:"enabled"`
Dynamic *bool `json:"dynamic"`
Properties map[string]*DocumentMapping `json:"properties"`
Fields []*FieldMapping `json:"fields"`
DefaultAnalyzer *string `json:"default_analyzer"`
}
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
dm.Enabled = &tRUE
if tmp.Enabled != nil {
dm.Enabled = tmp.Enabled
}
dm.Dynamic = &tRUE
if tmp.Dynamic != nil {
dm.Dynamic = tmp.Dynamic
}
if tmp.DefaultAnalyzer != nil {
dm.DefaultAnalyzer = tmp.DefaultAnalyzer
}
if tmp.Properties != nil {
dm.Properties = make(map[string]*DocumentMapping, len(tmp.Properties))
}
for propName, propMapping := range tmp.Properties {
dm.Properties[propName] = propMapping
}
if tmp.Fields != nil {
dm.Fields = make([]*FieldMapping, len(tmp.Fields))
}
for i, field := range tmp.Fields {
dm.Fields[i] = field
}
return nil
}
func (dm *DocumentMapping) defaultAnalyzer(path []string) *analysis.Analyzer {
var rv *analysis.Analyzer
current := dm
for _, pathElement := range path {
var ok bool
current, ok = current.Properties[pathElement]
if !ok {
break
}
if current.DefaultAnalyzer != nil {
rv = config.Analysis.Analyzers[*current.DefaultAnalyzer]
}
}
return rv
}
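Illustration only (not in the diff) of how the builders above compose and what DocumentMappingForPath resolves, written as if inside package bleve; the dotted path assumes decodePath splits on the same PATH_SEPARATOR that encodePath joins with.
func exampleDocumentMapping() *DocumentMapping {
	city := NewDocumentMapping().
		AddFieldMapping(NewFieldMapping("city", "text", "standard", true, true, true, true))
	address := NewDocumentMapping().
		AddSubDocumentMapping("city", city)
	person := NewDocumentStaticMapping().
		AddSubDocumentMapping("address", address).
		AddSubDocumentMapping("secret", NewDocumentDisabledMapping())
	// walks Properties["address"].Properties["city"]; any missing element returns nil
	return person.DocumentMappingForPath("address.city") // assumes "." is PATH_SEPARATOR
}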

55
mapping_field.go Normal file
View File

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"fmt"
"github.com/couchbaselabs/bleve/document"
)
type FieldMapping struct {
Name *string `json:"name"`
Type *string `json:"type"`
Analyzer *string `json:"analyzer"`
Store *bool `json:"store"`
Index *bool `json:"index"`
IncludeTermVectors *bool `json:"include_term_vectors"`
IncludeInAll *bool `json:"include_in_all"`
}
func NewFieldMapping(name, typ, analyzer string, store, index bool, includeTermVectors bool, includeInAll bool) *FieldMapping {
return &FieldMapping{
Name: &name,
Type: &typ,
Analyzer: &analyzer,
Store: &store,
Index: &index,
IncludeTermVectors: &includeTermVectors,
IncludeInAll: &includeInAll,
}
}
func (fm *FieldMapping) Options() document.IndexingOptions {
var rv document.IndexingOptions
if *fm.Store {
rv |= document.STORE_FIELD
}
if *fm.Index {
rv |= document.INDEX_FIELD
}
if *fm.IncludeTermVectors {
rv |= document.INCLUDE_TERM_VECTORS
}
return rv
}
func (fm *FieldMapping) GoString() string {
return fmt.Sprintf("&bleve.FieldMapping{Name:%s, Type:%s, Analyzer:%s, Store:%t, Index:%t}", *fm.Name, *fm.Type, *fm.Analyzer, *fm.Store, *fm.Index)
}
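For clarity (not part of the commit): Options() just folds the three booleans into document.IndexingOptions bits, sketched here as if inside package bleve.
func exampleFieldOptions() document.IndexingOptions {
	fm := NewFieldMapping("name", "text", "standard", true, true, false, true)
	// STORE_FIELD and INDEX_FIELD are set; INCLUDE_TERM_VECTORS stays clear because
	// includeTermVectors was false. IncludeInAll is not an indexing option at all;
	// it only controls membership in the synthetic _all field.
	return fm.Options()
}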

316
mapping_index.go Normal file
View File

@ -0,0 +1,316 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"encoding/json"
"fmt"
"reflect"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/document"
)
var tRUE = true
var fALSE = false
var DEFAULT_ID_FIELD = "_id"
var DEFAULT_TYPE_FIELD = "_type"
var DEFAULT_TYPE = "_default"
type IndexMapping struct {
TypeMapping map[string]*DocumentMapping `json:"types"`
DefaultMapping *DocumentMapping `json:"default_mapping"`
IdField *string `json:"id_field"`
TypeField *string `json:"type_field"`
DefaultType *string `json:"default_type"`
DefaultAnalyzer *string `json:"default_analyzer"`
}
func (im *IndexMapping) GoString() string {
return fmt.Sprintf("&bleve.IndexMapping{TypeMapping:%#v, TypeField:%s, DefaultType:%s}", im.TypeMapping, *im.TypeField, *im.DefaultType)
}
func NewIndexMapping() *IndexMapping {
return &IndexMapping{
TypeMapping: make(map[string]*DocumentMapping),
DefaultMapping: NewDocumentMapping(),
IdField: &DEFAULT_ID_FIELD,
TypeField: &DEFAULT_TYPE_FIELD,
DefaultType: &DEFAULT_TYPE,
}
}
func (im *IndexMapping) AddDocumentMapping(doctype string, dm *DocumentMapping) *IndexMapping {
im.TypeMapping[doctype] = dm
return im
}
func (im *IndexMapping) SetTypeField(typeField string) *IndexMapping {
im.TypeField = &typeField
return im
}
func (im *IndexMapping) SetDefaultAnalyzer(analyzer string) *IndexMapping {
im.DefaultAnalyzer = &analyzer
return im
}
func (im *IndexMapping) MappingForType(docType string) *DocumentMapping {
docMapping := im.TypeMapping[docType]
if docMapping == nil {
docMapping = im.DefaultMapping
}
return docMapping
}
func (im *IndexMapping) UnmarshalJSON(data []byte) error {
var tmp struct {
TypeMapping map[string]*DocumentMapping `json:"types"`
DefaultMapping *DocumentMapping `json:"default_mapping"`
IdField *string `json:"id_field"`
TypeField *string `json:"type_field"`
DefaultType *string `json:"default_type"`
DefaultAnalyzer *string `json:"default_analyzer"`
}
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
im.IdField = &DEFAULT_ID_FIELD
if tmp.IdField != nil {
im.IdField = tmp.IdField
}
im.TypeField = &DEFAULT_TYPE_FIELD
if tmp.TypeField != nil {
im.TypeField = tmp.TypeField
}
im.DefaultType = &DEFAULT_TYPE
if tmp.DefaultType != nil {
im.DefaultType = tmp.DefaultType
}
im.DefaultMapping = NewDocumentMapping()
if tmp.DefaultMapping != nil {
im.DefaultMapping = tmp.DefaultMapping
}
if tmp.DefaultAnalyzer != nil {
im.DefaultAnalyzer = tmp.DefaultAnalyzer
}
im.TypeMapping = make(map[string]*DocumentMapping, len(tmp.TypeMapping))
for typeName, typeDocMapping := range tmp.TypeMapping {
im.TypeMapping[typeName] = typeDocMapping
}
return nil
}
func (im *IndexMapping) determineType(data interface{}) (string, bool) {
// first see if the object implements Classifier
classifier, ok := data.(Classifier)
if ok {
return classifier.Type(), true
}
// now see if we can find the type using the mapping
if im.TypeField != nil {
typ, ok := mustString(lookupPropertyPath(data, *im.TypeField))
if ok {
return typ, true
}
}
// fall back to default type if there was one
if im.DefaultType != nil {
return *im.DefaultType, true
}
return "", false
}
func (im *IndexMapping) MapDocument(doc *document.Document, data interface{}) error {
docType, ok := im.determineType(data)
if !ok {
return ERROR_NO_TYPE
}
docMapping := im.MappingForType(docType)
walkContext := newWalkContext(doc, docMapping)
im.walkDocument(data, []string{}, walkContext)
// see if the _all field was disabled
allMapping := docMapping.DocumentMappingForPath("_all")
if allMapping == nil || (allMapping.Enabled != nil && *allMapping.Enabled != false) {
field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS)
doc.AddField(field)
}
return nil
}
type walkContext struct {
doc *document.Document
dm *DocumentMapping
excludedFromAll []string
}
func newWalkContext(doc *document.Document, dm *DocumentMapping) *walkContext {
return &walkContext{
doc: doc,
dm: dm,
excludedFromAll: []string{},
}
}
func (im *IndexMapping) walkDocument(data interface{}, path []string, context *walkContext) {
val := reflect.ValueOf(data)
typ := val.Type()
switch typ.Kind() {
case reflect.Map:
// FIXME can add support for other map keys in the future
if typ.Key().Kind() == reflect.String {
for _, key := range val.MapKeys() {
fieldName := key.String()
fieldVal := val.MapIndex(key).Interface()
im.processProperty(fieldVal, append(path, fieldName), context)
}
}
case reflect.Struct:
for i := 0; i < val.NumField(); i++ {
field := typ.Field(i)
fieldName := field.Name
// if the field has a JSON name, prefer that
jsonTag := field.Tag.Get("json")
jsonFieldName := parseJSONTagName(jsonTag)
if jsonFieldName != "" {
fieldName = jsonFieldName
}
if val.Field(i).CanInterface() {
fieldVal := val.Field(i).Interface()
im.processProperty(fieldVal, append(path, fieldName), context)
}
}
case reflect.Slice, reflect.Array:
for i := 0; i < val.Len(); i++ {
if val.Index(i).CanInterface() {
fieldVal := val.Index(i).Interface()
im.processProperty(fieldVal, path, context)
}
}
case reflect.Ptr:
ptrElem := val.Elem()
if ptrElem.CanInterface() {
im.walkDocument(ptrElem.Interface(), path, context)
}
}
}
func (im *IndexMapping) processProperty(property interface{}, path []string, context *walkContext) {
pathString := encodePath(path)
// look to see if there is a mapping for this field
subDocMapping := context.dm.DocumentMappingForPath(pathString)
// check to see if we even need to do further processing
if subDocMapping != nil && subDocMapping.Enabled != nil && !*subDocMapping.Enabled {
return
}
propertyValue := reflect.ValueOf(property)
propertyType := propertyValue.Type()
switch propertyType.Kind() {
case reflect.String:
propertyValueString := propertyValue.String()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
if *fieldMapping.Type == "text" {
fieldName := pathString
if fieldMapping.Name != nil && *fieldMapping.Name != "" {
parentName := ""
if len(path) > 1 {
parentName = encodePath(path[:len(path)-1]) + PATH_SEPARATOR
}
fieldName = parentName + *fieldMapping.Name
}
options := fieldMapping.Options()
analyzer := config.Analysis.Analyzers[*fieldMapping.Analyzer]
if analyzer != nil {
field := document.NewTextFieldCustom(fieldName, []byte(propertyValueString), options, analyzer)
context.doc.AddField(field)
if fieldMapping.IncludeInAll != nil && !*fieldMapping.IncludeInAll {
context.excludedFromAll = append(context.excludedFromAll, fieldName)
}
}
}
}
} else {
// automatic indexing behavior
options := document.STORE_FIELD | document.INDEX_FIELD | document.INCLUDE_TERM_VECTORS
analyzer := im.defaultAnalyzer(context.dm, path)
field := document.NewTextFieldCustom(pathString, []byte(propertyValueString), options, analyzer)
context.doc.AddField(field)
}
default:
im.walkDocument(property, path, context)
}
}
func (im *IndexMapping) defaultAnalyzer(dm *DocumentMapping, path []string) *analysis.Analyzer {
// first see if the document mapping has an analyzer
rv := dm.defaultAnalyzer(path)
if rv == nil {
if im.DefaultAnalyzer != nil {
rv = config.Analysis.Analyzers[*im.DefaultAnalyzer]
} else if config.DefaultAnalyzer != nil {
rv = config.Analysis.Analyzers[*config.DefaultAnalyzer]
}
}
return rv
}
// attempts to find the best analyzer to use with only a field name
// will walk all the document types, look for field mappings at the
// provided path; if one exists and it has an explicit analyzer,
// that is returned
// nil is an acceptable return value, meaning we don't know
func (im *IndexMapping) analyzerForPath(path string) *analysis.Analyzer {
// first we look for explicit mapping on the field
for _, docMapping := range im.TypeMapping {
pathMapping := docMapping.DocumentMappingForPath(path)
if pathMapping != nil {
if len(pathMapping.Fields) > 0 {
if pathMapping.Fields[0].Analyzer != nil {
return config.Analysis.Analyzers[*pathMapping.Fields[0].Analyzer]
}
}
}
}
// next we will try default analyzers for the path
for _, docMapping := range im.TypeMapping {
rv := im.defaultAnalyzer(docMapping, decodePath(path))
if rv != nil {
return rv
}
}
// finally just return the system-wide default analyzer
return config.Analysis.Analyzers[*config.DefaultAnalyzer]
}
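A hypothetical mapping document that the UnmarshalJSON above accepts, tying the defaults together: the "_all" section is disabled for one type (so MapDocument skips the composite field) and a per-type default_analyzer overrides the index-wide one. Analyzer and property names are illustrative; needs encoding/json.
func exampleIndexMappingJSON() (*IndexMapping, error) {
	var im IndexMapping
	err := json.Unmarshal([]byte(`{
		"types": {
			"person": {
				"default_analyzer": "standard",
				"properties": {
					"_all":  { "enabled": false },
					"notes": { "enabled": false }
				}
			}
		},
		"type_field": "_type",
		"default_type": "_default"
	}`), &im)
	if err != nil {
		return nil, err
	}
	// for "person" documents MapDocument will not add the _all composite field,
	// and processProperty will skip everything under "notes"
	return &im, nil
}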

60
mapping_test.go Normal file
View File

@ -0,0 +1,60 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"encoding/json"
"reflect"
"testing"
)
var mappingSource = []byte(`{
"types": {
"beer": {
"properties": {
"name": {
"fields": [
{
"name": "name",
"type": "text",
"analyzer": "standard",
"store": true,
"index": true,
"include_term_vectors": true,
"include_in_all": true
}
]
}
}
},
"brewery": {
}
},
"type_field": "_type",
"default_type": "_default"
}`)
var nameField = NewFieldMapping("name", "text", "standard", true, true, true, true)
var nameMapping = NewDocumentMapping().AddFieldMapping(nameField)
var beerMapping = NewDocumentMapping().AddSubDocumentMapping("name", nameMapping)
var breweryMapping = NewDocumentMapping()
var mappingObject = NewIndexMapping().
AddDocumentMapping("beer", beerMapping).
AddDocumentMapping("brewery", breweryMapping)
func TestUnmarshalMappingJSON(t *testing.T) {
var indexMapping IndexMapping
err := json.Unmarshal(mappingSource, &indexMapping)
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(&indexMapping, mappingObject) {
t.Errorf("expected %#v,\n got %#v", mappingObject, &indexMapping)
}
}

View File

@ -6,24 +6,23 @@
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
package bleve
import (
"encoding/json"
"fmt"
"log"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
"github.com/couchbaselabs/bleve/search"
)
type Query interface {
Boost() float64
Searcher(index index.Index) (Searcher, error)
Searcher(i *indexImpl, explain bool) (search.Searcher, error)
Validate() error
}
func ParseQuery(input []byte, mapping document.Mapping) (Query, error) {
func ParseQuery(input []byte) (Query, error) {
var tmp map[string]interface{}
err := json.Unmarshal(input, &tmp)
if err != nil {
@ -42,7 +41,6 @@ func ParseQuery(input []byte, mapping document.Mapping) (Query, error) {
if isMatchQuery {
log.Printf("detected match query")
var rv MatchQuery
rv.mapping = mapping
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
@ -53,7 +51,6 @@ func ParseQuery(input []byte, mapping document.Mapping) (Query, error) {
if isMatchPhraseQuery {
log.Printf("detected match phrase query")
var rv MatchPhraseQuery
rv.mapping = mapping
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
@ -64,8 +61,7 @@ func ParseQuery(input []byte, mapping document.Mapping) (Query, error) {
_, hasShould := tmp["should"]
_, hasMustNot := tmp["must_not"]
if hasMust || hasShould || hasMustNot {
var rv TermBooleanQuery
rv.mapping = mapping
var rv BooleanQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
@ -84,7 +80,6 @@ func ParseQuery(input []byte, mapping document.Mapping) (Query, error) {
_, hasSyntaxQuery := tmp["query"]
if hasSyntaxQuery {
var rv SyntaxQuery
rv.mapping = mapping
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
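With the mapping parameter gone, parsing a query is now a single call; the key detection follows the hunks above (illustrative JSON, sketched as if inside package bleve):
func exampleParseQuery() (Query, error) {
	q, err := ParseQuery([]byte(`{"match": "rice", "field": "desc"}`))
	if err != nil {
		return nil, err
	}
	// a "match" key yields a MatchQuery, "match_phrase" a MatchPhraseQuery,
	// "must"/"should"/"must_not" a BooleanQuery, and a bare "query" key a SyntaxQuery
	return q, q.Validate()
}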

80
query_boolean.go Normal file
View File

@ -0,0 +1,80 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"fmt"
"github.com/couchbaselabs/bleve/search"
)
type BooleanQuery struct {
Must *ConjunctionQuery `json:"must,omitempty"`
Should *DisjunctionQuery `json:"should,omitempty"`
MustNot *DisjunctionQuery `json:"must_not,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
}
func NewBooleanQuery(must *ConjunctionQuery, should *DisjunctionQuery, mustNot *DisjunctionQuery) *BooleanQuery {
return &BooleanQuery{
Must: must,
Should: should,
MustNot: mustNot,
BoostVal: 1.0,
}
}
func (q *BooleanQuery) Boost() float64 {
return q.BoostVal
}
func (q *BooleanQuery) SetBoost(b float64) *BooleanQuery {
q.BoostVal = b
return q
}
func (q *BooleanQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
var err error
var mustSearcher *search.TermConjunctionSearcher
if q.Must != nil {
mustSearcher, err = q.Must.Searcher(i, explain)
if err != nil {
return nil, err
}
}
var shouldSearcher *search.TermDisjunctionSearcher
if q.Should != nil {
shouldSearcher, err = q.Should.Searcher(i, explain)
if err != nil {
return nil, err
}
}
var mustNotSearcher *search.TermDisjunctionSearcher
if q.MustNot != nil {
mustNotSearcher, err = q.MustNot.Searcher(i, explain)
if err != nil {
return nil, err
}
}
return search.NewTermBooleanSearcher(i.i, mustSearcher, shouldSearcher, mustNotSearcher, explain)
}
func (q *BooleanQuery) Validate() error {
if q.Must == nil && q.Should == nil {
return fmt.Errorf("Boolean query must contain at least one MUST or SHOULD clause")
}
if q.Must != nil && len(q.Must.Conjuncts) == 0 && q.Should != nil && len(q.Should.Disjuncts) == 0 {
return fmt.Errorf("Boolean query must contain at least one MUST or SHOULD clause")
}
return nil
}
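A hypothetical construction of the boolean query from the other new top-level query types (field and term values are made up):
func exampleBooleanQuery() error {
	must := NewConjunctionQuery([]Query{
		NewTermQuery("beer").SetField("desc"),
	})
	mustNot := NewDisjunctionQuery([]Query{
		NewTermQuery("lager").SetField("desc"),
	})
	q := NewBooleanQuery(must, nil, mustNot).SetBoost(2.0)
	// Validate passes because at least one MUST clause is present
	return q.Validate()
}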

78
query_conjunction.go Normal file
View File

@ -0,0 +1,78 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"encoding/json"
"github.com/couchbaselabs/bleve/search"
)
type ConjunctionQuery struct {
Conjuncts []Query `json:"terms"`
BoostVal float64 `json:"boost,omitempty"`
}
func NewConjunctionQuery(conjuncts []Query) *ConjunctionQuery {
return &ConjunctionQuery{
Conjuncts: conjuncts,
BoostVal: 1.0,
}
}
func (q *ConjunctionQuery) Boost() float64 {
return q.BoostVal
}
func (q *ConjunctionQuery) SetBoost(b float64) *ConjunctionQuery {
q.BoostVal = b
return q
}
func (q *ConjunctionQuery) AddQuery(aq Query) *ConjunctionQuery {
q.Conjuncts = append(q.Conjuncts, aq)
return q
}
func (q *ConjunctionQuery) Searcher(i *indexImpl, explain bool) (*search.TermConjunctionSearcher, error) {
searchers := make([]search.Searcher, len(q.Conjuncts))
for in, conjunct := range q.Conjuncts {
var err error
searchers[in], err = conjunct.Searcher(i, explain)
if err != nil {
return nil, err
}
}
return search.NewTermConjunctionSearcher(i.i, searchers, explain)
}
func (q *ConjunctionQuery) Validate() error {
return nil
}
func (q *ConjunctionQuery) UnmarshalJSON(data []byte) error {
tmp := struct {
Conjuncts []json.RawMessage `json:"terms"`
BoostVal float64 `json:"boost,omitempty"`
}{}
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
q.Conjuncts = make([]Query, len(tmp.Conjuncts))
for i, term := range tmp.Conjuncts {
query, err := ParseQuery(term)
if err != nil {
return err
}
q.Conjuncts[i] = query
}
q.BoostVal = tmp.BoostVal
return nil
}
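Because the UnmarshalJSON above re-dispatches every raw element through ParseQuery, arbitrary query types can be nested under "terms"; a hedged example (needs encoding/json):
func exampleConjunctionJSON() (*ConjunctionQuery, error) {
	var cq ConjunctionQuery
	err := json.Unmarshal([]byte(`{
		"terms": [
			{"match": "light", "field": "desc"},
			{"match": "beer",  "field": "desc"}
		],
		"boost": 2.0
	}`), &cq)
	if err != nil {
		return nil, err
	}
	// cq.Conjuncts now holds two MatchQuery values produced by ParseQuery
	return &cq, nil
}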

94
query_disjunction.go Normal file
View File

@ -0,0 +1,94 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"encoding/json"
"fmt"
"github.com/couchbaselabs/bleve/search"
)
type DisjunctionQuery struct {
Disjuncts []Query `json:"terms"`
BoostVal float64 `json:"boost,omitempty"`
MinVal float64 `json:"min"`
}
func NewDisjunctionQuery(disjuncts []Query) *DisjunctionQuery {
return &DisjunctionQuery{
Disjuncts: disjuncts,
BoostVal: 1.0,
}
}
func (q *DisjunctionQuery) Boost() float64 {
return q.BoostVal
}
func (q *DisjunctionQuery) SetBoost(b float64) *DisjunctionQuery {
q.BoostVal = b
return q
}
func (q *DisjunctionQuery) AddQuery(aq Query) *DisjunctionQuery {
q.Disjuncts = append(q.Disjuncts, aq)
return q
}
func (q *DisjunctionQuery) Min() float64 {
return q.MinVal
}
func (q *DisjunctionQuery) SetMin(m float64) *DisjunctionQuery {
q.MinVal = m
return q
}
func (q *DisjunctionQuery) Searcher(i *indexImpl, explain bool) (*search.TermDisjunctionSearcher, error) {
searchers := make([]search.Searcher, len(q.Disjuncts))
for in, disjunct := range q.Disjuncts {
var err error
searchers[in], err = disjunct.Searcher(i, explain)
if err != nil {
return nil, err
}
}
return search.NewTermDisjunctionSearcher(i.i, searchers, q.MinVal, explain)
}
func (q *DisjunctionQuery) Validate() error {
if int(q.MinVal) > len(q.Disjuncts) {
return fmt.Errorf("Minimum clauses in disjunction exceeds total number of clauses")
}
return nil
}
func (q *DisjunctionQuery) UnmarshalJSON(data []byte) error {
tmp := struct {
Disjuncts []json.RawMessage `json:"terms"`
BoostVal float64 `json:"boost,omitempty"`
MinVal float64 `json:"min"`
}{}
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
q.Disjuncts = make([]Query, len(tmp.Disjuncts))
for i, term := range tmp.Disjuncts {
query, err := ParseQuery(term)
if err != nil {
return err
}
q.Disjuncts[i] = query
}
q.BoostVal = tmp.BoostVal
q.MinVal = tmp.MinVal
return nil
}
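Min is the only knob beyond the clause list; a small sketch (terms are illustrative):
func exampleDisjunctionQuery() error {
	dq := NewDisjunctionQuery([]Query{
		NewTermQuery("ipa").SetField("desc"),
		NewTermQuery("stout").SetField("desc"),
	}).SetMin(2)
	// Validate only rejects MinVal greater than the number of disjuncts;
	// at search time the min is handed to the term disjunction searcher,
	// presumably requiring at least that many clauses to match
	return dq.Validate()
}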

85
query_match.go Normal file
View File

@ -0,0 +1,85 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"fmt"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/search"
)
type MatchQuery struct {
Match string `json:"match"`
FieldVal string `json:"field,omitempty"`
Analyzer string `json:"analyzer,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
}
func NewMatchQuery(match string) *MatchQuery {
return &MatchQuery{
Match: match,
BoostVal: 1.0,
}
}
func (q *MatchQuery) Boost() float64 {
return q.BoostVal
}
func (q *MatchQuery) SetBoost(b float64) *MatchQuery {
q.BoostVal = b
return q
}
func (q *MatchQuery) Field() string {
return q.FieldVal
}
func (q *MatchQuery) SetField(f string) *MatchQuery {
q.FieldVal = f
return q
}
func (q *MatchQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
var analyzer *analysis.Analyzer
if q.Analyzer != "" {
analyzer = config.Analysis.Analyzers[q.Analyzer]
} else {
analyzer = i.m.analyzerForPath(q.FieldVal)
}
if analyzer == nil {
return nil, fmt.Errorf("no analyzer named '%s' registered", q.Analyzer)
}
tokens := analyzer.Analyze([]byte(q.Match))
if len(tokens) > 0 {
tqs := make([]Query, len(tokens))
for i, token := range tokens {
tqs[i] = NewTermQuery(string(token.Term)).
SetField(q.FieldVal).
SetBoost(q.BoostVal)
}
shouldQuery := NewDisjunctionQuery(tqs).
SetBoost(q.BoostVal).
SetMin(1)
return shouldQuery.Searcher(i, explain)
} else {
noneQuery := NewMatchNoneQuery()
return noneQuery.Searcher(i, explain)
}
}
func (q *MatchQuery) Validate() error {
return nil
}
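To make the expansion above concrete (the analyzer output is illustrative): a match query analyzes its text and runs as a min-1 disjunction of term queries on the same field, so the first query below behaves like the second.
func exampleMatchExpansion() (Query, Query) {
	q := NewMatchQuery("eat more rice").SetField("desc")
	// assuming the analyzer yields the tokens ["eat", "more", "rice"]
	equivalent := NewDisjunctionQuery([]Query{
		NewTermQuery("eat").SetField("desc"),
		NewTermQuery("more").SetField("desc"),
		NewTermQuery("rice").SetField("desc"),
	}).SetMin(1)
	return q, equivalent
}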

View File

@ -6,23 +6,33 @@
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
package bleve
import (
"github.com/couchbaselabs/bleve/index"
"github.com/couchbaselabs/bleve/search"
)
type MatchAllQuery struct {
BoostVal float64 `json:"boost,omitempty"`
Explain bool `json:"explain,omitempty"`
}
func NewMatchAllQuery() *MatchAllQuery {
return &MatchAllQuery{
BoostVal: 1.0,
}
}
func (q *MatchAllQuery) Boost() float64 {
return q.BoostVal
}
func (q *MatchAllQuery) Searcher(index index.Index) (Searcher, error) {
return NewMatchAllSearcher(index, q)
func (q *MatchAllQuery) SetBoost(b float64) *MatchAllQuery {
q.BoostVal = b
return q
}
func (q *MatchAllQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
return search.NewMatchAllSearcher(i.i, q.BoostVal, explain)
}
func (q *MatchAllQuery) Validate() error {

View File

@ -6,23 +6,33 @@
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
package bleve
import (
"github.com/couchbaselabs/bleve/index"
"github.com/couchbaselabs/bleve/search"
)
type MatchNoneQuery struct {
BoostVal float64 `json:"boost,omitempty"`
Explain bool `json:"explain,omitempty"`
}
func NewMatchNoneQuery() *MatchNoneQuery {
return &MatchNoneQuery{
BoostVal: 1.0,
}
}
func (q *MatchNoneQuery) Boost() float64 {
return q.BoostVal
}
func (q *MatchNoneQuery) Searcher(index index.Index) (Searcher, error) {
return NewMatchNoneSearcher(index, q)
func (q *MatchNoneQuery) SetBoost(b float64) *MatchNoneQuery {
q.BoostVal = b
return q
}
func (q *MatchNoneQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
return search.NewMatchNoneSearcher(i.i)
}
func (q *MatchNoneQuery) Validate() error {

82
query_match_phrase.go Normal file
View File

@ -0,0 +1,82 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"fmt"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/search"
)
type MatchPhraseQuery struct {
MatchPhrase string `json:"match_phrase"`
FieldVal string `json:"field,omitempty"`
Analyzer string `json:"analyzer,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
}
func NewMatchPhraseQuery(matchPhrase string) *MatchPhraseQuery {
return &MatchPhraseQuery{
MatchPhrase: matchPhrase,
BoostVal: 1.0,
}
}
func (q *MatchPhraseQuery) Boost() float64 {
return q.BoostVal
}
func (q *MatchPhraseQuery) SetBoost(b float64) *MatchPhraseQuery {
q.BoostVal = b
return q
}
func (q *MatchPhraseQuery) Field() string {
return q.FieldVal
}
func (q *MatchPhraseQuery) SetField(f string) *MatchPhraseQuery {
q.FieldVal = f
return q
}
func (q *MatchPhraseQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
var analyzer *analysis.Analyzer
if q.Analyzer != "" {
analyzer = config.Analysis.Analyzers[q.Analyzer]
} else {
analyzer = i.m.analyzerForPath(q.FieldVal)
}
if analyzer == nil {
return nil, fmt.Errorf("no analyzer named '%s' registered", q.Analyzer)
}
tokens := analyzer.Analyze([]byte(q.MatchPhrase))
if len(tokens) > 0 {
tqs := make([]*TermQuery, len(tokens))
for i, token := range tokens {
tqs[i] = NewTermQuery(string(token.Term)).
SetField(q.FieldVal).
SetBoost(q.BoostVal)
}
phraseQuery := NewPhraseQuery(tqs)
return phraseQuery.Searcher(i, explain)
} else {
noneQuery := NewMatchNoneQuery()
return noneQuery.Searcher(i, explain)
}
}
func (q *MatchPhraseQuery) Validate() error {
return nil
}
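Same pattern as MatchQuery, except the analyzed terms feed a PhraseQuery (conjunction plus phrase searcher) rather than a disjunction; an illustrative expansion:
func exampleMatchPhraseExpansion() (Query, Query) {
	q := NewMatchPhraseQuery("eat more rice").SetField("desc")
	// assuming the analyzer yields the tokens ["eat", "more", "rice"]
	equivalent := NewPhraseQuery([]*TermQuery{
		NewTermQuery("eat").SetField("desc"),
		NewTermQuery("more").SetField("desc"),
		NewTermQuery("rice").SetField("desc"),
	})
	return q, equivalent
}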

60
query_phrase.go Normal file
View File

@ -0,0 +1,60 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"fmt"
"github.com/couchbaselabs/bleve/search"
)
type PhraseQuery struct {
Terms []*TermQuery `json:"terms"`
BoostVal float64 `json:"boost,omitempty"`
}
func NewPhraseQuery(terms []*TermQuery) *PhraseQuery {
return &PhraseQuery{
Terms: terms,
BoostVal: 1.0,
}
}
func (q *PhraseQuery) Boost() float64 {
return q.BoostVal
}
func (q *PhraseQuery) SetBoost(b float64) *PhraseQuery {
q.BoostVal = b
return q
}
func (q *PhraseQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
terms := make([]string, len(q.Terms))
conjuncts := make([]Query, len(q.Terms))
for i, term := range q.Terms {
conjuncts[i] = term
terms[i] = term.Term
}
conjunctionQuery := NewConjunctionQuery(conjuncts)
conjunctionSearcher, err := conjunctionQuery.Searcher(i, explain)
if err != nil {
return nil, err
}
return search.NewPhraseSearcher(i.i, conjunctionSearcher, terms)
}
func (q *PhraseQuery) Validate() error {
if q.Terms == nil {
return fmt.Errorf("Phrase query must contain at least one term")
}
return nil
}

View File

@ -6,31 +6,49 @@
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
package bleve
import (
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
"github.com/couchbaselabs/bleve/search"
)
type SyntaxQuery struct {
Query string `json:"query"`
BoostVal float64 `json:"boost,omitempty"`
Explain bool `json:"explain,omitempty"`
DefaultField string `json:"default_field,omitemtpy"`
mapping document.Mapping
Query string `json:"query"`
DefaultFieldVal string `json:"default_field,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
}
func NewSyntaxQuery(query string) *SyntaxQuery {
return &SyntaxQuery{
Query: query,
BoostVal: 1.0,
}
}
func (q *SyntaxQuery) Boost() float64 {
return q.BoostVal
}
func (q *SyntaxQuery) Searcher(index index.Index) (Searcher, error) {
newQuery, err := ParseQuerySyntax(q.Query, q.mapping, q.DefaultField)
func (q *SyntaxQuery) SetBoost(b float64) *SyntaxQuery {
q.BoostVal = b
return q
}
func (q *SyntaxQuery) DefaultField() string {
return q.DefaultFieldVal
}
func (q *SyntaxQuery) SetField(f string) *SyntaxQuery {
q.DefaultFieldVal = f
return q
}
func (q *SyntaxQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
newQuery, err := ParseQuerySyntax(q.Query, i.m, q.DefaultFieldVal)
if err != nil {
return nil, err
}
return newQuery.Searcher(index)
return newQuery.Searcher(i, explain)
}
func (q *SyntaxQuery) Validate() error {
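A hedged sketch of using the syntax query; the exact grammar lives in the generated parser and lexer below (whose tokens include '+', ':', '^', quoted strings and numbers), so the field:term form here is an assumption:
func exampleSyntaxQuery() error {
	q := NewSyntaxQuery("desc:rice").SetField("desc")
	// parsing is deferred: Searcher calls ParseQuerySyntax(q.Query, i.m, q.DefaultFieldVal)
	// and delegates to the resulting query's Searcher, so syntax errors surface at search time
	return q.Validate()
}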

View File

@ -22,7 +22,7 @@
return STRING
}
//
package search
package bleve
import("log")
import("strconv")

View File

@ -1,4 +1,4 @@
package search
package bleve
import (
"log"
@ -23,382 +23,382 @@ a = make([]family, 1)
{
var acc [18]bool
var fun [18]func(rune) int
fun[1] = func(r rune) int {
switch(r) {
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 3
case 47: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[14] = func(r rune) int {
switch(r) {
case 117: return 3
case 102: return 15
case 47: return 3
case 34: return 2
case 98: return 15
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
default:
switch {
case 48 <= r && r <= 57: return 15
case 65 <= r && r <= 70: return 15
case 97 <= r && r <= 102: return 15
default: return 3
}
}
panic("unreachable")
}
fun[10] = func(r rune) int {
switch(r) {
case 117: return 3
case 102: return 3
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[3] = func(r rune) int {
switch(r) {
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 3
case 47: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[4] = func(r rune) int {
switch(r) {
case 117: return 5
case 102: return 6
case 47: return 7
case 34: return 8
case 98: return 9
case 110: return 10
case 92: return 11
case 116: return 12
case 114: return 13
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[16] = func(r rune) int {
switch(r) {
case 34: return 2
case 98: return 17
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 17
case 47: return 3
default:
switch {
case 48 <= r && r <= 57: return 17
case 65 <= r && r <= 70: return 17
case 97 <= r && r <= 102: return 17
default: return 3
}
}
panic("unreachable")
}
fun[15] = func(r rune) int {
switch(r) {
case 114: return 3
case 117: return 3
case 102: return 16
case 47: return 3
case 34: return 2
case 98: return 16
case 110: return 3
case 92: return 4
case 116: return 3
default:
switch {
case 48 <= r && r <= 57: return 16
case 65 <= r && r <= 70: return 16
case 97 <= r && r <= 102: return 16
default: return 3
}
}
panic("unreachable")
}
fun[17] = func(r rune) int {
switch(r) {
case 117: return 3
case 102: return 3
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[11] = func(r rune) int {
switch(r) {
case 117: return 5
case 102: return 6
case 47: return 7
case 34: return 8
case 98: return 9
case 110: return 10
case 92: return 11
case 116: return 12
case 98: return 12
case 114: return 13
case 34: return 5
case 117: return 6
case 47: return 7
case 116: return 8
case 110: return 9
case 102: return 10
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[12] = func(r rune) int {
switch(r) {
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 3
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
acc[8] = true
fun[8] = func(r rune) int {
switch(r) {
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
case 48 <= r && r <= 57: return 2
case 65 <= r && r <= 70: return 2
case 97 <= r && r <= 102: return 2
default: return 2
}
}
panic("unreachable")
}
fun[6] = func(r rune) int {
switch(r) {
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[9] = func(r rune) int {
switch(r) {
case 117: return 3
case 102: return 3
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[7] = func(r rune) int {
switch(r) {
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 102: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
acc[2] = true
fun[2] = func(r rune) int {
switch(r) {
case 110: return -1
case 92: return -1
case 116: return -1
case 114: return -1
case 117: return -1
case 102: return -1
case 47: return -1
case 34: return -1
case 98: return -1
default:
switch {
case 48 <= r && r <= 57: return -1
case 65 <= r && r <= 70: return -1
case 97 <= r && r <= 102: return -1
default: return -1
}
}
panic("unreachable")
}
fun[13] = func(r rune) int {
switch(r) {
case 117: return 3
case 102: return 3
case 47: return 3
case 34: return 2
case 98: return 3
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
default:
switch {
case 48 <= r && r <= 57: return 3
case 65 <= r && r <= 70: return 3
case 97 <= r && r <= 102: return 3
default: return 3
}
}
panic("unreachable")
}
fun[0] = func(r rune) int {
switch(r) {
case 117: return -1
case 102: return -1
case 47: return -1
case 34: return 1
case 98: return -1
case 110: return -1
case 92: return -1
case 116: return -1
case 114: return -1
default:
switch {
case 48 <= r && r <= 57: return -1
case 65 <= r && r <= 70: return -1
case 97 <= r && r <= 102: return -1
default: return -1
}
}
panic("unreachable")
}
fun[5] = func(r rune) int {
switch(r) {
case 110: return 3
case 92: return 4
case 116: return 3
case 114: return 3
case 117: return 3
case 114: return 2
case 34: return 4
case 117: return 2
case 47: return 2
case 116: return 2
case 110: return 2
case 102: return 14
case 47: return 3
case 34: return 2
case 92: return 3
case 98: return 14
default:
switch {
case 48 <= r && r <= 57: return 14
case 65 <= r && r <= 70: return 14
case 97 <= r && r <= 102: return 14
default: return 3
default: return 2
}
}
panic("unreachable")
}
acc[4] = true
fun[4] = func(r rune) int {
switch(r) {
case 92: return -1
case 98: return -1
case 114: return -1
case 34: return -1
case 117: return -1
case 47: return -1
case 116: return -1
case 110: return -1
case 102: return -1
default:
switch {
case 48 <= r && r <= 57: return -1
case 65 <= r && r <= 70: return -1
case 97 <= r && r <= 102: return -1
default: return -1
}
}
panic("unreachable")
}
fun[17] = func(r rune) int {
switch(r) {
case 92: return 3
case 98: return 2
case 114: return 2
case 34: return 4
case 117: return 2
case 47: return 2
case 116: return 2
case 110: return 2
case 102: return 2
default:
switch {
case 48 <= r && r <= 57: return 2
case 65 <= r && r <= 70: return 2
case 97 <= r && r <= 102: return 2
default: return 2
}
}
panic("unreachable")
}
acc[5] = true
fun[5] = func(r rune) int {
switch(r) {
case 110: return 2
case 102: return 2
case 92: return 3
case 98: return 2
case 114: return 2
case 34: return 4
case 117: return 2
case 47: return 2
case 116: return 2
default:
switch {
case 48 <= r && r <= 57: return 2
case 65 <= r && r <= 70: return 2
case 97 <= r && r <= 102: return 2
default: return 2
}
}
panic("unreachable")
}
fun[0] = func(r rune) int {
switch(r) {
case 114: return -1
case 34: return 1
case 117: return -1
case 47: return -1
case 116: return -1
case 110: return -1
case 102: return -1
case 92: return -1
case 98: return -1
default:
switch {
case 48 <= r && r <= 57: return -1
case 65 <= r && r <= 70: return -1
case 97 <= r && r <= 102: return -1
default: return -1
}
}
panic("unreachable")
}
fun[10] = func(r rune) int {
switch(r) {
case 114: return 2
case 34: return 4
case 117: return 2
case 47: return 2
case 116: return 2
case 110: return 2
case 102: return 2
case 92: return 3
case 98: return 2
default:
switch {
case 48 <= r && r <= 57: return 2
case 65 <= r && r <= 70: return 2
case 97 <= r && r <= 102: return 2
default: return 2
}
}
panic("unreachable")
}
fun[16] = func(r rune) int {
switch(r) {
case 102: return 17
case 92: return 3
case 98: return 17
case 114: return 2
case 34: return 4
case 117: return 2
case 47: return 2
case 116: return 2
case 110: return 2
default:
switch {
case 48 <= r && r <= 57: return 17
case 65 <= r && r <= 70: return 17
case 97 <= r && r <= 102: return 17
default: return 2
}
}
panic("unreachable")
}
fun[13] = func(r rune) int {
switch(r) {
case 117: return 2
case 47: return 2
case 116: return 2
case 110: return 2
case 102: return 2
case 92: return 3
case 98: return 2
case 114: return 2
case 34: return 4
default:
switch {
case 48 <= r && r <= 57: return 2
case 65 <= r && r <= 70: return 2
case 97 <= r && r <= 102: return 2
default: return 2
}
}
panic("unreachable")
}
fun[7] = func(r rune) int {
switch(r) {
case 92: return 3
case 98: return 2
case 114: return 2
case 34: return 4
case 117: return 2
case 47: return 2
case 116: return 2
case 110: return 2
case 102: return 2
default:
switch {
case 48 <= r && r <= 57: return 2
case 65 <= r && r <= 70: return 2
case 97 <= r && r <= 102: return 2
default: return 2
}
}
panic("unreachable")
}
fun[2] = func(r rune) int {
switch(r) {
case 47: return 2
case 116: return 2
case 110: return 2
case 102: return 2
case 92: return 3
case 98: return 2
case 114: return 2
case 34: return 4
case 117: return 2
default:
switch {
case 48 <= r && r <= 57: return 2
case 65 <= r && r <= 70: return 2
case 97 <= r && r <= 102: return 2
default: return 2
}
}
panic("unreachable")
}
fun[8] = func(r rune) int {
switch(r) {
case 92: return 3
case 98: return 2
case 114: return 2
case 34: return 4
case 117: return 2
case 47: return 2
case 116: return 2
case 110: return 2
case 102: return 2
default:
switch {
case 48 <= r && r <= 57: return 2
case 65 <= r && r <= 70: return 2
case 97 <= r && r <= 102: return 2
default: return 2
}
}
panic("unreachable")
}
fun[3] = func(r rune) int {
switch(r) {
case 34: return 5
case 117: return 6
case 47: return 7
case 116: return 8
case 110: return 9
case 102: return 10
case 92: return 11
case 98: return 12
case 114: return 13
default:
switch {
case 48 <= r && r <= 57: return 2
case 65 <= r && r <= 70: return 2
case 97 <= r && r <= 102: return 2
default: return 2
}
}
panic("unreachable")
}
fun[12] = func(r rune) int {
switch(r) {
case 47: return 2
case 116: return 2
case 110: return 2
case 102: return 2
case 92: return 3
case 98: return 2
case 114: return 2
case 34: return 4
case 117: return 2
default:
switch {
case 48 <= r && r <= 57: return 2
case 65 <= r && r <= 70: return 2
case 97 <= r && r <= 102: return 2
default: return 2
}
}
panic("unreachable")
}
fun[14] = func(r rune) int {
switch(r) {
case 117: return 2
case 47: return 2
case 116: return 2
case 110: return 2
case 102: return 15
case 92: return 3
case 98: return 15
case 114: return 2
case 34: return 4
default:
switch {
case 48 <= r && r <= 57: return 15
case 65 <= r && r <= 70: return 15
case 97 <= r && r <= 102: return 15
default: return 2
}
}
panic("unreachable")
}
fun[15] = func(r rune) int {
switch(r) {
case 47: return 2
case 116: return 2
case 110: return 2
case 102: return 16
case 92: return 3
case 98: return 16
case 114: return 2
case 34: return 4
case 117: return 2
default:
switch {
case 48 <= r && r <= 57: return 16
case 65 <= r && r <= 70: return 16
case 97 <= r && r <= 102: return 16
default: return 2
}
}
panic("unreachable")
}
fun[1] = func(r rune) int {
switch(r) {
case 47: return 2
case 116: return 2
case 110: return 2
case 102: return 2
case 92: return 3
case 98: return 2
case 114: return 2
case 34: return 4
case 117: return 2
default:
switch {
case 48 <= r && r <= 57: return 2
case 65 <= r && r <= 70: return 2
case 97 <= r && r <= 102: return 2
default: return 2
}
}
panic("unreachable")
}
fun[9] = func(r rune) int {
switch(r) {
case 92: return 3
case 98: return 2
case 114: return 2
case 34: return 4
case 117: return 2
case 47: return 2
case 116: return 2
case 110: return 2
case 102: return 2
default:
switch {
case 48 <= r && r <= 57: return 2
case 65 <= r && r <= 70: return 2
case 97 <= r && r <= 102: return 2
default: return 2
}
}
panic("unreachable")
@ -410,9 +410,10 @@ a0[0].id = 0
{
var acc [2]bool
var fun [2]func(rune) int
fun[0] = func(r rune) int {
acc[1] = true
fun[1] = func(r rune) int {
switch(r) {
case 43: return 1
case 43: return -1
default:
switch {
default: return -1
@ -420,10 +421,9 @@ fun[0] = func(r rune) int {
}
panic("unreachable")
}
acc[1] = true
fun[1] = func(r rune) int {
fun[0] = func(r rune) int {
switch(r) {
case 43: return -1
case 43: return 1
default:
switch {
default: return -1
@ -466,9 +466,10 @@ a0[2].id = 2
{
var acc [2]bool
var fun [2]func(rune) int
fun[0] = func(r rune) int {
acc[1] = true
fun[1] = func(r rune) int {
switch(r) {
case 58: return 1
case 58: return -1
default:
switch {
default: return -1
@ -476,10 +477,9 @@ fun[0] = func(r rune) int {
}
panic("unreachable")
}
acc[1] = true
fun[1] = func(r rune) int {
fun[0] = func(r rune) int {
switch(r) {
case 58: return -1
case 58: return 1
default:
switch {
default: return -1
@ -494,9 +494,10 @@ a0[3].id = 3
{
var acc [2]bool
var fun [2]func(rune) int
fun[0] = func(r rune) int {
acc[1] = true
fun[1] = func(r rune) int {
switch(r) {
case 94: return 1
case 94: return -1
default:
switch {
default: return -1
@ -504,10 +505,9 @@ fun[0] = func(r rune) int {
}
panic("unreachable")
}
acc[1] = true
fun[1] = func(r rune) int {
fun[0] = func(r rune) int {
switch(r) {
case 94: return -1
case 94: return 1
default:
switch {
default: return -1
@ -578,18 +578,6 @@ a0[6].id = 6
{
var acc [5]bool
var fun [5]func(rune) int
fun[1] = func(r rune) int {
switch(r) {
case 45: return -1
default:
switch {
case 48 <= r && r <= 48: return 2
case 49 <= r && r <= 57: return 2
default: return -1
}
}
panic("unreachable")
}
acc[2] = true
fun[2] = func(r rune) int {
switch(r) {
@ -603,31 +591,6 @@ fun[2] = func(r rune) int {
}
panic("unreachable")
}
acc[4] = true
fun[4] = func(r rune) int {
switch(r) {
case 45: return -1
default:
switch {
case 48 <= r && r <= 48: return 4
case 49 <= r && r <= 57: return 4
default: return -1
}
}
panic("unreachable")
}
fun[0] = func(r rune) int {
switch(r) {
case 45: return 1
default:
switch {
case 48 <= r && r <= 48: return 2
case 49 <= r && r <= 57: return 3
default: return -1
}
}
panic("unreachable")
}
acc[3] = true
fun[3] = func(r rune) int {
switch(r) {
@ -641,6 +604,43 @@ fun[3] = func(r rune) int {
}
panic("unreachable")
}
acc[4] = true
fun[4] = func(r rune) int {
switch(r) {
case 45: return -1
default:
switch {
case 48 <= r && r <= 48: return 4
case 49 <= r && r <= 57: return 4
default: return -1
}
}
panic("unreachable")
}
fun[1] = func(r rune) int {
switch(r) {
case 45: return -1
default:
switch {
case 48 <= r && r <= 48: return 2
case 49 <= r && r <= 57: return 2
default: return -1
}
}
panic("unreachable")
}
fun[0] = func(r rune) int {
switch(r) {
case 45: return 1
default:
switch {
case 48 <= r && r <= 48: return 2
case 49 <= r && r <= 57: return 3
default: return -1
}
}
panic("unreachable")
}
a0[7].acc = acc[:]
a0[7].f = fun[:]
a0[7].id = 7
@ -651,9 +651,9 @@ var fun [2]func(rune) int
acc[1] = true
fun[1] = func(r rune) int {
switch(r) {
case 9: return 1
case 10: return 1
case 32: return 1
case 10: return 1
case 9: return 1
default:
switch {
default: return -1
@ -664,8 +664,8 @@ fun[1] = func(r rune) int {
fun[0] = func(r rune) int {
switch(r) {
case 9: return 1
case 10: return 1
case 32: return 1
case 10: return 1
default:
switch {
default: return -1
@ -680,18 +680,17 @@ a0[8].id = 8
{
var acc [2]bool
var fun [2]func(rune) int
acc[1] = true
fun[1] = func(r rune) int {
fun[0] = func(r rune) int {
switch(r) {
case 45: return -1
case 32: return -1
case 13: return -1
case 94: return -1
case 58: return -1
case 9: return -1
case 12: return -1
case 10: return -1
case 43: return -1
case 9: return -1
case 32: return -1
case 12: return -1
case 13: return -1
case 58: return -1
case 94: return -1
case 45: return -1
default:
switch {
default: return 1
@ -699,17 +698,18 @@ fun[1] = func(r rune) int {
}
panic("unreachable")
}
fun[0] = func(r rune) int {
acc[1] = true
fun[1] = func(r rune) int {
switch(r) {
case 9: return -1
case 32: return -1
case 12: return -1
case 13: return -1
case 58: return -1
case 94: return -1
case 45: return -1
case 58: return -1
case 9: return -1
case 12: return -1
case 10: return -1
case 43: return -1
case 45: return -1
case 32: return -1
case 13: return -1
default:
switch {
default: return 1


@ -1,5 +1,5 @@
%{
package search
package bleve
import "log"
func logDebugGrammar(format string, v ...interface{}) {
@ -62,46 +62,30 @@ searchBase:
STRING {
str := $1.s
logDebugGrammar("STRING - %s", str)
q := &MatchQuery{
Match: str,
Field: parsingDefaultField,
BoostVal: 1.0,
Explain: true,
}
if parsingMapping[parsingDefaultField] != nil {
q.Analyzer = parsingMapping[parsingDefaultField].Analyzer
}
q := NewMatchQuery(str).SetField(parsingDefaultField)
if parsingMust {
parsingMustList.Terms = append(parsingMustList.Terms, q)
parsingMustList.AddQuery(q)
parsingMust = false
} else if parsingMustNot {
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
parsingMustNotList.AddQuery(q)
parsingMustNot = false
} else {
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
parsingShouldList.AddQuery(q)
}
}
|
PHRASE {
phrase := $1.s
logDebugGrammar("PHRASE - %s", phrase)
q := &MatchPhraseQuery{
MatchPhrase: phrase,
Field: parsingDefaultField,
BoostVal: 1.0,
Explain: true,
}
if parsingMapping[parsingDefaultField] != nil {
q.Analyzer = parsingMapping[parsingDefaultField].Analyzer
}
q := NewMatchPhraseQuery(phrase).SetField(parsingDefaultField)
if parsingMust {
parsingMustList.Terms = append(parsingMustList.Terms, q)
parsingMustList.AddQuery(q)
parsingMust = false
} else if parsingMustNot {
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
parsingMustNotList.AddQuery(q)
parsingMustNot = false
} else {
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
parsingShouldList.AddQuery(q)
}
}
|
@ -109,23 +93,15 @@ STRING COLON STRING {
field := $1.s
str := $3.s
logDebugGrammar("FIELD - %s STRING - %s", field, str)
q := &MatchQuery{
Match: str,
Field: field,
BoostVal: 1.0,
Explain: true,
}
if parsingMapping[field] != nil {
q.Analyzer = parsingMapping[field].Analyzer
}
q := NewMatchQuery(str).SetField(field)
if parsingMust {
parsingMustList.Terms = append(parsingMustList.Terms, q)
parsingMustList.AddQuery(q)
parsingMust = false
} else if parsingMustNot {
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
parsingMustNotList.AddQuery(q)
parsingMustNot = false
} else {
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
parsingShouldList.AddQuery(q)
}
}
|
@ -133,23 +109,15 @@ STRING COLON PHRASE {
field := $1.s
phrase := $3.s
logDebugGrammar("FIELD - %s PHRASE - %s", field, phrase)
q := &MatchPhraseQuery{
MatchPhrase: phrase,
Field: field,
BoostVal: 1.0,
Explain: true,
}
if parsingMapping[field] != nil {
q.Analyzer = parsingMapping[field].Analyzer
}
q := NewMatchPhraseQuery(phrase).SetField(field)
if parsingMust {
parsingMustList.Terms = append(parsingMustList.Terms, q)
parsingMustList.AddQuery(q)
parsingMust = false
} else if parsingMustNot {
parsingMustNotList.Terms = append(parsingMustNotList.Terms, q)
parsingMustNotList.AddQuery(q)
parsingMustNot = false
} else {
parsingShouldList.Terms = append(parsingShouldList.Terms, q)
parsingShouldList.AddQuery(q)
}
};
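To make the new grammar actions concrete: instead of filling MatchQuery/MatchPhraseQuery struct literals and consulting the mapping for an analyzer, the actions now call the top-level constructors and add the result to the appropriate boolean list. A minimal, hedged sketch of that style (the query text and field name are made up; the import path follows the one used elsewhere in this commit):

package main

import (
	"fmt"

	"github.com/couchbaselabs/bleve"
)

func main() {
	// Build a match query the way the grammar actions above now do,
	// then add it to a should (disjunction) list.
	q := bleve.NewMatchQuery("beer").SetField("desc")
	should := bleve.NewDisjunctionQuery([]bleve.Query{})
	should.AddQuery(q)
	fmt.Printf("%+v\n", should)
}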

query_syntax_parser.go (new file, 68 lines added)

@ -0,0 +1,68 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"fmt"
"strings"
"sync"
)
var crashHard = false
var parserMutex sync.Mutex
var parsingDefaultField string
var parsingMust bool
var parsingMustNot bool
var debugParser bool
var debugLexer bool
var parsingMustList *ConjunctionQuery
var parsingMustNotList *DisjunctionQuery
var parsingShouldList *DisjunctionQuery
var parsingIndexMapping *IndexMapping
func ParseQuerySyntax(query string, mapping *IndexMapping, defaultField string) (rq Query, err error) {
parserMutex.Lock()
defer parserMutex.Unlock()
parsingIndexMapping = mapping
parsingDefaultField = defaultField
parsingMustList = NewConjunctionQuery([]Query{})
parsingMustNotList = NewDisjunctionQuery([]Query{})
parsingShouldList = NewDisjunctionQuery([]Query{})
defer func() {
r := recover()
if r != nil && r == "syntax error" {
// if we're panicking over a syntax error, chill
err = fmt.Errorf("Parse Error - %v", r)
} else if r != nil {
// otherwise continue to panic
if crashHard {
panic(r)
} else {
err = fmt.Errorf("Other Error - %v", r)
}
}
}()
yyParse(NewLexer(strings.NewReader(query)))
parsingQuery := NewBooleanQuery(nil, nil, nil)
if len(parsingMustList.Conjuncts) > 0 {
parsingQuery.Must = parsingMustList
}
if len(parsingMustNotList.Disjuncts) > 0 {
parsingQuery.MustNot = parsingMustNotList
}
if len(parsingShouldList.Disjuncts) > 0 {
parsingQuery.Should = parsingShouldList
}
rq = parsingQuery
return rq, err
}
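A hedged usage sketch of the ParseQuerySyntax entry point defined above. NewIndexMapping is an assumption here (the mapping API is part of this commit but not of this hunk); any *IndexMapping value the caller already has would work:

package main

import (
	"fmt"
	"log"

	"github.com/couchbaselabs/bleve"
)

func main() {
	// Assumption: NewIndexMapping comes from the new mapping API.
	mapping := bleve.NewIndexMapping()
	// "desc:beer water" exercises both the field:term and bare-term
	// actions in the grammar above.
	q, err := bleve.ParseQuerySyntax("desc:beer water", mapping, "desc")
	if err != nil {
		log.Fatal(err)
	}
	// The parser assembles a boolean query from the must/must-not/should lists.
	fmt.Printf("%T\n", q)
}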


@ -6,25 +6,45 @@
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
package bleve
import (
"github.com/couchbaselabs/bleve/index"
"github.com/couchbaselabs/bleve/search"
)
type TermQuery struct {
Term string `json:"term"`
Field string `json:"field,omitempty"`
FieldVal string `json:"field,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
Explain bool `json:"explain,omitempty"`
}
func NewTermQuery(term string) *TermQuery {
return &TermQuery{
Term: term,
BoostVal: 1.0,
}
}
func (q *TermQuery) Boost() float64 {
return q.BoostVal
}
func (q *TermQuery) Searcher(index index.Index) (Searcher, error) {
return NewTermSearcher(index, q)
func (q *TermQuery) SetBoost(b float64) *TermQuery {
q.BoostVal = b
return q
}
func (q *TermQuery) Field() string {
return q.FieldVal
}
func (q *TermQuery) SetField(f string) *TermQuery {
q.FieldVal = f
return q
}
func (q *TermQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
return search.NewTermSearcher(i.i, q.Term, q.FieldVal, q.BoostVal, explain)
}
func (q *TermQuery) Validate() error {
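The diff above switches TermQuery to chainable setters that return the query; a minimal sketch (the term, field, and boost values are made up):

package main

import (
	"fmt"

	"github.com/couchbaselabs/bleve"
)

func main() {
	// Setters return *TermQuery, so construction chains in one expression.
	q := bleve.NewTermQuery("beer").SetField("desc").SetBoost(1.5)
	fmt.Println(q.Field(), q.Boost()) // desc 1.5
}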

reflect.go (new file, 78 lines added)

@ -0,0 +1,78 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"reflect"
"strings"
)
func lookupPropertyPath(data interface{}, path string) interface{} {
pathParts := decodePath(path)
current := data
for _, part := range pathParts {
current = lookupPropertyPathPart(current, part)
if current == nil {
break
}
}
return current
}
func lookupPropertyPathPart(data interface{}, part string) interface{} {
val := reflect.ValueOf(data)
typ := val.Type()
switch typ.Kind() {
case reflect.Map:
// FIXME can add support for other map keys in the future
if typ.Key().Kind() == reflect.String {
key := reflect.ValueOf(part)
entry := val.MapIndex(key)
if entry.IsValid() {
return entry.Interface()
}
}
case reflect.Struct:
field := val.FieldByName(part)
if field.IsValid() && field.CanInterface() {
return field.Interface()
}
}
return nil
}
const PATH_SEPARATOR = "."
func decodePath(path string) []string {
return strings.Split(path, PATH_SEPARATOR)
}
func encodePath(pathElements []string) string {
return strings.Join(pathElements, PATH_SEPARATOR)
}
func mustString(data interface{}) (string, bool) {
if data != nil {
str, ok := data.(string)
if ok {
return str, true
}
}
return "", false
}
// parseJSONTagName extracts the JSON field name from a struct tag
func parseJSONTagName(tag string) string {
if idx := strings.Index(tag, ","); idx != -1 {
return tag[:idx]
}
return tag
}
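These helpers are unexported, so the sketch below is written as if it lived inside package bleve (for example in a test); the document values are made up:

package bleve

import "testing"

func TestLookupPropertyPathSketch(t *testing.T) {
	data := map[string]interface{}{
		"name": "marty",
		"address": map[string]interface{}{
			"city": "somewhere",
		},
	}
	// decodePath splits on ".", then each part is resolved against a
	// string-keyed map or a struct field of the same name.
	if got := lookupPropertyPath(data, "address.city"); got != "somewhere" {
		t.Errorf("expected somewhere, got %v", got)
	}
}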

search.go (new file, 110 lines added)

@ -0,0 +1,110 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"encoding/json"
"fmt"
"time"
"github.com/couchbaselabs/bleve/search"
)
type HighlightRequest struct {
Style *string `json:"style"`
Fields []string `json:"fields"`
}
func NewHighlight() *HighlightRequest {
return &HighlightRequest{}
}
func NewHighlightWithStyle(style string) *HighlightRequest {
return &HighlightRequest{
Style: &style,
}
}
type SearchRequest struct {
Query Query `json:"query"`
Size int `json:"size"`
From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"`
Explain bool `json:"explain"`
}
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
var temp struct {
Q json.RawMessage `json:"query"`
Size int `json:"size"`
From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"`
Explain bool `json:"explain"`
}
err := json.Unmarshal(input, &temp)
if err != nil {
return err
}
r.Size = temp.Size
r.From = temp.From
r.Explain = temp.Explain
r.Highlight = temp.Highlight
r.Query, err = ParseQuery(temp.Q)
if err != nil {
return err
}
if r.Size <= 0 {
r.Size = 10
}
if r.From <= 0 {
r.From = 0
}
return nil
}
func NewSearchRequest(q Query, size, from int, explain bool) *SearchRequest {
return &SearchRequest{
Query: q,
Size: size,
From: from,
Explain: explain,
}
}
type SearchResult struct {
Request *SearchRequest `json:"request"`
Hits search.DocumentMatchCollection `json:"hits"`
Total uint64 `json:"total_hits"`
MaxScore float64 `json:"max_score"`
Took time.Duration `json:"took"`
}
func (sr *SearchResult) String() string {
rv := ""
if len(sr.Hits) > 0 {
rv = fmt.Sprintf("%d matches, showing %d through %d, took %s\n", sr.Total, sr.Request.From+1, sr.Request.From+len(sr.Hits), sr.Took)
for i, hit := range sr.Hits {
rv += fmt.Sprintf("%5d. %s (%f)\n", i+sr.Request.From+1, hit.ID, hit.Score)
for fragmentField, fragments := range hit.Fragments {
rv += fmt.Sprintf("\t%s\n", fragmentField)
for _, fragment := range fragments {
rv += fmt.Sprintf("\t\t%s\n", fragment)
}
}
}
} else {
rv = "No matches"
}
return rv
}
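A hedged sketch of building a request with the types above; running it against an index happens through the new top-level search API outside this file, so only construction and the JSON round-trip are shown:

package main

import (
	"encoding/json"
	"fmt"

	"github.com/couchbaselabs/bleve"
)

func main() {
	q := bleve.NewMatchQuery("beer").SetField("desc")
	req := bleve.NewSearchRequest(q, 10, 0, false)
	req.Highlight = bleve.NewHighlight()

	// SearchRequest marshals to JSON; on the way back in, UnmarshalJSON
	// above defaults Size to 10 when it is omitted or <= 0.
	b, err := json.Marshal(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(string(b))
}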


@ -10,7 +10,10 @@ package search
import (
"math"
"regexp"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
"github.com/couchbaselabs/bleve/index/store/inmem"
@ -27,6 +30,11 @@ func init() {
}
}
// create a simpler analyzer which will support these tests
var testAnalyzer = &analysis.Analyzer{
Tokenizer: regexp_tokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)),
}
// sets up some mock data used in many tests in this package
var twoDocIndexDescIndexingOptions = document.DEFAULT_TEXT_INDEXING_OPTIONS | document.INCLUDE_TERM_VECTORS
@ -34,28 +42,28 @@ var twoDocIndexDocs = []*document.Document{
// must have 4/4 beer
document.NewDocument("1").
AddField(document.NewTextField("name", []byte("marty"))).
AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions)).
AddField(document.NewTextField("street", []byte("couchbase way"))),
AddField(document.NewTextFieldCustom("desc", []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("street", []byte("couchbase way"), testAnalyzer)),
// must have 1/4 beer
document.NewDocument("2").
AddField(document.NewTextField("name", []byte("steve"))).
AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("angst beer couch database"), twoDocIndexDescIndexingOptions)).
AddField(document.NewTextField("street", []byte("couchbase way"))).
AddField(document.NewTextField("title", []byte("mister"))),
AddField(document.NewTextFieldCustom("desc", []byte("angst beer couch database"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("street", []byte("couchbase way"), testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []byte("mister"), testAnalyzer)),
// must have 1/4 beer
document.NewDocument("3").
AddField(document.NewTextField("name", []byte("dustin"))).
AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("apple beer column dank"), twoDocIndexDescIndexingOptions)).
AddField(document.NewTextField("title", []byte("mister"))),
AddField(document.NewTextFieldCustom("desc", []byte("apple beer column dank"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []byte("mister"), testAnalyzer)),
// must have 65/65 beer
document.NewDocument("4").
AddField(document.NewTextField("name", []byte("ravi"))).
AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer"), twoDocIndexDescIndexingOptions)),
AddField(document.NewTextFieldCustom("desc", []byte("beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)),
// must have 0/x beer
document.NewDocument("5").
AddField(document.NewTextField("name", []byte("bobert"))).
AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("water"), twoDocIndexDescIndexingOptions)).
AddField(document.NewTextField("title", []byte("mister"))),
AddField(document.NewTextFieldCustom("desc", []byte("water"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []byte("mister"), testAnalyzer)),
}
func scoresCloseEnough(a, b float64) bool {
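The test fixtures above use the changed field constructors; a hedged sketch of those signatures in isolation (the values are made up, and the regexp-based analyzer mirrors the testAnalyzer defined above):

package main

import (
	"regexp"

	"github.com/couchbaselabs/bleve/analysis"
	"github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer"
	"github.com/couchbaselabs/bleve/document"
)

func main() {
	a := &analysis.Analyzer{
		Tokenizer: regexp_tokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)),
	}
	opts := document.DEFAULT_TEXT_INDEXING_OPTIONS | document.INCLUDE_TERM_VECTORS
	doc := document.NewDocument("example").
		AddField(document.NewTextFieldWithAnalyzer("name", []byte("marty"), a)).
		AddField(document.NewTextFieldCustom("desc", []byte("beer"), opts, a))
	_ = doc
}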


@ -15,6 +15,7 @@ import (
type TopScoreCollector struct {
k int
skip int
results *list.List
took time.Duration
maxScore float64
@ -24,6 +25,15 @@ type TopScoreCollector struct {
func NewTopScorerCollector(k int) *TopScoreCollector {
return &TopScoreCollector{
k: k,
skip: 0,
results: list.New(),
}
}
func NewTopScorerSkipCollector(k, skip int) *TopScoreCollector {
return &TopScoreCollector{
k: k,
skip: skip,
results: list.New(),
}
}
@ -70,7 +80,7 @@ func (tksc *TopScoreCollector) collectSingle(dm *DocumentMatch) {
tksc.results.InsertBefore(dm, e)
// if we just made the list too long
if tksc.results.Len() > tksc.k {
if tksc.results.Len() > (tksc.k + tksc.skip) {
// remove the head
tksc.results.Remove(tksc.results.Front())
}
@ -79,18 +89,26 @@ func (tksc *TopScoreCollector) collectSingle(dm *DocumentMatch) {
}
// if we got to the end, we still have to add it
tksc.results.PushBack(dm)
if tksc.results.Len() > tksc.k {
if tksc.results.Len() > (tksc.k + tksc.skip) {
// remove the head
tksc.results.Remove(tksc.results.Front())
}
}
func (tksc *TopScoreCollector) Results() DocumentMatchCollection {
rv := make(DocumentMatchCollection, tksc.results.Len())
i := 0
for e := tksc.results.Back(); e != nil; e = e.Prev() {
rv[i] = e.Value.(*DocumentMatch)
i++
if tksc.results.Len()-tksc.skip > 0 {
rv := make(DocumentMatchCollection, tksc.results.Len()-tksc.skip)
i := 0
skipped := 0
for e := tksc.results.Back(); e != nil; e = e.Prev() {
if skipped < tksc.skip {
skipped++
continue
}
rv[i] = e.Value.(*DocumentMatch)
i++
}
return rv
}
return rv
return DocumentMatchCollection{}
}
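In effect skip acts as a from-style offset: collectSingle keeps the top k+skip matches and Results() drops the best skip of them. A small hedged sketch, written as if inside this package's tests and reusing the stubSearcher defined there (scores are made up):

package search

import "testing"

func TestSkipAsOffsetSketch(t *testing.T) {
	searcher := &stubSearcher{
		matches: DocumentMatchCollection{
			&DocumentMatch{ID: "a", Score: 4},
			&DocumentMatch{ID: "b", Score: 3},
			&DocumentMatch{ID: "c", Score: 2},
			&DocumentMatch{ID: "d", Score: 1},
		},
	}
	// k=2, skip=2: keep the top 4 matches, return the 3rd and 4th best.
	collector := NewTopScorerSkipCollector(2, 2)
	collector.Collect(searcher)
	hits := collector.Results()
	if len(hits) != 2 || hits[0].ID != "c" || hits[1].ID != "d" {
		t.Errorf("expected IDs c then d, got %d hits", len(hits))
	}
}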


@ -116,3 +116,97 @@ func TestTop10Scores(t *testing.T) {
t.Errorf("expected minimum score to be higher than 10, got %f", minScore)
}
}
func TestTop10ScoresSkip10(t *testing.T) {
// a stub search with more than 10 matches
// the top-10 scores are > 10
// everything else is less than 10
searcher := &stubSearcher{
matches: DocumentMatchCollection{
&DocumentMatch{
ID: "a",
Score: 11,
},
&DocumentMatch{
ID: "b",
Score: 9.5,
},
&DocumentMatch{
ID: "c",
Score: 11,
},
&DocumentMatch{
ID: "d",
Score: 9,
},
&DocumentMatch{
ID: "e",
Score: 11,
},
&DocumentMatch{
ID: "f",
Score: 9,
},
&DocumentMatch{
ID: "g",
Score: 11,
},
&DocumentMatch{
ID: "h",
Score: 9,
},
&DocumentMatch{
ID: "i",
Score: 11,
},
&DocumentMatch{
ID: "j",
Score: 11,
},
&DocumentMatch{
ID: "k",
Score: 11,
},
&DocumentMatch{
ID: "l",
Score: 99,
},
&DocumentMatch{
ID: "m",
Score: 11,
},
&DocumentMatch{
ID: "n",
Score: 11,
},
},
}
collector := NewTopScorerSkipCollector(10, 10)
collector.Collect(searcher)
maxScore := collector.MaxScore()
if maxScore != 99.0 {
t.Errorf("expected max score 99.0, got %f", maxScore)
}
total := collector.Total()
if total != 14 {
t.Errorf("expected 14 total results, got %d", total)
}
results := collector.Results()
if len(results) != 4 {
t.Fatalf("expected 4 results, got %d", len(results))
}
if results[0].ID != "b" {
t.Errorf("expected first result to have ID 'b', got %s", results[0].ID)
}
if results[0].Score != 9.5 {
t.Errorf("expected highest score to be 9.5ß, got %f", results[0].Score)
}
}


@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import ()


@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import ()


@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (


@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import ()


@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (


@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import ()


@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (


@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (


@ -1,3 +1,11 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
