0
0
Fork 0

initial commit

This commit is contained in:
Marty Schoch 2014-04-17 16:55:53 -04:00
commit 3d842dfaf2
55 changed files with 4170 additions and 0 deletions

10
.gitignore vendored Normal file
View File

@ -0,0 +1,10 @@
#*
*.sublime-*
*~
.#*
.project
.settings
.DS_Store
/examples/bleve_index_json/bleve_index_json
/examples/bleve_query/bleve_query
/utils/bleve_dump/bleve_dump

3
README.md Normal file
View File

@ -0,0 +1,3 @@
## bleve
A modern text indexing library for Go.

View File

@ -0,0 +1,24 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package keyword_analyzer
import (
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/analysis/tokenizers/single_token"
)
// NewKeywordAnalyzer returns an analyzer that treats the entire input as a
// single token: no char filters, the single-token tokenizer, and no token
// filters. The error is always nil and exists only to match the analyzer
// constructor signature used elsewhere in the project.
func NewKeywordAnalyzer() (*analysis.Analyzer, error) {
	keyword := analysis.Analyzer{
		CharFilters: []analysis.CharFilter{},
		Tokenizer:   single_token.NewSingleTokenTokenizer(),
		// BUG FIX: analysis.Analyzer names this field TokenFilters, not
		// Filters; the original literal would not compile against it.
		TokenFilters: []analysis.TokenFilter{},
	}
	return &keyword, nil
}

View File

@ -0,0 +1,39 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package standard_analyzer
import (
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_words_filter"
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
)
// NewStandardAnalyzer returns the default analyzer: unicode word-boundary
// tokenization followed by lower-casing and stop-word removal. It returns an
// error if either token filter fails to construct.
func NewStandardAnalyzer() (*analysis.Analyzer, error) {
	// MixedCaps locals: the original reused the package names
	// lower_case_filter / stop_words_filter as variable names, shadowing
	// the imports from the assignment onward.
	lowerCaseFilter, err := lower_case_filter.NewLowerCaseFilter()
	if err != nil {
		return nil, err
	}
	stopWordsFilter, err := stop_words_filter.NewStopWordsFilter()
	if err != nil {
		return nil, err
	}
	standard := analysis.Analyzer{
		CharFilters: []analysis.CharFilter{},
		Tokenizer:   unicode_word_boundary.NewUnicodeWordBoundaryTokenizer(),
		TokenFilters: []analysis.TokenFilter{
			lowerCaseFilter,
			stopWordsFilter,
		},
	}
	return &standard, nil
}

View File

@ -0,0 +1,32 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package html_char_filter
import (
"regexp"
"github.com/couchbaselabs/bleve/analysis/char_filters/regexp_char_filter"
)
// htmlTagPattern matches HTML/XML tags (including the DOCTYPE declaration).
// The origin of this regex is here:
// http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/
// slightly modified by me to also match the DOCTYPE
const htmlTagPattern = `</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`

// htmlRegex is compiled once at package init; MustCompile panics on a bad
// pattern, which is acceptable for a package-level constant.
var htmlRegex = regexp.MustCompile(htmlTagPattern)

// HtmlCharFilter strips HTML tags by replacing each tag with an equal-length
// run of spaces, preserving byte offsets of the surrounding text.
type HtmlCharFilter struct {
	*regexp_char_filter.RegexpCharFilter
}

// NewHtmlCharFilter constructs an HtmlCharFilter backed by the shared
// regexp char filter with a single-space replacement byte.
func NewHtmlCharFilter() *HtmlCharFilter {
	return &HtmlCharFilter{
		regexp_char_filter.NewRegexpCharFilter(htmlRegex, []byte{' '}),
	}
}

View File

@ -0,0 +1,52 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package html_char_filter
import (
"reflect"
"testing"
)
// TestHtmlCharFilter feeds a small HTML document through the filter and
// compares against the expected plain text.
//
// NOTE(review): the filter replaces each tag with a run of spaces equal to
// the tag's length, so the expected-output literal must contain that exact
// space padding — verify the whitespace inside these raw strings has not
// been lost in transit.
func TestHtmlCharFilter(t *testing.T) {
	tests := []struct {
		input  []byte
		output []byte
	}{
		{
			input: []byte(`<!DOCTYPE html>
<html>
<body>
<h1>My First Heading</h1>
<p>My first paragraph.</p>
</body>
</html>`),
			output: []byte(`
My First Heading
My first paragraph.
`),
		},
	}
	for _, test := range tests {
		filter := NewHtmlCharFilter()
		output := filter.Filter(test.input)
		if !reflect.DeepEqual(output, test.output) {
			t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input))
		}
	}
}

View File

@ -0,0 +1,30 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package regexp_char_filter
import (
"bytes"
"regexp"
)
type RegexpCharFilter struct {
r *regexp.Regexp
replacement []byte
}
func NewRegexpCharFilter(r *regexp.Regexp, replacement []byte) *RegexpCharFilter {
return &RegexpCharFilter{
r: r,
replacement: replacement,
}
}
func (s *RegexpCharFilter) Filter(input []byte) []byte {
return s.r.ReplaceAllFunc(input, func(in []byte) []byte { return bytes.Repeat(s.replacement, len(in)) })
}

55
analysis/freq.go Normal file
View File

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package analysis
// TokenLocation records one occurrence of a term: its byte offsets within
// the original input and its 1-based position in the token stream.
type TokenLocation struct {
	Start    int
	End      int
	Position int
}

// TokenFreq aggregates every location at which a single term occurred.
type TokenFreq struct {
	Term      []byte
	Locations []*TokenLocation
}
// TokenFrequency collapses a TokenStream into one TokenFreq per unique term,
// collecting every location at which each term occurred. The order of the
// returned slice follows map iteration and is therefore unspecified.
func TokenFrequency(tokens TokenStream) []*TokenFreq {
	byTerm := make(map[string]*TokenFreq)
	for _, token := range tokens {
		location := &TokenLocation{
			Start:    token.Start,
			End:      token.End,
			Position: token.Position,
		}
		if existing, found := byTerm[string(token.Term)]; found {
			existing.Locations = append(existing.Locations, location)
		} else {
			byTerm[string(token.Term)] = &TokenFreq{
				Term:      token.Term,
				Locations: []*TokenLocation{location},
			}
		}
	}
	rv := make([]*TokenFreq, 0, len(byTerm))
	for _, tf := range byTerm {
		rv = append(rv, tf)
	}
	return rv
}

View File

@ -0,0 +1,44 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package length_filter
import (
"unicode/utf8"
"github.com/couchbaselabs/bleve/analysis"
)
// LengthFilter removes tokens whose term length in runes falls outside the
// configured bounds. A non-positive min or max disables that bound.
type LengthFilter struct {
	min int
	max int
}

// NewLengthFilter builds a LengthFilter with the given bounds; pass a
// non-positive value to disable either bound. The error is always nil and
// exists only to match the filter-constructor signature used in this project.
func NewLengthFilter(min, max int) (*LengthFilter, error) {
	return &LengthFilter{
		min: min,
		max: max,
	}, nil
}

// Filter returns the tokens whose rune count satisfies the configured
// bounds; term length is measured in runes, not bytes.
func (f *LengthFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	// Pre-size for the common case where most tokens are kept, avoiding
	// repeated slice growth.
	rv := make(analysis.TokenStream, 0, len(input))
	for _, token := range input {
		wordLen := utf8.RuneCount(token.Term)
		if f.min > 0 && f.min > wordLen {
			continue
		}
		if f.max > 0 && f.max < wordLen {
			continue
		}
		rv = append(rv, token)
	}
	return rv
}

View File

@ -0,0 +1,102 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package length_filter
import (
"testing"
"github.com/couchbaselabs/bleve/analysis"
)
// TestLengthFilter checks that with min=3 and max=4 only the 3-rune token
// "two" survives ("1" is too short, "three" too long).
func TestLengthFilter(t *testing.T) {
	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("1"),
		},
		&analysis.Token{
			Term: []byte("two"),
		},
		&analysis.Token{
			Term: []byte("three"),
		},
	}
	lengthFilter, err := NewLengthFilter(3, 4)
	if err != nil {
		t.Fatal(err)
	}
	// NOTE(review): "ouput" typo in the original variable name is preserved.
	ouputTokenStream := lengthFilter.Filter(inputTokenStream)
	if len(ouputTokenStream) != 1 {
		t.Fatalf("expected 1 output token")
	}
	if string(ouputTokenStream[0].Term) != "two" {
		t.Errorf("expected term `two`, got `%s`", ouputTokenStream[0].Term)
	}
}
// TestLengthFilterNoMax verifies that a non-positive max disables the upper
// bound: only tokens shorter than min are dropped.
func TestLengthFilterNoMax(t *testing.T) {
	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("1"),
		},
		&analysis.Token{
			Term: []byte("two"),
		},
		&analysis.Token{
			Term: []byte("three"),
		},
	}
	lengthFilter, err := NewLengthFilter(3, -1)
	if err != nil {
		t.Fatal(err)
	}
	outputTokenStream := lengthFilter.Filter(inputTokenStream)
	if len(outputTokenStream) != 2 {
		t.Fatalf("expected 2 output token")
	}
	if string(outputTokenStream[0].Term) != "two" {
		t.Errorf("expected term `two`, got `%s`", outputTokenStream[0].Term)
	}
	if string(outputTokenStream[1].Term) != "three" {
		// BUG FIX: the original printed index [0] here, reporting the wrong
		// token when this assertion failed.
		t.Errorf("expected term `three`, got `%s`", outputTokenStream[1].Term)
	}
}
// TestLengthFilterNoMin verifies that a non-positive min disables the lower
// bound: only tokens longer than max are dropped.
func TestLengthFilterNoMin(t *testing.T) {
	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("1"),
		},
		&analysis.Token{
			Term: []byte("two"),
		},
		&analysis.Token{
			Term: []byte("three"),
		},
	}
	lengthFilter, err := NewLengthFilter(-1, 4)
	if err != nil {
		t.Fatal(err)
	}
	outputTokenStream := lengthFilter.Filter(inputTokenStream)
	if len(outputTokenStream) != 2 {
		t.Fatalf("expected 2 output token")
	}
	if string(outputTokenStream[0].Term) != "1" {
		t.Errorf("expected term `1`, got `%s`", outputTokenStream[0].Term)
	}
	if string(outputTokenStream[1].Term) != "two" {
		// BUG FIX: the original printed index [0] here, reporting the wrong
		// token when this assertion failed.
		t.Errorf("expected term `two`, got `%s`", outputTokenStream[1].Term)
	}
}

View File

@ -0,0 +1,35 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package lower_case_filter
import (
"strings"
"github.com/couchbaselabs/bleve/analysis"
)
// LowerCaseFilter normalizes every token's term to lower case.
type LowerCaseFilter struct {
}

// NewLowerCaseFilter constructs a LowerCaseFilter; the error is always nil
// and exists only to match the filter-constructor signature.
func NewLowerCaseFilter() (*LowerCaseFilter, error) {
	return &LowerCaseFilter{}, nil
}

// Filter lower-cases each token's term in place (tokens are pointers) and
// returns the tokens in their original order.
func (f *LowerCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	rv := make(analysis.TokenStream, 0)
	for _, token := range input {
		token.Term = []byte(strings.ToLower(string(token.Term)))
		rv = append(rv, token)
	}
	return rv
}

View File

@ -0,0 +1,52 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package lower_case_filter
import (
"reflect"
"testing"
"github.com/couchbaselabs/bleve/analysis"
)
// TestLowerCaseFilter verifies that mixed-case terms are lower-cased and
// already-lowercase terms pass through unchanged.
func TestLowerCaseFilter(t *testing.T) {
	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("ONE"),
		},
		&analysis.Token{
			Term: []byte("two"),
		},
		&analysis.Token{
			Term: []byte("ThReE"),
		},
	}
	expectedTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("one"),
		},
		&analysis.Token{
			Term: []byte("two"),
		},
		&analysis.Token{
			Term: []byte("three"),
		},
	}
	filter, err := NewLowerCaseFilter()
	if err != nil {
		t.Fatal(err)
	}
	// NOTE(review): "ouput" typo in the original variable name is preserved.
	ouputTokenStream := filter.Filter(inputTokenStream)
	if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) {
		t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)
	}
}

View File

@ -0,0 +1,46 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package stemmer_filter
import (
"bitbucket.org/tebeka/snowball"
"github.com/couchbaselabs/bleve/analysis"
)
// StemmerFilter reduces each token's term to its stem using the snowball
// stemming library, configured for a single language.
type StemmerFilter struct {
	lang    string
	stemmer *snowball.Stemmer
}

// NewStemmerFilter creates a stemmer for lang (e.g. "english"); it returns
// snowball's error if the language is not supported.
func NewStemmerFilter(lang string) (*StemmerFilter, error) {
	stemmer, err := snowball.New(lang)
	if err != nil {
		return nil, err
	}
	return &StemmerFilter{
		lang:    lang,
		stemmer: stemmer,
	}, nil
}

// List returns snowball.LangList() — presumably every language the library
// supports, not just this filter's configured one; confirm against the
// snowball documentation.
func (s *StemmerFilter) List() []string {
	return snowball.LangList()
}

// Filter replaces each token's term with its stemmed form, mutating the
// tokens in place and returning them in order.
func (s *StemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	rv := make(analysis.TokenStream, 0)
	for _, token := range input {
		stemmed := s.stemmer.Stem(string(token.Term))
		token.Term = []byte(stemmed)
		rv = append(rv, token)
	}
	return rv
}

View File

@ -0,0 +1,52 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package stemmer_filter
import (
"reflect"
"testing"
"github.com/couchbaselabs/bleve/analysis"
)
// TestStemmerFilter checks English stemming of a few representative words
// ("walking" -> "walk", "talked" -> "talk", "business" -> "busi").
func TestStemmerFilter(t *testing.T) {
	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("walking"),
		},
		&analysis.Token{
			Term: []byte("talked"),
		},
		&analysis.Token{
			Term: []byte("business"),
		},
	}
	expectedTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("walk"),
		},
		&analysis.Token{
			Term: []byte("talk"),
		},
		&analysis.Token{
			Term: []byte("busi"),
		},
	}
	filter, err := NewStemmerFilter("english")
	if err != nil {
		t.Fatal(err)
	}
	// NOTE(review): "ouput" typo in the original variable name is preserved.
	ouputTokenStream := filter.Filter(inputTokenStream)
	if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) {
		t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)
	}
}

View File

@ -0,0 +1,53 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package stop_words_filter
import (
"github.com/couchbaselabs/bleve/analysis"
)
// DEFAULT_STOP_WORDS is the default English stop-word list used by
// NewStopWordsFilter.
// NOTE(review): exported ALL_CAPS naming is unconventional Go; renaming
// would break external callers, so it is left as-is.
var DEFAULT_STOP_WORDS []string = []string{
	"a", "an", "and", "are", "as", "at", "be", "but", "by",
	"for", "if", "in", "into", "is", "it",
	"no", "not", "of", "on", "or", "such",
	"that", "the", "their", "then", "there", "these",
	"they", "this", "to", "was", "will", "with",
}

// StopWordsFilter removes tokens whose terms appear in a stop-word set.
type StopWordsFilter struct {
	// stopWords maps each stop word to true for O(1) membership tests.
	stopWords map[string]bool
}

// NewStopWordsFilter builds a filter over DEFAULT_STOP_WORDS; the error is
// always nil and exists only to match the filter-constructor signature.
func NewStopWordsFilter() (*StopWordsFilter, error) {
	return &StopWordsFilter{
		stopWords: buildStopWordMap(DEFAULT_STOP_WORDS),
	}, nil
}
// Filter drops every token whose term is in the stop-word set, preserving
// the order of the remaining tokens.
func (f *StopWordsFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	rv := make(analysis.TokenStream, 0)
	for _, token := range input {
		// Missing keys yield false, so a plain index doubles as the
		// membership test for this bool-valued map.
		if !f.stopWords[string(token.Term)] {
			rv = append(rv, token)
		}
	}
	return rv
}
// buildStopWordMap converts a word list into a membership map with every
// entry set to true.
func buildStopWordMap(words []string) map[string]bool {
	set := make(map[string]bool, len(words))
	for i := range words {
		set[words[i]] = true
	}
	return set
}

View File

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package stop_words_filter
import (
"reflect"
"testing"
"github.com/couchbaselabs/bleve/analysis"
)
// TestStopWordsFilter verifies that default stop words ("a", "in", "the")
// are removed and content words ("walk", "park") are kept in order.
func TestStopWordsFilter(t *testing.T) {
	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("a"),
		},
		&analysis.Token{
			Term: []byte("walk"),
		},
		&analysis.Token{
			Term: []byte("in"),
		},
		&analysis.Token{
			Term: []byte("the"),
		},
		&analysis.Token{
			Term: []byte("park"),
		},
	}
	expectedTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("walk"),
		},
		&analysis.Token{
			Term: []byte("park"),
		},
	}
	filter, err := NewStopWordsFilter()
	if err != nil {
		t.Fatal(err)
	}
	// NOTE(review): "ouput" typo in the original variable name is preserved.
	ouputTokenStream := filter.Filter(inputTokenStream)
	if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) {
		t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)
	}
}

View File

@ -0,0 +1,40 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package regexp_tokenizer
import (
"regexp"
"github.com/couchbaselabs/bleve/analysis"
)
// RegexpTokenizer emits one token per match of a regular expression; bytes
// between matches are discarded.
type RegexpTokenizer struct {
	r *regexp.Regexp
}

// NewRegexpTokenizer wraps an already-compiled regular expression.
func NewRegexpTokenizer(r *regexp.Regexp) *RegexpTokenizer {
	return &RegexpTokenizer{r: r}
}

// Tokenize returns one token per regexp match. Start/End are the match's
// byte offsets into input, and Position is 1-based in match order.
func (rt *RegexpTokenizer) Tokenize(input []byte) analysis.TokenStream {
	matches := rt.r.FindAllIndex(input, -1)
	rv := make(analysis.TokenStream, len(matches))
	for i, match := range matches {
		start, end := match[0], match[1]
		rv[i] = &analysis.Token{
			Term:     input[start:end],
			Start:    start,
			End:      end,
			Position: i + 1,
		}
	}
	return rv
}

View File

@ -0,0 +1,29 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package simple_word_boundary
import (
"regexp"
"github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer"
)
// wordPattern matches runs of word characters; everything between runs is
// treated as a boundary and discarded.
const wordPattern = `\w+`

// wordRegex is compiled once at package init.
var wordRegex = regexp.MustCompile(wordPattern)

// SimpleWordBoundaryTokenizer splits input on \w+ runs via the shared
// regexp tokenizer.
type SimpleWordBoundaryTokenizer struct {
	*regexp_tokenizer.RegexpTokenizer
}

// NewSimpleWordBoundaryTokenizer constructs the tokenizer with the
// package's precompiled word regexp.
func NewSimpleWordBoundaryTokenizer() *SimpleWordBoundaryTokenizer {
	return &SimpleWordBoundaryTokenizer{
		regexp_tokenizer.NewRegexpTokenizer(wordRegex),
	}
}

View File

@ -0,0 +1,51 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package simple_word_boundary
import (
"reflect"
"testing"
"github.com/couchbaselabs/bleve/analysis"
)
// TestBoundary checks that the tokenizer splits on \w+ runs, dropping the
// trailing punctuation and recording byte offsets and 1-based positions.
func TestBoundary(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		{
			input: []byte("Hello World."),
			output: analysis.TokenStream{
				// Keyed literals so the fixture survives any reordering of
				// analysis.Token's fields.
				{Start: 0, End: 5, Term: []byte("Hello"), Position: 1},
				{Start: 6, End: 11, Term: []byte("World"), Position: 2},
			},
		},
	}
	for _, test := range tests {
		tokenizer := NewSimpleWordBoundaryTokenizer()
		actual := tokenizer.Tokenize(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
		}
	}
}

View File

@ -0,0 +1,31 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package single_token
import (
"github.com/couchbaselabs/bleve/analysis"
)
// SingleTokenTokenizer emits the entire input as one token.
type SingleTokenTokenizer struct {
}

// NewSingleTokenTokenizer constructs a SingleTokenTokenizer.
func NewSingleTokenTokenizer() *SingleTokenTokenizer {
	return &SingleTokenTokenizer{}
}

// Tokenize returns a single token spanning the whole input (byte offsets
// 0..len(input)) at position 1. The token aliases input; it is not copied.
func (t *SingleTokenTokenizer) Tokenize(input []byte) analysis.TokenStream {
	return analysis.TokenStream{
		&analysis.Token{
			Term:     input,
			Position: 1,
			Start:    0,
			End:      len(input),
		},
	}
}

View File

@ -0,0 +1,67 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package single_token
import (
"reflect"
"testing"
"github.com/couchbaselabs/bleve/analysis"
)
// TestSingleTokenTokenizer verifies the whole input comes back as one token
// with byte-accurate End offsets, including multi-byte UTF-8 input.
func TestSingleTokenTokenizer(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		{
			input: []byte("Hello World"),
			output: analysis.TokenStream{
				// Keyed literals so the fixtures survive any reordering of
				// analysis.Token's fields.
				{Start: 0, End: 11, Term: []byte("Hello World"), Position: 1},
			},
		},
		{
			input: []byte("こんにちは世界"),
			output: analysis.TokenStream{
				// 7 runes at 3 bytes each = 21 bytes.
				{Start: 0, End: 21, Term: []byte("こんにちは世界"), Position: 1},
			},
		},
		{
			input: []byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"),
			output: analysis.TokenStream{
				{Start: 0, End: 72, Term: []byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"), Position: 1},
			},
		},
	}
	for _, test := range tests {
		tokenizer := NewSingleTokenTokenizer()
		actual := tokenizer.Tokenize(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
		}
	}
}

View File

@ -0,0 +1,114 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package unicode_word_boundary
// #cgo pkg-config: icu-uc
// #include <stdio.h>
// #include <stdlib.h>
// #include "unicode/utypes.h"
// #include "unicode/uchar.h"
// #include "unicode/ubrk.h"
// #include "unicode/ustring.h"
import "C"
import "log"
import "unsafe"
import "github.com/couchbaselabs/bleve/analysis"
// UnicodeWordBoundaryTokenizer tokenizes text on Unicode word boundaries
// using the ICU break-iterator API via cgo.
type UnicodeWordBoundaryTokenizer struct {
	// locale is the C string passed to ubrk_open; nil requests ICU's
	// default behavior.
	// NOTE(review): ownership/freeing of this allocation should be audited —
	// nothing obvious in this file releases it at end of life.
	locale *C.char
}

// NewUnicodeWordBoundaryTokenizer returns a tokenizer with a nil (default)
// locale.
func NewUnicodeWordBoundaryTokenizer() *UnicodeWordBoundaryTokenizer {
	return &UnicodeWordBoundaryTokenizer{}
}

// NewUnicodeWordBoundaryCustomLocaleTokenizer returns a tokenizer that will
// open its break iterator with the given ICU locale name (e.g. "th_TH").
func NewUnicodeWordBoundaryCustomLocaleTokenizer(locale string) *UnicodeWordBoundaryTokenizer {
	return &UnicodeWordBoundaryTokenizer{
		locale: C.CString(locale),
	}
}
// Tokenize splits input into word tokens using an ICU break iterator for
// the tokenizer's locale. Start/End offsets in the returned tokens are
// UTF-8 byte positions into input; Position is 1-based and counts only word
// boundaries whose rule status is non-zero (skipping whitespace/punctuation
// segments).
func (t *UnicodeWordBoundaryTokenizer) Tokenize(input []byte) analysis.TokenStream {
	rv := make(analysis.TokenStream, 0)
	// BUG FIX: the original deferred C.free(unsafe.Pointer(t.locale)) here,
	// which released the tokenizer's locale string on the first call and
	// left a dangling pointer for every subsequent call. The locale must
	// live as long as the tokenizer, so it is no longer freed per call.
	if len(input) < 1 {
		return rv
	}
	// Reinterpret the Go byte slice as a *C.char for ICU.
	var myUnsafePointer = unsafe.Pointer(&(input[0]))
	var myCCharPointer *C.char = (*C.char)(myUnsafePointer)
	var inlen C.int32_t = C.int32_t(len(input))
	var buflen C.int32_t = C.int32_t(2*len(input) + 1) // worst case each byte becomes 2
	var stringToExamine []C.UChar = make([]C.UChar, buflen)
	var myUnsafePointerToExamine = unsafe.Pointer(&(stringToExamine[0]))
	var myUCharPointer *C.UChar = (*C.UChar)(myUnsafePointerToExamine)
	// Convert the UTF-8 input into UTF-16 (UChar) for the break iterator.
	C.u_uastrncpy(myUCharPointer, myCCharPointer, inlen)
	var err C.UErrorCode = C.U_ZERO_ERROR
	bi := C.ubrk_open(C.UBRK_WORD, t.locale, myUCharPointer, -1, &err)
	if err > C.U_ZERO_ERROR {
		log.Printf("error opening boundary iterator")
		return rv
	}
	defer C.ubrk_close(bi)
	position := 0
	var prev C.int32_t
	p := C.ubrk_first(bi)
	for p != C.UBRK_DONE {
		q := C.ubrk_getRuleStatus(bi)
		// Convert UTF-16 boundary offsets back to UTF-8 byte offsets by
		// measuring the UTF-8 length of each prefix (preflight call with a
		// nil destination; U_BUFFER_OVERFLOW_ERROR is expected and reset).
		var nilCString *C.char
		var indexA C.int32_t
		C.u_strToUTF8(nilCString, 0, &indexA, myUCharPointer, prev, &err)
		if err > C.U_ZERO_ERROR && err != C.U_BUFFER_OVERFLOW_ERROR {
			log.Printf("error converting boundary %d", err)
			return rv
		} else {
			err = C.U_ZERO_ERROR
		}
		var indexB C.int32_t
		C.u_strToUTF8(nilCString, 0, &indexB, myUCharPointer, p, &err)
		if err > C.U_ZERO_ERROR && err != C.U_BUFFER_OVERFLOW_ERROR {
			log.Printf("error converting boundary %d", err)
			return rv
		} else {
			err = C.U_ZERO_ERROR
		}
		if q != 0 {
			position += 1
			token := analysis.Token{
				Start:    int(indexA),
				End:      int(indexB),
				Term:     input[indexA:indexB],
				Position: position,
			}
			rv = append(rv, &token)
		}
		prev = p
		p = C.ubrk_next(bi)
	}
	return rv
}

View File

@ -0,0 +1,125 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package unicode_word_boundary
import (
"reflect"
"testing"
"github.com/couchbaselabs/bleve/analysis"
)
// TestBoundary exercises the ICU tokenizer with English, Japanese, and Thai
// input (Thai requires a dictionary-based break iterator, hence the th_TH
// locale). Offsets are UTF-8 byte positions.
// NOTE(review): the fixtures use positional analysis.Token literals in the
// order Start, End, Term, Position; they break if the struct's field order
// changes.
func TestBoundary(t *testing.T) {
	tests := []struct {
		input  []byte
		locale string
		output analysis.TokenStream
	}{
		{
			[]byte("Hello World"),
			"en_US",
			analysis.TokenStream{
				{
					0,
					5,
					[]byte("Hello"),
					1,
				},
				{
					6,
					11,
					[]byte("World"),
					2,
				},
			},
		},
		{
			[]byte("こんにちは世界"),
			"en_US",
			analysis.TokenStream{
				{
					0,
					15,
					[]byte("こんにちは"),
					1,
				},
				{
					15,
					21,
					[]byte("世界"),
					2,
				},
			},
		},
		{
			[]byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"),
			"th_TH",
			analysis.TokenStream{
				{
					0,
					9,
					[]byte("แยก"),
					1,
				},
				{
					9,
					15,
					[]byte("คำ"),
					2,
				},
				{
					15,
					27,
					[]byte("ภาษา"),
					3,
				},
				{
					27,
					36,
					[]byte("ไทย"),
					4,
				},
				{
					36,
					42,
					[]byte("ก็"),
					5,
				},
				{
					42,
					57,
					[]byte("ทำได้"),
					6,
				},
				{
					57,
					63,
					[]byte("นะ"),
					7,
				},
				{
					63,
					72,
					[]byte("จ้ะ"),
					8,
				},
			},
		},
	}
	for _, test := range tests {
		tokenizer := NewUnicodeWordBoundaryCustomLocaleTokenizer(test.locale)
		actual := tokenizer.Tokenize(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
		}
	}
}

59
analysis/type.go Normal file
View File

@ -0,0 +1,59 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package analysis
import (
"fmt"
)
// CharFilter transforms raw input bytes before tokenization (e.g. the HTML
// char filter, which blanks out markup tags).
type CharFilter interface {
	Filter([]byte) []byte
}
// Token is a single term produced by analysis, carrying its byte offsets
// within the original input and its 1-based position in the stream.
// Field order must not change: fixtures elsewhere construct Tokens with
// positional composite literals.
type Token struct {
	Start    int
	End      int
	Term     []byte
	Position int
}

// String renders the token's offsets, position, and term for debugging.
func (tok *Token) String() string {
	return fmt.Sprintf("Start: %d End: %d Position: %d Token: %s",
		tok.Start, tok.End, tok.Position, string(tok.Term))
}
// TokenStream is an ordered sequence of tokens.
type TokenStream []*Token

// Tokenizer splits raw bytes into a TokenStream.
type Tokenizer interface {
	Tokenize([]byte) TokenStream
}

// TokenFilter transforms a TokenStream, e.g. dropping or rewriting tokens.
type TokenFilter interface {
	Filter(TokenStream) TokenStream
}

// Analyzer is a complete analysis pipeline: char filters run first over the
// raw bytes, then the tokenizer, then the token filters in order.
type Analyzer struct {
	CharFilters  []CharFilter
	Tokenizer    Tokenizer
	TokenFilters []TokenFilter
}
// Analyze runs input through the configured char filters, then the
// tokenizer, then the token filters, returning the final token stream.
func (a *Analyzer) Analyze(input []byte) TokenStream {
	// Ranging over a nil slice is a no-op, so the original's explicit nil
	// checks around these loops were redundant and have been removed.
	for _, cf := range a.CharFilters {
		input = cf.Filter(input)
	}
	tokens := a.Tokenizer.Tokenize(input)
	for _, tf := range a.TokenFilters {
		tokens = tf.Filter(tokens)
	}
	return tokens
}

34
document/document.go Normal file
View File

@ -0,0 +1,34 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package document
import (
"encoding/json"
)
// Document is the unit of indexing: an ID plus a list of fields.
type Document struct {
    ID     string   `json:"id"`
    Fields []*Field `json:"fields"`
}
// NewDocument creates an empty document with the given ID. Fields is
// initialized non-nil so it JSON-encodes as [] rather than null.
func NewDocument(id string) *Document {
    rv := Document{ID: id}
    rv.Fields = make([]*Field, 0)
    return &rv
}
// AddField appends a field to the document.
func (d *Document) AddField(f *Field) {
    d.Fields = append(d.Fields, f)
}
// String renders the document as indented JSON, for debugging.
func (d *Document) String() string {
    // renamed local from `bytes` (shadows the common stdlib package
    // name); marshalling error is deliberately ignored — this is a
    // best-effort debug representation
    data, _ := json.MarshalIndent(d, "", " ")
    return string(data)
}

29
document/field.go Normal file
View File

@ -0,0 +1,29 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package document
import (
"github.com/couchbaselabs/bleve/analysis"
)
// Field is a single named value within a document, together with the
// analyzer used to process it and its indexing options.
type Field struct {
    Name            string
    IndexingOptions int // bitmask of INDEX_FIELD / STORE_FIELD / INCLUDE_TERM_VECTORS
    Analyzer        *analysis.Analyzer
    Value           []byte
}
// NewField creates a field with an explicit analyzer and indexing
// options.
func NewField(name string, value []byte, indexingOptions int, analyzer *analysis.Analyzer) *Field {
    f := Field{
        Name:            name,
        Value:           value,
        IndexingOptions: indexingOptions,
        Analyzer:        analyzer,
    }
    return &f
}

41
document/field_text.go Normal file
View File

@ -0,0 +1,41 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package document
import (
"log"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
)
// standardAnalyzer is the package-wide analyzer shared by all text
// fields.
var standardAnalyzer *analysis.Analyzer

func init() {
    // NOTE(review): init aborts the whole process if the standard
    // analyzer cannot be built; text fields are unusable without it
    var err error
    standardAnalyzer, err = standard_analyzer.NewStandardAnalyzer()
    if err != nil {
        log.Fatal(err)
    }
}
// DEFAULT_TEXT_INDEXING_OPTIONS is what NewTextField applies: the
// text is indexed, but not stored and carries no term vectors.
const DEFAULT_TEXT_INDEXING_OPTIONS = INDEX_FIELD

// NewTextField builds a text field with the default indexing options.
func NewTextField(name string, value []byte) *Field {
    return NewTextFieldWithIndexingOptions(name, value, DEFAULT_TEXT_INDEXING_OPTIONS)
}

// NewTextFieldWithIndexingOptions builds a text field with explicit
// indexing options, analyzed by the shared standard analyzer.
func NewTextFieldWithIndexingOptions(name string, value []byte, indexingOptions int) *Field {
    f := Field{
        Name:            name,
        Value:           value,
        IndexingOptions: indexingOptions,
        Analyzer:        standardAnalyzer,
    }
    return &f
}

View File

@ -0,0 +1,27 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package document
// Indexing option flags, combined as a bitmask on Field.IndexingOptions.
const (
    INDEX_FIELD = 1 << iota
    STORE_FIELD
    INCLUDE_TERM_VECTORS
)

// IsIndexedField reports whether the options include INDEX_FIELD.
func IsIndexedField(arg int) bool {
    return arg&INDEX_FIELD == INDEX_FIELD
}

// IsStoredField reports whether the options include STORE_FIELD.
func IsStoredField(arg int) bool {
    return arg&STORE_FIELD == STORE_FIELD
}

// IncludeTermVectors reports whether the options include INCLUDE_TERM_VECTORS.
func IncludeTermVectors(arg int) bool {
    return arg&INCLUDE_TERM_VECTORS == INCLUDE_TERM_VECTORS
}

View File

@ -0,0 +1,68 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package document
import (
"testing"
)
// TestIndexingOptions table-tests the three bitmask predicates across
// representative flag combinations.
func TestIndexingOptions(t *testing.T) {
    tests := []struct {
        indexingOptions    int
        isIndexed          bool
        isStored           bool
        includeTermVectors bool
    }{
        {
            indexingOptions:    INDEX_FIELD | STORE_FIELD | INCLUDE_TERM_VECTORS,
            isIndexed:          true,
            isStored:           true,
            includeTermVectors: true,
        },
        {
            indexingOptions:    INDEX_FIELD | INCLUDE_TERM_VECTORS,
            isIndexed:          true,
            isStored:           false,
            includeTermVectors: true,
        },
        {
            indexingOptions:    STORE_FIELD | INCLUDE_TERM_VECTORS,
            isIndexed:          false,
            isStored:           true,
            includeTermVectors: true,
        },
        {
            indexingOptions:    INDEX_FIELD,
            isIndexed:          true,
            isStored:           false,
            includeTermVectors: false,
        },
        {
            indexingOptions:    STORE_FIELD,
            isIndexed:          false,
            isStored:           true,
            includeTermVectors: false,
        },
    }
    // each row checks all three predicates against the same bitmask
    for _, test := range tests {
        actuallyIndexed := IsIndexedField(test.indexingOptions)
        if actuallyIndexed != test.isIndexed {
            t.Errorf("expected indexed to be %v, got %v for %d", test.isIndexed, actuallyIndexed, test.indexingOptions)
        }
        actuallyStored := IsStoredField(test.indexingOptions)
        if actuallyStored != test.isStored {
            t.Errorf("expected stored to be %v, got %v for %d", test.isStored, actuallyStored, test.indexingOptions)
        }
        actuallyIncludeTermVectors := IncludeTermVectors(test.indexingOptions)
        if actuallyIncludeTermVectors != test.includeTermVectors {
            t.Errorf("expected includeTermVectors to be %v, got %v for %d", test.includeTermVectors, actuallyIncludeTermVectors, test.indexingOptions)
        }
    }
}

View File

@ -0,0 +1,63 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package main
import (
"flag"
"io/ioutil"
"log"
"github.com/couchbaselabs/bleve/index/upside_down"
"github.com/couchbaselabs/bleve/shredder"
)
// command-line flags: where to read JSON documents from and where to
// build the index
var jsonDir = flag.String("jsonDir", "json", "json directory")
var indexDir = flag.String("indexDir", "index", "index directory")

// main shreds every JSON file in -jsonDir into a document and indexes
// it into the upside_down index at -indexDir. Any failure aborts the
// whole run via log.Fatal.
func main() {
    flag.Parse()
    // create a automatic JSON document shredder
    jsonShredder := shredder.NewAutoJsonShredder()
    // create a new index
    index := upside_down.NewUpsideDownCouch(*indexDir)
    err := index.Open()
    if err != nil {
        log.Fatal(err)
    }
    defer index.Close()
    // open the directory
    dirEntries, err := ioutil.ReadDir(*jsonDir)
    if err != nil {
        log.Fatal(err)
    }
    // walk the directory entries
    for _, dirEntry := range dirEntries {
        // read the bytes
        // NOTE(review): consider filepath.Join here for portable path
        // construction instead of string concatenation
        jsonBytes, err := ioutil.ReadFile(*jsonDir + "/" + dirEntry.Name())
        if err != nil {
            log.Fatal(err)
        }
        // shred them into a document; the file name becomes the doc ID
        doc, err := jsonShredder.Shred(dirEntry.Name(), jsonBytes)
        if err != nil {
            log.Fatal(err)
        }
        //log.Printf("%+v", doc)
        // update the index
        err = index.Update(doc)
        if err != nil {
            log.Fatal(err)
        }
    }
}

View File

@ -0,0 +1,70 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package main
import (
"flag"
"fmt"
"log"
"github.com/couchbaselabs/bleve/index/upside_down"
"github.com/couchbaselabs/bleve/search"
)
// command-line flags: the field to query, the index location, and the
// maximum number of hits to display
var field = flag.String("field", "description", "field to query")
var indexDir = flag.String("indexDir", "index", "index directory")
var limit = flag.Int("limit", 10, "limit to first N results")

// main runs a single-term query (first CLI argument) against the
// upside_down index at -indexDir and prints the top -limit hits.
func main() {
    flag.Parse()
    if flag.NArg() < 1 {
        log.Fatal("Specify search term")
    }
    // open index
    index := upside_down.NewUpsideDownCouch(*indexDir)
    err := index.Open()
    if err != nil {
        log.Fatal(err)
    }
    defer index.Close()
    // build a term query over the requested field
    tq := search.TermQuery{
        Term:     flag.Arg(0),
        Field:    *field,
        BoostVal: 1.0,
        Explain:  true,
    }
    collector := search.NewTopScorerCollector(*limit)
    searcher, err := tq.Searcher(index)
    if err != nil {
        // log.Fatalf exits the process; the unreachable `return`
        // statements that followed these calls have been removed
        log.Fatalf("searcher error: %v", err)
    }
    err = collector.Collect(searcher)
    if err != nil {
        log.Fatalf("search error: %v", err)
    }
    results := collector.Results()
    if len(results) == 0 {
        fmt.Printf("No matches\n")
    } else {
        // clamp the displayed upper bound to the actual hit count
        last := uint64(*limit)
        if searcher.Count() < last {
            last = searcher.Count()
        }
        fmt.Printf("%d matches, showing %d through %d\n", searcher.Count(), 1, last)
        for i, result := range results {
            fmt.Printf("%2d. %s (%f)\n", i+1, result.ID, result.Score)
        }
    }
}

48
index/index.go Normal file
View File

@ -0,0 +1,48 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package index
import (
"github.com/couchbaselabs/bleve/document"
)
// Index is the contract every index implementation satisfies.
type Index interface {
    Open() error
    Close()
    Update(doc *document.Document) error
    Delete(id string) error
    TermFieldReader(term []byte, field string) (TermFieldReader, error)
    DocCount() uint64
    Dump()
}

// TermFieldVector describes one occurrence of a term within a field:
// its token position and its start/end byte offsets.
type TermFieldVector struct {
    Field string
    Pos   uint64
    Start uint64
    End   uint64
}

// TermFieldDoc is one posting: the document ID, how often the term
// occurs in it, the field length normalization factor, and optional
// term vectors.
type TermFieldDoc struct {
    ID      string
    Freq    uint64
    Norm    float64
    Vectors []*TermFieldVector
}

// TermFieldReader iterates over the documents containing a given term
// in a given field. Next and Advance return nil (with nil error) when
// no further documents exist.
type TermFieldReader interface {
    Next() (*TermFieldDoc, error)
    Advance(ID string) (*TermFieldDoc, error)
    Count() uint64
    Close()
}

227
index/mock/mock.go Normal file
View File

@ -0,0 +1,227 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package mock
import (
"fmt"
"math"
"sort"
"github.com/couchbaselabs/bleve/analysis"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
)
// mockFreq holds per-document stats for one term in one field.
type mockFreq struct {
    freq    uint64
    norm    float64
    vectors []*index.TermFieldVector
}

// key doc id
type mockDocFreq map[string]*mockFreq

//key field
type mockFieldDocFreq map[string]mockDocFreq

// 2 dim array
// inner level are always pairs (field name, term)
type mockBackIndexEntry [][]string

// MockIndex is a purely in-memory index implementation intended for
// tests; it favors simplicity over performance.
type MockIndex struct {
    //this level of the map, the key is the term
    termIndex map[string]mockFieldDocFreq
    // key is docid
    backIndex map[string]mockBackIndexEntry
    docCount  uint64
    // NOTE(review): analyzer map is populated in NewMockIndex but not
    // read anywhere in this file — presumably per-field analyzers;
    // confirm intended use
    analyzer map[string]*analysis.Analyzer
}
// NewMockIndexWithDocs builds a mock index pre-populated with docs.
func NewMockIndexWithDocs(docs []*document.Document) *MockIndex {
    idx := NewMockIndex()
    for _, d := range docs {
        idx.Update(d)
    }
    return idx
}
// NewMockIndex creates an empty in-memory mock index.
func NewMockIndex() *MockIndex {
    return &MockIndex{
        termIndex: make(map[string]mockFieldDocFreq),
        backIndex: make(map[string]mockBackIndexEntry),
        analyzer:  make(map[string]*analysis.Analyzer),
    }
}
// Open is a no-op for the in-memory mock index.
func (index *MockIndex) Open() error {
    return nil
}

// Close is a no-op for the in-memory mock index.
func (index *MockIndex) Close() {}
// for this implementation we dont care about performance
// update is simply delete then add
func (index *MockIndex) Update(doc *document.Document) error {
    // remove any existing version of this doc first
    index.Delete(doc.ID)
    // backIndexEntry records every (field, term) pair written, so a
    // later Delete can locate and remove the forward entries
    backIndexEntry := make(mockBackIndexEntry, 0)
    for _, field := range doc.Fields {
        analyzer := field.Analyzer
        tokens := analyzer.Analyze(field.Value)
        fieldLength := len(tokens) // number of tokens in this doc field
        // NOTE(review): a zero-token field yields fieldNorm = +Inf
        // (1/Sqrt(0)) — confirm whether empty fields can reach here
        fieldNorm := 1.0 / math.Sqrt(float64(fieldLength))
        tokenFreqs := analysis.TokenFrequency(tokens)
        for _, tf := range tokenFreqs {
            mf := mockFreq{
                freq: uint64(len(tf.Locations)),
                norm: fieldNorm,
            }
            if document.IncludeTermVectors(field.IndexingOptions) {
                mf.vectors = index.mockVectorsFromTokenFreq(field.Name, tf)
            }
            termString := string(tf.Term)
            // lazily create the term -> field -> doc map levels
            fieldMap, ok := index.termIndex[termString]
            if !ok {
                fieldMap = make(map[string]mockDocFreq)
                index.termIndex[termString] = fieldMap
            }
            docMap, ok := fieldMap[field.Name]
            if !ok {
                docMap = make(map[string]*mockFreq)
                fieldMap[field.Name] = docMap
            }
            docMap[doc.ID] = &mf
            backIndexInnerEntry := []string{field.Name, termString}
            backIndexEntry = append(backIndexEntry, backIndexInnerEntry)
        }
    }
    index.backIndex[doc.ID] = backIndexEntry
    index.docCount += 1
    return nil
}
// Delete removes a document and all of its forward index entries,
// located via the back index. Deleting an unknown ID is a no-op (and
// does not decrement the doc count).
func (index *MockIndex) Delete(id string) error {
    backIndexEntry, existed := index.backIndex[id]
    if existed {
        for _, backIndexPair := range backIndexEntry {
            if len(backIndexPair) == 2 {
                field := backIndexPair[0]
                term := backIndexPair[1]
                delete(index.termIndex[term][field], id)
                // prune now-empty inner maps so the term index does
                // not accumulate empty entries
                if len(index.termIndex[term][field]) == 0 {
                    delete(index.termIndex[term], field)
                    if len(index.termIndex[term]) == 0 {
                        delete(index.termIndex, term)
                    }
                }
            }
        }
        delete(index.backIndex, id)
        index.docCount -= 1
    }
    return nil
}
// TermFieldReader returns a reader over the documents containing term
// in field, iterated in sorted doc ID order. A missing term or field
// yields an empty (but valid) reader.
func (mi *MockIndex) TermFieldReader(term []byte, field string) (index.TermFieldReader, error) {
    // receiver renamed from `index` to `mi`: the old name shadowed the
    // imported `index` package inside the body and was inconsistent
    // with mockVectorsFromTokenFreq's receiver
    fdf, ok := mi.termIndex[string(term)]
    if !ok {
        fdf = make(mockFieldDocFreq)
    }
    docFreqs, ok := fdf[field]
    if !ok {
        docFreqs = make(mockDocFreq)
    }
    mtfr := mockTermFieldReader{
        index:        docFreqs,
        sortedDocIds: make(sort.StringSlice, len(docFreqs)),
        curr:         -1, // before the first document
    }
    i := 0
    for k := range docFreqs {
        mtfr.sortedDocIds[i] = k
        i++
    }
    sort.Sort(mtfr.sortedDocIds)
    return &mtfr, nil
}
// DocCount returns the number of documents currently in the index.
func (index *MockIndex) DocCount() uint64 {
    return index.docCount
}

// mockTermFieldReader iterates the documents for one term/field pair
// in sorted doc ID order. curr is the index of the last doc returned
// (-1 before the first call to Next).
type mockTermFieldReader struct {
    index        mockDocFreq
    sortedDocIds sort.StringSlice
    curr         int
}
// Next returns the next matching document, or nil when the list is
// exhausted.
func (reader *mockTermFieldReader) Next() (*index.TermFieldDoc, error) {
    if reader.curr+1 >= len(reader.sortedDocIds) {
        return nil, nil
    }
    reader.curr++
    id := reader.sortedDocIds[reader.curr]
    mf := reader.index[id]
    return &index.TermFieldDoc{ID: id, Freq: mf.freq, Norm: mf.norm, Vectors: mf.vectors}, nil
}
// Advance moves the reader to the first document whose ID is >= ID
// and returns it, or nil if no such document exists.
//
// Fixes two defects in the original: (1) calling Advance on a fresh
// reader (curr == -1) indexed sortedDocIds[-1] and panicked; (2) when
// every remaining doc ID was < ID, the original returned the last
// document anyway instead of nil.
func (reader *mockTermFieldReader) Advance(ID string) (*index.TermFieldDoc, error) {
    i := reader.curr
    if i < 0 {
        i = 0 // fresh reader: start scanning at the beginning
    }
    for i < len(reader.sortedDocIds) && reader.sortedDocIds[i] < ID {
        i++
    }
    reader.curr = i
    if i >= len(reader.sortedDocIds) {
        return nil, nil
    }
    id := reader.sortedDocIds[i]
    mf := reader.index[id]
    return &index.TermFieldDoc{ID: id, Freq: mf.freq, Norm: mf.norm, Vectors: mf.vectors}, nil
}
// Count returns the number of documents matching the term/field pair.
func (reader *mockTermFieldReader) Count() uint64 {
    return uint64(len(reader.sortedDocIds))
}

// Close is a no-op; the mock reader holds no external resources.
func (reader *mockTermFieldReader) Close() {}
// mockVectorsFromTokenFreq converts a token's locations into term
// field vectors for the given field.
func (mi *MockIndex) mockVectorsFromTokenFreq(field string, tf *analysis.TokenFreq) []*index.TermFieldVector {
    rv := make([]*index.TermFieldVector, 0, len(tf.Locations))
    for _, loc := range tf.Locations {
        rv = append(rv, &index.TermFieldVector{
            Field: field,
            Pos:   uint64(loc.Position),
            Start: uint64(loc.Start),
            End:   uint64(loc.End),
        })
    }
    return rv
}
// Dump satisfies the Index interface but is not implemented for the
// mock index.
func (mi *MockIndex) Dump() {
    fmt.Println("dump not implemented")
}

124
index/mock/mock_test.go Normal file
View File

@ -0,0 +1,124 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package mock
import (
"reflect"
"testing"
_ "github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
)
// TestCRUD exercises the mock index end to end: create, read via
// TermFieldReader, update (replacing a doc's terms), and delete,
// checking doc counts and posting contents at each step.
func TestCRUD(t *testing.T) {
    i := NewMockIndex()
    // create doc, assert doc count goes up
    doc1 := document.NewDocument("1")
    doc1.AddField(document.NewTextField("name", []byte("marty")))
    i.Update(doc1)
    count := i.DocCount()
    if count != 1 {
        t.Errorf("expected document count to be 1, was: %d", count)
    }
    // add another doc, assert doc count goes up again
    doc2 := document.NewDocument("2")
    doc2.AddField(document.NewTextField("name", []byte("bob")))
    i.Update(doc2)
    count = i.DocCount()
    if count != 2 {
        t.Errorf("expected document count to be 2, was: %d", count)
    }
    // search for doc with term that should exist
    expectedMatch := &index.TermFieldDoc{
        ID:   "1",
        Freq: 1,
        Norm: 1,
    }
    tfr, err := i.TermFieldReader([]byte("marty"), "name")
    if err != nil {
        t.Errorf("unexpected error: %v", err)
    }
    match, err := tfr.Next()
    if err != nil {
        t.Errorf("unexpected error: %v", err)
    }
    if !reflect.DeepEqual(expectedMatch, match) {
        t.Errorf("got %v, expected %v", match, expectedMatch)
    }
    // the reader holds exactly one posting, so the next call is nil
    nomatch, err := tfr.Next()
    if err != nil {
        t.Errorf("unexpected error: %v", err)
    }
    if nomatch != nil {
        t.Errorf("expected nil after last match")
    }
    // update doc, assert doc count doesn't go up
    doc1 = document.NewDocument("1")
    doc1.AddField(document.NewTextField("name", []byte("salad")))
    doc1.AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS))
    i.Update(doc1)
    count = i.DocCount()
    if count != 2 {
        t.Errorf("expected document count to be 2, was: %d", count)
    }
    // perform the original search again, should NOT find anything this time
    tfr, err = i.TermFieldReader([]byte("marty"), "name")
    if err != nil {
        t.Errorf("unexpected error: %v", err)
    }
    nomatch, err = tfr.Next()
    if err != nil {
        t.Errorf("unexpected error: %v", err)
    }
    if nomatch != nil {
        t.Errorf("expected no matches, found one")
        t.Logf("%v", i)
    }
    // delete a doc, ensure the count is 1
    err = i.Delete("2")
    if err != nil {
        t.Errorf("unexpected error: %v", err)
    }
    count = i.DocCount()
    if count != 1 {
        t.Errorf("expected document count to be 1, was: %d", count)
    }
    // "rice" is token 3 of "eat more rice" at byte offsets [9,13);
    // norm is 1/sqrt(3) for a three-token field
    expectedMatch = &index.TermFieldDoc{
        ID:   "1",
        Freq: 1,
        Norm: 0.5773502691896258,
        Vectors: []*index.TermFieldVector{
            &index.TermFieldVector{
                Field: "desc",
                Pos:   3,
                Start: 9,
                End:   13,
            },
        },
    }
    tfr, err = i.TermFieldReader([]byte("rice"), "desc")
    if err != nil {
        t.Errorf("unexpected error: %v", err)
    }
    match, err = tfr.Next()
    if err != nil {
        t.Errorf("unexpected error: %v", err)
    }
    if !reflect.DeepEqual(expectedMatch, match) {
        t.Errorf("got %#v, expected %#v", match, expectedMatch)
    }
}

101
index/upside_down/reader.go Normal file
View File

@ -0,0 +1,101 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package upside_down
import (
"bytes"
"github.com/jmhodges/levigo"
"github.com/couchbaselabs/bleve/index"
)
// UpsideDownCouchTermFieldReader iterates the TermFrequencyRows for a
// single term/field pair using a levigo iterator positioned within
// that key range.
type UpsideDownCouchTermFieldReader struct {
    index    *UpsideDownCouch
    iterator *levigo.Iterator
    // count is read at construction from the row stored under an
    // empty doc ID, which appears to hold the aggregate frequency
    count uint64
    term  []byte
    field uint16
}
// newUpsideDownCouchTermFieldReader positions an iterator at the
// term/field summary row (the key with an empty doc ID) and captures
// the aggregate count from it when present.
func newUpsideDownCouchTermFieldReader(index *UpsideDownCouch, term []byte, field uint16) (*UpsideDownCouchTermFieldReader, error) {
    ro := defaultReadOptions()
    it := index.db.NewIterator(ro)
    tfr := NewTermFrequencyRow(term, field, "", 0, 0)
    it.Seek(tfr.Key())
    var count uint64
    if !it.Valid() {
        // error path: the caller never receives a reader to Close, so
        // the iterator must be released here — the original leaked it
        err := it.GetError()
        it.Close()
        return nil, err
    }
    if bytes.Equal(it.Key(), tfr.Key()) {
        // exact hit on the empty-doc-ID row: it carries the count
        tfr = ParseFromKeyValue(it.Key(), it.Value()).(*TermFrequencyRow)
        count = tfr.freq
    }
    return &UpsideDownCouchTermFieldReader{
        index:    index,
        iterator: it,
        count:    count,
        term:     term,
        field:    field,
    }, nil
}
// Count returns the aggregate frequency captured when the reader was
// constructed.
func (r *UpsideDownCouchTermFieldReader) Count() uint64 {
    return r.count
}
// Next advances the iterator and returns the next posting for this
// term/field, or nil once the key prefix no longer matches.
func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) {
    r.iterator.Next()
    if !r.iterator.Valid() {
        return nil, r.iterator.GetError()
    }
    prefix := NewTermFrequencyRow(r.term, r.field, "", 0, 0)
    if !bytes.HasPrefix(r.iterator.Key(), prefix.Key()) {
        // walked past the rows for this term/field
        return nil, nil
    }
    row := ParseFromKeyValue(r.iterator.Key(), r.iterator.Value()).(*TermFrequencyRow)
    return &index.TermFieldDoc{
        ID:      string(row.doc),
        Freq:    row.freq,
        Norm:    float64(row.norm),
        Vectors: r.index.termFieldVectorsFromTermVectors(row.vectors),
    }, nil
}
// Advance seeks to the first posting whose doc ID is >= docId and
// returns it, or nil if the seek left this term/field's key range.
func (r *UpsideDownCouchTermFieldReader) Advance(docId string) (*index.TermFieldDoc, error) {
    target := NewTermFrequencyRow(r.term, r.field, docId, 0, 0)
    r.iterator.Seek(target.Key())
    if !r.iterator.Valid() {
        return nil, r.iterator.GetError()
    }
    prefix := NewTermFrequencyRow(r.term, r.field, "", 0, 0)
    if !bytes.HasPrefix(r.iterator.Key(), prefix.Key()) {
        // seeked past the rows for this term/field
        return nil, nil
    }
    row := ParseFromKeyValue(r.iterator.Key(), r.iterator.Value()).(*TermFrequencyRow)
    return &index.TermFieldDoc{
        ID:      string(row.doc),
        Freq:    row.freq,
        Norm:    float64(row.norm),
        Vectors: r.index.termFieldVectorsFromTermVectors(row.vectors),
    }, nil
}
// Close releases the underlying levigo iterator; callers must invoke
// it to avoid leaking iterator resources.
func (r *UpsideDownCouchTermFieldReader) Close() {
    r.iterator.Close()
}

View File

@ -0,0 +1,111 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package upside_down
import (
"os"
"reflect"
"testing"
_ "github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
)
// TestIndexReader indexes two documents into an on-disk upside_down
// index and exercises TermFieldReader: a missing term, counts,
// iteration to exhaustion, and stored term vectors.
func TestIndexReader(t *testing.T) {
    defer os.RemoveAll("test")
    idx := NewUpsideDownCouch("test")
    err := idx.Open()
    if err != nil {
        t.Errorf("error opening index: %v", err)
    }
    defer idx.Close()
    var expectedCount uint64 = 0
    doc := document.NewDocument("1")
    doc.AddField(document.NewTextField("name", []byte("test")))
    err = idx.Update(doc)
    if err != nil {
        t.Errorf("Error updating index: %v", err)
    }
    expectedCount += 1
    doc = document.NewDocument("2")
    doc.AddField(document.NewTextField("name", []byte("test test test")))
    doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS))
    err = idx.Update(doc)
    if err != nil {
        t.Errorf("Error updating index: %v", err)
    }
    expectedCount += 1
    // first look for a term that doesnt exist
    reader, err := idx.TermFieldReader([]byte("nope"), "name")
    if err != nil {
        t.Errorf("Error accessing term field reader: %v", err)
    }
    count := reader.Count()
    if count != 0 {
        t.Errorf("Expected doc count to be: %d got: %d", 0, count)
    }
    reader.Close()
    reader, err = idx.TermFieldReader([]byte("test"), "name")
    if err != nil {
        t.Errorf("Error accessing term field reader: %v", err)
    }
    defer reader.Close()
    expectedCount = 2
    count = reader.Count()
    if count != expectedCount {
        // fixed typo in failure message ("Exptected")
        t.Errorf("Expected doc count to be: %d got: %d", expectedCount, count)
    }
    var match *index.TermFieldDoc
    var actualCount uint64
    match, err = reader.Next()
    for err == nil && match != nil {
        match, err = reader.Next()
        if err != nil {
            t.Errorf("unexpected error reading next")
        }
        actualCount += 1
    }
    if actualCount != count {
        // report against the real expected count instead of a hard-coded 2
        t.Errorf("expected %d matches, but only saw %d", count, actualCount)
    }
    // "rice" is token 3 of "eat more rice" at byte offsets [9,13)
    expectedMatch := &index.TermFieldDoc{
        ID:   "2",
        Freq: 1,
        Norm: 0.5773502588272095,
        Vectors: []*index.TermFieldVector{
            &index.TermFieldVector{
                Field: "desc",
                Pos:   3,
                Start: 9,
                End:   13,
            },
        },
    }
    tfr, err := idx.TermFieldReader([]byte("rice"), "desc")
    if err != nil {
        t.Errorf("unexpected error: %v", err)
    }
    match, err = tfr.Next()
    if err != nil {
        t.Errorf("unexpected error: %v", err)
    }
    if !reflect.DeepEqual(expectedMatch, match) {
        t.Errorf("got %#v, expected %#v", match, expectedMatch)
    }
}

412
index/upside_down/row.go Normal file
View File

@ -0,0 +1,412 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package upside_down
import (
"bytes"
"encoding/binary"
"fmt"
"io"
)
// BYTE_SEPARATOR delimits variable-length byte segments within keys
// and values (0xff never appears inside UTF-8 encoded text).
const BYTE_SEPARATOR byte = 0xff

// UpsideDownCouchRowStream delivers rows over a channel.
type UpsideDownCouchRowStream chan UpsideDownCouchRow

// UpsideDownCouchRow is any row persistable as a key/value pair.
type UpsideDownCouchRow interface {
    Key() []byte
    Value() []byte
}

// ParseFromKeyValue dispatches on the key's leading type byte and
// returns the decoded row, or nil for an unrecognized type byte.
func ParseFromKeyValue(key, value []byte) UpsideDownCouchRow {
    switch key[0] {
    case 'v':
        return NewVersionRowKV(key, value)
    case 'f':
        return NewFieldRowKV(key, value)
    case 't':
        return NewTermFrequencyRowKV(key, value)
    case 'b':
        return NewBackIndexRowKV(key, value)
    }
    return nil
}
// VERSION
// VersionRow records the index format version under the fixed key "v".
type VersionRow struct {
    version uint8
}

// Key returns the constant version key.
func (v *VersionRow) Key() []byte {
    return []byte{'v'}
}

// Value encodes the version as a single byte.
func (v *VersionRow) Value() []byte {
    // a lone uint8 needs no reflection-based binary.Write; the
    // encoding is the same single byte either way
    return []byte{v.version}
}

// String renders the row for debugging.
func (v *VersionRow) String() string {
    return fmt.Sprintf("Version: %d", v.version)
}

// NewVersionRow creates a version row for the given format version.
func NewVersionRow(version uint8) *VersionRow {
    return &VersionRow{
        version: version,
    }
}

// NewVersionRowKV decodes a version row from its stored key/value
// form. The key carries no information beyond the type byte and is
// ignored.
func NewVersionRowKV(key, value []byte) *VersionRow {
    rv := VersionRow{}
    buf := bytes.NewBuffer(value)
    err := binary.Read(buf, binary.LittleEndian, &rv.version)
    if err != nil {
        panic(fmt.Sprintf("binary.Read failed: %v", err))
    }
    return &rv
}
// FIELD definition
// FieldRow persists the mapping from a numeric field index to the
// field's name.
type FieldRow struct {
    index uint16
    name  string
}

// Key encodes 'f' followed by the little-endian field index.
func (f *FieldRow) Key() []byte {
    buf := new(bytes.Buffer)
    err := buf.WriteByte('f')
    if err != nil {
        panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
    }
    err = binary.Write(buf, binary.LittleEndian, f.index)
    if err != nil {
        panic(fmt.Sprintf("binary.Write failed: %v", err))
    }
    return buf.Bytes()
}

// Value encodes the field name terminated by BYTE_SEPARATOR.
func (f *FieldRow) Value() []byte {
    buf := new(bytes.Buffer)
    _, err := buf.WriteString(f.name)
    if err != nil {
        panic(fmt.Sprintf("Buffer.WriteString failed: %v", err))
    }
    err = buf.WriteByte(BYTE_SEPARATOR)
    if err != nil {
        panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
    }
    return buf.Bytes()
}

// String renders the row for debugging.
func (f *FieldRow) String() string {
    return fmt.Sprintf("Field: %d Name: %s", f.index, f.name)
}

// NewFieldRow creates a field row from an index and a name.
func NewFieldRow(index uint16, name string) *FieldRow {
    return &FieldRow{
        index: index,
        name:  name,
    }
}

// NewFieldRowKV decodes a field row from its stored key/value form.
func NewFieldRowKV(key, value []byte) *FieldRow {
    rv := FieldRow{}
    buf := bytes.NewBuffer(key)
    buf.ReadByte() // type
    err := binary.Read(buf, binary.LittleEndian, &rv.index)
    if err != nil {
        panic(fmt.Sprintf("binary.Read failed: %v", err))
    }
    buf = bytes.NewBuffer(value)
    // ReadString includes the delimiter in its result
    rv.name, err = buf.ReadString(BYTE_SEPARATOR)
    if err != nil {
        panic(fmt.Sprintf("Buffer.ReadString failed: %v", err))
    }
    rv.name = rv.name[:len(rv.name)-1] // trim off separator byte
    return &rv
}
// TERM FIELD FREQUENCY
// TermVector records one occurrence of a term: the field it occurred
// in, its token position, and its start/end byte offsets.
type TermVector struct {
    field uint16
    pos   uint64
    start uint64
    end   uint64
}

// String renders the vector for debugging.
func (tv *TermVector) String() string {
    return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d", tv.field, tv.pos, tv.start, tv.end)
}

// TermFrequencyRow is one posting: term, field, and doc identify the
// row; the value carries the frequency, the norm, and any term
// vectors. A row stored with an empty doc ID appears to act as the
// aggregate count row for the term (see reader.go) — confirm.
type TermFrequencyRow struct {
    term    []byte
    field   uint16
    doc     []byte
    freq    uint64
    norm    float32
    vectors []*TermVector
}
// Key encodes: 't', the term bytes, BYTE_SEPARATOR, the little-endian
// field index, then the raw doc ID bytes.
func (tfr *TermFrequencyRow) Key() []byte {
    buf := new(bytes.Buffer)
    err := buf.WriteByte('t')
    if err != nil {
        panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
    }
    _, err = buf.Write(tfr.term)
    if err != nil {
        panic(fmt.Sprintf("Buffer.Write failed: %v", err))
    }
    err = buf.WriteByte(BYTE_SEPARATOR)
    if err != nil {
        panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
    }
    err = binary.Write(buf, binary.LittleEndian, tfr.field)
    if err != nil {
        panic(fmt.Sprintf("binary.Write failed: %v", err))
    }
    // doc ID is last and unterminated; decoding reads it until EOF
    _, err = buf.Write(tfr.doc)
    if err != nil {
        panic(fmt.Sprintf("Buffer.Write failed: %v", err))
    }
    return buf.Bytes()
}
// Value encodes the little-endian frequency and norm, followed by one
// (field, pos, start, end) record per term vector.
func (tfr *TermFrequencyRow) Value() []byte {
    buf := new(bytes.Buffer)
    err := binary.Write(buf, binary.LittleEndian, tfr.freq)
    if err != nil {
        panic(fmt.Sprintf("binary.Write failed: %v", err))
    }
    err = binary.Write(buf, binary.LittleEndian, tfr.norm)
    if err != nil {
        panic(fmt.Sprintf("binary.Write failed: %v", err))
    }
    // vectors have no count prefix; the decoder reads records until EOF
    for _, vector := range tfr.vectors {
        err = binary.Write(buf, binary.LittleEndian, vector.field)
        if err != nil {
            panic(fmt.Sprintf("binary.Write failed: %v", err))
        }
        err = binary.Write(buf, binary.LittleEndian, vector.pos)
        if err != nil {
            panic(fmt.Sprintf("binary.Write failed: %v", err))
        }
        err = binary.Write(buf, binary.LittleEndian, vector.start)
        if err != nil {
            panic(fmt.Sprintf("binary.Write failed: %v", err))
        }
        err = binary.Write(buf, binary.LittleEndian, vector.end)
        if err != nil {
            panic(fmt.Sprintf("binary.Write failed: %v", err))
        }
    }
    return buf.Bytes()
}
// String renders the row for debugging.
func (tfr *TermFrequencyRow) String() string {
    return fmt.Sprintf("Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v", string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors)
}

// NewTermFrequencyRow creates a term frequency row without term
// vectors.
func NewTermFrequencyRow(term []byte, field uint16, doc string, freq uint64, norm float32) *TermFrequencyRow {
    return &TermFrequencyRow{
        term:  term,
        field: field,
        doc:   []byte(doc),
        freq:  freq,
        norm:  norm,
    }
}

// NewTermFrequencyRowWithTermVectors creates a term frequency row
// carrying term vectors.
func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, doc string, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow {
    return &TermFrequencyRow{
        term:    term,
        field:   field,
        doc:     []byte(doc),
        freq:    freq,
        norm:    norm,
        vectors: vectors,
    }
}
// NewTermFrequencyRowKV decodes a term frequency row from its stored
// key/value form (the inverse of Key/Value above). Term vectors are
// read until the value is exhausted, since no count is stored.
func NewTermFrequencyRowKV(key, value []byte) *TermFrequencyRow {
    rv := TermFrequencyRow{
        doc: []byte(""),
    }
    buf := bytes.NewBuffer(key)
    buf.ReadByte() // type
    var err error
    rv.term, err = buf.ReadBytes(BYTE_SEPARATOR)
    if err != nil {
        panic(fmt.Sprintf("Buffer.ReadString failed: %v", err))
    }
    rv.term = rv.term[:len(rv.term)-1] // trim off separator byte
    err = binary.Read(buf, binary.LittleEndian, &rv.field)
    if err != nil {
        panic(fmt.Sprintf("binary.Read failed: %v", err))
    }
    // the doc ID is the unterminated tail of the key, so this read is
    // expected to end with io.EOF
    doc, err := buf.ReadBytes(BYTE_SEPARATOR)
    if err != io.EOF {
        panic(fmt.Sprintf("expected binary.ReadString to end in EOF: %v", err))
    }
    if doc != nil {
        rv.doc = doc
    }
    buf = bytes.NewBuffer((value))
    err = binary.Read(buf, binary.LittleEndian, &rv.freq)
    if err != nil {
        panic(fmt.Sprintf("binary.Read failed: %v", err))
    }
    err = binary.Read(buf, binary.LittleEndian, &rv.norm)
    if err != nil {
        panic(fmt.Sprintf("binary.Read failed: %v", err))
    }
    // each vector record starts with its field; a clean io.EOF here
    // simply means there are no (more) vectors
    var field uint16
    err = binary.Read(buf, binary.LittleEndian, &field)
    if err != nil && err != io.EOF {
        panic(fmt.Sprintf("binary.Read failed: %v", err))
    }
    for err != io.EOF {
        tv := TermVector{}
        tv.field = field
        // at this point we expect at least one term vector
        if rv.vectors == nil {
            rv.vectors = make([]*TermVector, 0)
        }
        err = binary.Read(buf, binary.LittleEndian, &tv.pos)
        if err != nil {
            panic(fmt.Sprintf("binary.Read failed: %v", err))
        }
        err = binary.Read(buf, binary.LittleEndian, &tv.start)
        if err != nil {
            panic(fmt.Sprintf("binary.Read failed: %v", err))
        }
        err = binary.Read(buf, binary.LittleEndian, &tv.end)
        if err != nil {
            panic(fmt.Sprintf("binary.Read failed: %v", err))
        }
        rv.vectors = append(rv.vectors, &tv)
        // try to read next record (may not exist)
        err = binary.Read(buf, binary.LittleEndian, &field)
    }
    return &rv
}
// BackIndexEntry records one term/field pair that was indexed for a
// document; a document's back index row holds one entry per pair.
type BackIndexEntry struct {
	term  []byte
	field uint16 // numeric field id (see UpsideDownCouch.fieldIndexes)
}
// String renders the entry for debugging: term and field id.
func (bie *BackIndexEntry) String() string {
	term := string(bie.term)
	return fmt.Sprintf("Term: `%s` Field: %d", term, bie.field)
}
// BackIndexRow maps a document id to every term/field pair indexed for
// it; Update and Delete use it to find a document's prior rows without
// re-analyzing the document.
type BackIndexRow struct {
	doc     []byte // document id
	entries []*BackIndexEntry
}
// Key returns the storage key for this back index row:
// 'b' followed by the raw document id bytes.
func (br *BackIndexRow) Key() []byte {
	buf := new(bytes.Buffer)
	err := buf.WriteByte('b')
	if err != nil {
		panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
	}
	// the doc id is raw bytes; write it directly instead of going
	// through binary.Write (identical bytes, no reflection overhead)
	_, err = buf.Write(br.doc)
	if err != nil {
		panic(fmt.Sprintf("Buffer.Write failed: %v", err))
	}
	return buf.Bytes()
}
// Value serializes the entries: for each, the term bytes, a separator
// byte, then the field id as little-endian uint16.
func (br *BackIndexRow) Value() []byte {
	buf := new(bytes.Buffer)
	for _, entry := range br.entries {
		if _, err := buf.Write(entry.term); err != nil {
			panic(fmt.Sprintf("Buffer.Write failed: %v", err))
		}
		if err := buf.WriteByte(BYTE_SEPARATOR); err != nil {
			panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
		}
		if err := binary.Write(buf, binary.LittleEndian, entry.field); err != nil {
			panic(fmt.Sprintf("binary.Write failed: %v", err))
		}
	}
	return buf.Bytes()
}
// String renders the row for debugging: doc id plus all entries.
func (br *BackIndexRow) String() string {
	doc := string(br.doc)
	return fmt.Sprintf("Backindex DocId: `%s` Entries: %v", doc, br.entries)
}
// NewBackIndexRow builds a back index row for the given doc id.
func NewBackIndexRow(doc string, entries []*BackIndexEntry) *BackIndexRow {
	rv := BackIndexRow{
		doc:     []byte(doc),
		entries: entries,
	}
	return &rv
}
// NewBackIndexRowKV rebuilds a BackIndexRow from its stored key/value
// bytes (the inverse of Key()/Value()).
//
// Key layout:   'b' <doc id bytes>
// Value layout: repeated <term> BYTE_SEPARATOR <field:uint16 LE>
//
// Malformed input panics (panic messages mention ReadString though the
// code uses ReadBytes — text kept as-is here).
func NewBackIndexRowKV(key, value []byte) *BackIndexRow {
	rv := BackIndexRow{}
	buf := bytes.NewBuffer(key)
	buf.ReadByte() // type
	var err error
	// the doc id is the remainder of the key; no trailing separator,
	// so a successful read ends in io.EOF
	rv.doc, err = buf.ReadBytes(BYTE_SEPARATOR)
	if err != io.EOF {
		panic(fmt.Sprintf("expected binary.ReadString to end in EOF: %v", err))
	}
	buf = bytes.NewBuffer(value)
	rv.entries = make([]*BackIndexEntry, 0)
	var term []byte
	term, err = buf.ReadBytes(BYTE_SEPARATOR)
	if err != nil && err != io.EOF {
		panic(fmt.Sprintf("Buffer.ReadString failed: %v", err))
	}
	// each iteration consumes one <term, field> entry; loop ends when
	// the leading term read hits io.EOF
	for err != io.EOF {
		ent := BackIndexEntry{}
		ent.term = term[:len(term)-1] // trim off separator byte
		err = binary.Read(buf, binary.LittleEndian, &ent.field)
		if err != nil {
			panic(fmt.Sprintf("binary.Read failed: %v", err))
		}
		rv.entries = append(rv.entries, &ent)
		term, err = buf.ReadBytes(BYTE_SEPARATOR)
		if err != nil && err != io.EOF {
			panic(fmt.Sprintf("Buffer.ReadString failed: %v", err))
		}
	}
	return &rv
}

View File

@ -0,0 +1,89 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package upside_down
import (
"reflect"
"testing"
)
// TestRows round-trips each row type both ways: struct -> key/value
// bytes via Key()/Value(), checked against hand-computed byte layouts,
// then bytes -> struct via ParseFromKeyValue, checked for deep
// equality with the original.
func TestRows(t *testing.T) {
	tests := []struct {
		input  UpsideDownCouchRow
		outKey []byte
		outVal []byte
	}{
		{
			NewVersionRow(1),
			[]byte{'v'},
			[]byte{0x1},
		},
		{
			NewFieldRow(0, "name"),
			[]byte{'f', 0, 0},
			[]byte{'n', 'a', 'm', 'e', BYTE_SEPARATOR},
		},
		{
			NewFieldRow(1, "desc"),
			[]byte{'f', 1, 0},
			[]byte{'d', 'e', 's', 'c', BYTE_SEPARATOR},
		},
		{
			// field id 513 = 0x0201 little-endian -> bytes 1, 2
			NewFieldRow(513, "style"),
			[]byte{'f', 1, 2},
			[]byte{'s', 't', 'y', 'l', 'e', BYTE_SEPARATOR},
		},
		{
			// empty doc id: the count row form used for term totals
			NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "", 3, 3.14),
			[]byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0},
			[]byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64},
		},
		{
			NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 3, 3.14),
			[]byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64},
		},
		{
			// value carries three 26-byte term vector records after
			// the freq/norm header
			NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 3, 3.14, []*TermVector{&TermVector{field: 0, pos: 1, start: 3, end: 11}, &TermVector{field: 0, pos: 2, start: 23, end: 31}, &TermVector{field: 0, pos: 3, start: 43, end: 51}}),
			[]byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 43, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0},
		},
		{
			NewBackIndexRow("budweiser", []*BackIndexEntry{&BackIndexEntry{[]byte{'b', 'e', 'e', 'r'}, 0}}),
			[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0},
		},
		{
			NewBackIndexRow("budweiser", []*BackIndexEntry{&BackIndexEntry{[]byte{'b', 'e', 'e', 'r'}, 0}, &BackIndexEntry{[]byte{'b', 'e', 'a', 't'}, 1}}),
			[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'e', 'a', 't', BYTE_SEPARATOR, 1, 0},
		},
	}
	// test going from struct to k/v bytes
	for _, test := range tests {
		rk := test.input.Key()
		if !reflect.DeepEqual(rk, test.outKey) {
			t.Errorf("Expected key to be %v got: %v", test.outKey, rk)
		}
		rv := test.input.Value()
		if !reflect.DeepEqual(rv, test.outVal) {
			t.Errorf("Expected value to be %v got: %v", test.outVal, rv)
		}
	}
	// now test going back from k/v bytes to struct
	for _, test := range tests {
		row := ParseFromKeyValue(test.outKey, test.outVal)
		if !reflect.DeepEqual(row, test.input) {
			t.Fatalf("Expected: %#v got: %#v", test.input, row)
		}
	}
}

View File

@ -0,0 +1,466 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package upside_down
import (
"bytes"
"fmt"
"log"
"math"
"github.com/couchbaselabs/bleve/analysis"
"github.com/jmhodges/levigo"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
)
// VERSION_KEY is the key of the single row recording the index format
// version; its absence marks a brand-new database (see Open).
var VERSION_KEY []byte = []byte{'v'}

// VERSION is the current index format version written by init().
const VERSION uint8 = 1
// UpsideDownCouch is a LevelDB-backed (via levigo) index implementation.
type UpsideDownCouch struct {
	version        uint8
	path           string // filesystem path of the LevelDB database
	opts           *levigo.Options
	db             *levigo.DB
	fieldIndexes   map[string]uint16 // field name -> numeric field id
	lastFieldIndex int               // highest field id assigned so far
	analyzer       map[string]*analysis.Analyzer
	docCount       uint64 // cached document count (number of back index rows)
}
// NewUpsideDownCouch prepares an index at the given path; the database
// itself is not touched until Open is called.
func NewUpsideDownCouch(path string) *UpsideDownCouch {
	opts := levigo.NewOptions()
	opts.SetCreateIfMissing(true)
	rv := UpsideDownCouch{
		version:      VERSION,
		path:         path,
		opts:         opts,
		analyzer:     map[string]*analysis.Analyzer{},
		fieldIndexes: map[string]uint16{},
	}
	return &rv
}
// init seeds a brand-new database with its only required row: the
// version marker.
func (udc *UpsideDownCouch) init() (err error) {
	rows := []UpsideDownCouchRow{
		NewVersionRow(udc.version),
	}
	return udc.batchRows(nil, rows, nil)
}
// loadSchema scans every field row (key prefix 'f') and rebuilds the
// in-memory field name -> id mapping, tracking the highest id seen so
// that new fields get fresh ids.
func (udc *UpsideDownCouch) loadSchema() error {
	ro := defaultReadOptions()
	it := udc.db.NewIterator(ro)
	defer it.Close()
	keyPrefix := []byte{'f'}
	for it.Seek(keyPrefix); it.Valid(); it.Next() {
		// stop once we leave the field-row key space
		if !bytes.HasPrefix(it.Key(), keyPrefix) {
			break
		}
		fieldRow := NewFieldRowKV(it.Key(), it.Value())
		udc.fieldIndexes[fieldRow.name] = fieldRow.index
		if int(fieldRow.index) > udc.lastFieldIndex {
			udc.lastFieldIndex = int(fieldRow.index)
		}
	}
	return it.GetError()
}
// batchRows applies adds, updates, and deletes in one LevelDB write
// batch. For TermFrequencyRow adds and deletes it also maintains the
// per-term count row (same term/field with empty doc id): incremented
// on add, decremented on delete, and removed entirely when it hits
// zero. Updates are plain puts with no counter maintenance.
func (udc *UpsideDownCouch) batchRows(addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) (err error) {
	ro := defaultReadOptions()
	// prepare batch
	wb := levigo.NewWriteBatch()
	// add
	for _, row := range addRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to increment counter
			tr := NewTermFrequencyRow(tfr.term, tfr.field, "", 0, 0)
			val, err := udc.db.Get(ro, tr.Key())
			if err != nil {
				return err
			}
			if val != nil {
				tr = ParseFromKeyValue(tr.Key(), val).(*TermFrequencyRow)
				tr.freq += 1 // incr
			} else {
				// first occurrence of this term/field: fresh count of 1
				tr = NewTermFrequencyRow(tfr.term, tfr.field, "", 1, 0)
			}
			// now add this to the batch
			wb.Put(tr.Key(), tr.Value())
		}
		wb.Put(row.Key(), row.Value())
	}
	// update
	for _, row := range updateRows {
		wb.Put(row.Key(), row.Value())
	}
	// delete
	for _, row := range deleteRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to decrement counter
			tr := NewTermFrequencyRow(tfr.term, tfr.field, "", 0, 0)
			val, err := udc.db.Get(ro, tr.Key())
			if err != nil {
				return err
			}
			if val != nil {
				tr = ParseFromKeyValue(tr.Key(), val).(*TermFrequencyRow)
				tr.freq -= 1 // decr
			} else {
				// invariant violation: a delete implies a prior add,
				// which implies a count row exists
				log.Panic(fmt.Sprintf("unexpected missing row, deleting term, expected count row to exit: %v", tr.Key()))
			}
			if tr.freq == 0 {
				wb.Delete(tr.Key())
			} else {
				// now add this to the batch
				wb.Put(tr.Key(), tr.Value())
			}
		}
		wb.Delete(row.Key())
	}
	// write out the batch
	wo := defaultWriteOptions()
	err = udc.db.Write(wo, wb)
	return
}
// DocCount returns the cached document count, set at Open and adjusted
// by Update/Delete.
func (udc *UpsideDownCouch) DocCount() uint64 {
	return udc.docCount
}
// Open opens (creating if missing) the underlying LevelDB database.
// A missing version row means a brand-new index, which is initialized
// via init(); otherwise the existing field schema is loaded. Finally
// the document count is computed by scanning the back index.
func (udc *UpsideDownCouch) Open() (err error) {
	udc.db, err = levigo.Open(udc.path, udc.opts)
	if err != nil {
		return
	}
	ro := defaultReadOptions()
	var value []byte
	value, err = udc.db.Get(ro, VERSION_KEY)
	if err != nil {
		return
	}
	// init new index OR load schema
	if value == nil {
		err = udc.init()
		if err != nil {
			return
		}
	} else {
		err = udc.loadSchema()
		if err != nil {
			return
		}
	}
	// set doc count
	udc.docCount = udc.countDocs()
	return
}
// countDocs counts documents by scanning the back index (key prefix
// 'b'), one row per document.
func (udc *UpsideDownCouch) countDocs() uint64 {
	ro := defaultReadOptions()
	ro.SetFillCache(false) // one-off scan; don't pollute the block cache
	it := udc.db.NewIterator(ro)
	defer it.Close()
	var rv uint64
	for it.Seek([]byte{'b'}); it.Valid(); it.Next() {
		// stop once we leave the back index key space
		if !bytes.HasPrefix(it.Key(), []byte{'b'}) {
			break
		}
		rv++
	}
	return rv
}
// rowCount counts every row in the database (used by tests to verify
// exact row bookkeeping).
func (udc *UpsideDownCouch) rowCount() uint64 {
	ro := defaultReadOptions()
	ro.SetFillCache(false) // one-off scan; don't pollute the block cache
	it := udc.db.NewIterator(ro)
	defer it.Close()
	var rv uint64
	for it.Seek([]byte{0}); it.Valid(); it.Next() {
		rv++
	}
	return rv
}
// Close closes the underlying LevelDB database handle.
func (udc *UpsideDownCouch) Close() {
	udc.db.Close()
}
// Update indexes the supplied document, replacing any prior version
// with the same id. It diffs the freshly analyzed terms against the
// document's back index entries: terms present both times become
// updates, new terms become adds, and terms that disappeared become
// deletes. Everything — term rows, any new field rows, and the rebuilt
// back index row — is applied in a single batch, and the doc count is
// bumped only when the id was not previously indexed.
func (udc *UpsideDownCouch) Update(doc *document.Document) error {
	// first we lookup the backindex row for the doc id if it exists
	// lookup the back index row
	backIndexRow, err := udc.backIndexRowForDoc(doc.ID)
	if err != nil {
		return err
	}
	var isAdd = true
	// a map for each field, map key is term (string) bool true for existence
	// FIXME hard-coded to max of 256 fields
	existingTermFieldMaps := make([]map[string]bool, 256)
	if backIndexRow != nil {
		isAdd = false
		for _, entry := range backIndexRow.entries {
			existingTermFieldMap := existingTermFieldMaps[entry.field]
			if existingTermFieldMap == nil {
				existingTermFieldMap = make(map[string]bool, 0)
				existingTermFieldMaps[entry.field] = existingTermFieldMap
			}
			existingTermFieldMap[string(entry.term)] = true
		}
	}
	// prepare a list of rows
	updateRows := make([]UpsideDownCouchRow, 0)
	addRows := make([]UpsideDownCouchRow, 0)
	// track our back index entries
	backIndexEntries := make([]*BackIndexEntry, 0)
	for _, field := range doc.Fields {
		fieldIndex, fieldExists := udc.fieldIndexes[field.Name]
		if !fieldExists {
			// assign next field id
			fieldIndex = uint16(udc.lastFieldIndex + 1)
			udc.fieldIndexes[field.Name] = fieldIndex
			// ensure this batch adds a row for this field
			row := NewFieldRow(uint16(fieldIndex), field.Name)
			updateRows = append(updateRows, row)
			udc.lastFieldIndex = int(fieldIndex)
		}
		existingTermFieldMap := existingTermFieldMaps[fieldIndex]
		analyzer := field.Analyzer
		tokens := analyzer.Analyze(field.Value)
		fieldLength := len(tokens) // number of tokens in this doc field
		// norm = 1/sqrt(length), the usual length normalization factor
		fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
		tokenFreqs := analysis.TokenFrequency(tokens)
		for _, tf := range tokenFreqs {
			var termFreqRow *TermFrequencyRow
			if document.IncludeTermVectors(field.IndexingOptions) {
				tv := termVectorsFromTokenFreq(uint16(fieldIndex), tf)
				termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, uint16(fieldIndex), doc.ID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv)
			} else {
				termFreqRow = NewTermFrequencyRow(tf.Term, uint16(fieldIndex), doc.ID, uint64(frequencyFromTokenFreq(tf)), fieldNorm)
			}
			// record the back index entry
			backIndexEntry := BackIndexEntry{tf.Term, uint16(fieldIndex)}
			backIndexEntries = append(backIndexEntries, &backIndexEntry)
			// remove the entry from the map of existing term fields if it exists
			if existingTermFieldMap != nil {
				termString := string(tf.Term)
				_, ok := existingTermFieldMap[termString]
				if ok {
					// this is an update
					updateRows = append(updateRows, termFreqRow)
					// this term existed last time, delete it from that map
					delete(existingTermFieldMap, termString)
				} else {
					// this is an add
					addRows = append(addRows, termFreqRow)
				}
			} else {
				// this is an add
				addRows = append(addRows, termFreqRow)
			}
		}
	}
	// build the back index row
	backIndexRow = NewBackIndexRow(doc.ID, backIndexEntries)
	updateRows = append(updateRows, backIndexRow)
	// any of the existing rows that weren't updated need to be deleted
	deleteRows := make([]UpsideDownCouchRow, 0)
	for fieldIndex, existingTermFieldMap := range existingTermFieldMaps {
		if existingTermFieldMap != nil {
			for termString, _ := range existingTermFieldMap {
				termFreqRow := NewTermFrequencyRow([]byte(termString), uint16(fieldIndex), doc.ID, 0, 0)
				deleteRows = append(deleteRows, termFreqRow)
			}
		}
	}
	err = udc.batchRows(addRows, updateRows, deleteRows)
	if err == nil && isAdd {
		udc.docCount += 1
	}
	return err
}
// Delete removes the document with the given id: one term frequency
// row per back index entry plus the back index row itself, applied in
// a single batch (batchRows also decrements the per-term counters).
// Deleting an id that was never indexed is a no-op.
func (udc *UpsideDownCouch) Delete(id string) error {
	// lookup the back index row
	backIndexRow, err := udc.backIndexRowForDoc(id)
	if err != nil {
		return err
	}
	if backIndexRow == nil {
		return nil
	}
	// prepare a list of rows to delete
	rows := make([]UpsideDownCouchRow, 0)
	for _, backIndexEntry := range backIndexRow.entries {
		tfr := NewTermFrequencyRow(backIndexEntry.term, backIndexEntry.field, id, 0, 0)
		rows = append(rows, tfr)
	}
	// also delete the back entry itself
	rows = append(rows, backIndexRow)
	err = udc.batchRows(nil, nil, rows)
	if err == nil {
		udc.docCount -= 1
	}
	return err
}
// backIndexRowForDoc fetches the back index row for the given doc id,
// returning (nil, nil) when the document is not indexed.
func (udc *UpsideDownCouch) backIndexRowForDoc(docId string) (*BackIndexRow, error) {
	ro := defaultReadOptions()
	// build the lookup key via a throwaway row holding only the doc id
	lookup := &BackIndexRow{
		doc: []byte(docId),
	}
	key := lookup.Key()
	value, err := udc.db.Get(ro, key)
	if err != nil {
		return nil, err
	}
	if value == nil {
		return nil, nil
	}
	return ParseFromKeyValue(key, value).(*BackIndexRow), nil
}
// Dump prints every parseable row in the database (and its raw
// key/value bytes in hex) to stdout, for debugging.
func (udc *UpsideDownCouch) Dump() {
	ro := defaultReadOptions()
	ro.SetFillCache(false) // one-off scan; don't pollute the block cache
	it := udc.db.NewIterator(ro)
	defer it.Close()
	for it.SeekToFirst(); it.Valid(); it.Next() {
		row := ParseFromKeyValue(it.Key(), it.Value())
		if row != nil {
			fmt.Printf("%v\n", row)
			fmt.Printf("Key: % -100x\nValue: % -100x\n\n", it.Key(), it.Value())
		}
	}
	if err := it.GetError(); err != nil {
		fmt.Printf("Error reading iterator: %v", err)
	}
}
// TermFieldReader returns a reader over all documents containing term
// in the named field, or an error when the field is unknown to the
// schema (logging the known fields to aid debugging).
func (udc *UpsideDownCouch) TermFieldReader(term []byte, fieldName string) (index.TermFieldReader, error) {
	fieldIndex, fieldExists := udc.fieldIndexes[fieldName]
	if fieldExists {
		// fieldIndex is already a uint16; no conversion needed
		return newUpsideDownCouchTermFieldReader(udc, term, fieldIndex)
	}
	log.Printf("fields: %v", udc.fieldIndexes)
	return nil, fmt.Errorf("No field named `%s` in the schema", fieldName)
}
// defaultWriteOptions returns write options that fsync every write,
// trading speed for durability.
func defaultWriteOptions() *levigo.WriteOptions {
	wo := levigo.NewWriteOptions()
	// request fsync on write for safety
	wo.SetSync(true)
	return wo
}
// defaultReadOptions returns levigo's stock read options.
func defaultReadOptions() *levigo.ReadOptions {
	return levigo.NewReadOptions()
}
// frequencyFromTokenFreq returns how often the term occurred: the
// number of recorded locations.
func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
	return len(tf.Locations)
}
// termVectorsFromTokenFreq converts each token location into a
// TermVector tagged with the given field id.
func termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq) []*TermVector {
	rv := make([]*TermVector, len(tf.Locations))
	for i, loc := range tf.Locations {
		rv[i] = &TermVector{
			field: field,
			pos:   uint64(loc.Position),
			start: uint64(loc.Start),
			end:   uint64(loc.End),
		}
	}
	return rv
}
// termFieldVectorsFromTermVectors converts stored term vectors into
// the public index.TermFieldVector form, resolving field ids back to
// field names.
func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector {
	rv := make([]*index.TermFieldVector, len(in))
	for i, tv := range in {
		rv[i] = &index.TermFieldVector{
			Field: udc.fieldIndexToName(tv.field),
			Pos:   tv.pos,
			Start: tv.start,
			End:   tv.end,
		}
	}
	return rv
}
// fieldIndexToName reverse-maps a numeric field id to its name by
// scanning the name->id map; unknown ids yield the empty string.
func (udc *UpsideDownCouch) fieldIndexToName(i uint16) string {
	for name, idx := range udc.fieldIndexes {
		if idx == i {
			return name
		}
	}
	return ""
}

View File

@ -0,0 +1,221 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package upside_down
import (
"os"
"testing"
_ "github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
"github.com/couchbaselabs/bleve/document"
)
// TestIndexOpenReopen verifies a fresh index opens empty with exactly
// one row (the version marker) and can be closed and reopened.
func TestIndexOpenReopen(t *testing.T) {
	defer os.RemoveAll("test")
	idx := NewUpsideDownCouch("test")
	err := idx.Open()
	if err != nil {
		t.Errorf("error opening index: %v", err)
	}
	var expectedCount uint64 = 0
	docCount := idx.DocCount()
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	// opening database should have inserted version
	expectedLength := uint64(1)
	rowCount := idx.rowCount()
	if rowCount != expectedLength {
		t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
	}
	// now close it
	idx.Close()
	idx = NewUpsideDownCouch("test")
	err = idx.Open()
	if err != nil {
		t.Errorf("error opening index: %v", err)
	}
	// now close it
	idx.Close()
}
// TestIndexInsert indexes one single-term document and checks both the
// doc count and the exact number of rows produced.
func TestIndexInsert(t *testing.T) {
	defer os.RemoveAll("test")
	idx := NewUpsideDownCouch("test")
	err := idx.Open()
	if err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer idx.Close()
	var expectedCount uint64 = 0
	docCount := idx.DocCount()
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount += 1
	docCount = idx.DocCount()
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	// should have 4 rows (1 for version, 1 for schema field, and 1 for single term, and 1 for the term count, and 1 for the back index entry)
	expectedLength := uint64(1 + 1 + 1 + 1 + 1)
	rowCount := idx.rowCount()
	if rowCount != expectedLength {
		t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
	}
}
// TestIndexInsertThenDelete indexes a document, deletes it, and checks
// that the doc count drops back and only the version and schema field
// rows remain.
func TestIndexInsertThenDelete(t *testing.T) {
	defer os.RemoveAll("test")
	idx := NewUpsideDownCouch("test")
	err := idx.Open()
	if err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer idx.Close()
	var expectedCount uint64 = 0
	docCount := idx.DocCount()
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount += 1
	docCount = idx.DocCount()
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	err = idx.Delete("1")
	if err != nil {
		t.Errorf("Error deleting entry from index: %v", err)
	}
	expectedCount -= 1
	docCount = idx.DocCount()
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	// should have 2 row (1 for version, 1 for schema field)
	expectedLength := uint64(1 + 1)
	rowCount := idx.rowCount()
	if rowCount != expectedLength {
		t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
	}
}
// TestIndexInsertThenUpdate exercises the update diff logic: an update
// that keeps one term and adds another, then an update that drops a
// term, each time checking the exact row count.
func TestIndexInsertThenUpdate(t *testing.T) {
	defer os.RemoveAll("test")
	idx := NewUpsideDownCouch("test")
	err := idx.Open()
	if err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer idx.Close()
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	// this update should overwrite one term, and introduce one new one
	doc = document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []byte("test fail")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error deleting entry from index: %v", err)
	}
	// should have 2 row (1 for version, 1 for schema field, and 2 for the two term, and 2 for the term counts, and 1 for the back index entry)
	expectedLength := uint64(1 + 1 + 2 + 2 + 1)
	rowCount := idx.rowCount()
	if rowCount != expectedLength {
		t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
	}
	// now do another update that should remove one of term
	doc = document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []byte("fail")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error deleting entry from index: %v", err)
	}
	// should have 2 row (1 for version, 1 for schema field, and 1 for the remaining term, and 1 for the term count, and 1 for the back index entry)
	expectedLength = uint64(1 + 1 + 1 + 1 + 1)
	rowCount = idx.rowCount()
	if rowCount != expectedLength {
		t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
	}
}
// TestIndexInsertMultiple indexes two documents sharing the same term
// and checks the row count: two term rows but a single shared term
// count row.
func TestIndexInsertMultiple(t *testing.T) {
	defer os.RemoveAll("test")
	idx := NewUpsideDownCouch("test")
	err := idx.Open()
	if err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer idx.Close()
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	doc = document.NewDocument("2")
	doc.AddField(document.NewTextField("name", []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	// should have 4 rows (1 for version, 1 for schema field, and 2 for single term, and 1 for the term count, and 2 for the back index entries)
	expectedLength := uint64(1 + 1 + 2 + 1 + 2)
	rowCount := idx.rowCount()
	if rowCount != expectedLength {
		t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
	}
}

21
search/collector.go Normal file
View File

@ -0,0 +1,21 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"time"
)
// Collector consumes every match produced by a Searcher and retains
// some subset of them, along with aggregate statistics about the run.
type Collector interface {
	Collect(searcher Searcher) error  // drive the searcher to exhaustion
	Results() DocumentMatchCollection // the retained matches
	Total() uint64                    // total number of matches seen
	MaxScore() float64                // highest score seen
	Took() time.Duration              // wall-clock duration of Collect
}

View File

@ -0,0 +1,96 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"container/list"
"time"
)
// TopScoreCollector keeps the k highest-scoring matches seen during a
// search, stored in a linked list sorted ascending by score (lowest at
// the front, so the front is evicted when the list overflows).
type TopScoreCollector struct {
	k        int        // maximum number of matches retained
	results  *list.List // *DocumentMatch values, ascending by score
	took     time.Duration
	maxScore float64
	total    uint64
}
// NewTopScorerCollector builds a collector retaining the top k matches.
func NewTopScorerCollector(k int) *TopScoreCollector {
	rv := TopScoreCollector{
		k:       k,
		results: list.New(),
	}
	return &rv
}
// Total returns the number of matches seen during Collect.
func (tksc *TopScoreCollector) Total() uint64 {
	return tksc.total
}

// MaxScore returns the highest score seen during Collect.
func (tksc *TopScoreCollector) MaxScore() float64 {
	return tksc.maxScore
}

// Took returns the wall-clock duration of the last Collect call.
func (tksc *TopScoreCollector) Took() time.Duration {
	return tksc.took
}
// Collect drains the searcher, folding every match into the collector,
// and records how long the whole pass took (even on error).
func (tksc *TopScoreCollector) Collect(searcher Searcher) error {
	startTime := time.Now()
	var err error
	var match *DocumentMatch
	for match, err = searcher.Next(); err == nil && match != nil; match, err = searcher.Next() {
		tksc.collectSingle(match)
	}
	// compute search duration
	tksc.took = time.Since(startTime)
	return err
}
// collectSingle folds one match into the collector: bumps the total,
// tracks the max score, and inserts the match into the score-sorted
// list (ascending), evicting the lowest-scoring entry — the front —
// whenever the list grows past k.
func (tksc *TopScoreCollector) collectSingle(dm *DocumentMatch) {
	// increment total hits
	tksc.total += 1
	// update max score
	if dm.Score > tksc.maxScore {
		tksc.maxScore = dm.Score
	}
	// walk from lowest score; insert before the first entry that
	// outscores this match, keeping the list sorted ascending
	for e := tksc.results.Front(); e != nil; e = e.Next() {
		curr := e.Value.(*DocumentMatch)
		if dm.Score < curr.Score {
			tksc.results.InsertBefore(dm, e)
			// if we just made the list too long
			if tksc.results.Len() > tksc.k {
				// remove the head
				tksc.results.Remove(tksc.results.Front())
			}
			return
		}
	}
	// if we got to the end, we still have to add it
	tksc.results.PushBack(dm)
	if tksc.results.Len() > tksc.k {
		// remove the head
		tksc.results.Remove(tksc.results.Front())
	}
}
// Results returns the retained matches ordered best-score-first by
// walking the ascending list back-to-front.
func (tksc *TopScoreCollector) Results() DocumentMatchCollection {
	rv := make(DocumentMatchCollection, tksc.results.Len())
	pos := 0
	for e := tksc.results.Back(); e != nil; e = e.Prev() {
		rv[pos] = e.Value.(*DocumentMatch)
		pos++
	}
	return rv
}

View File

@ -0,0 +1,107 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"testing"
)
func TestTop10Scores(t *testing.T) {
// a stub search with more than 10 matches
// the top-10 scores are > 10
// everything else is less than 10
searcher := &stubSearcher{
matches: DocumentMatchCollection{
&DocumentMatch{
ID: "a",
Score: 11,
},
&DocumentMatch{
ID: "b",
Score: 9,
},
&DocumentMatch{
ID: "c",
Score: 11,
},
&DocumentMatch{
ID: "d",
Score: 9,
},
&DocumentMatch{
ID: "e",
Score: 11,
},
&DocumentMatch{
ID: "f",
Score: 9,
},
&DocumentMatch{
ID: "g",
Score: 11,
},
&DocumentMatch{
ID: "h",
Score: 9,
},
&DocumentMatch{
ID: "i",
Score: 11,
},
&DocumentMatch{
ID: "j",
Score: 11,
},
&DocumentMatch{
ID: "k",
Score: 11,
},
&DocumentMatch{
ID: "l",
Score: 99,
},
&DocumentMatch{
ID: "m",
Score: 11,
},
&DocumentMatch{
ID: "n",
Score: 11,
},
},
}
collector := NewTopScorerCollector(10)
collector.Collect(searcher)
results := collector.Results()
if len(results) != 10 {
t.Fatalf("expected 10 results, got %d", len(results))
}
if results[0].ID != "l" {
t.Errorf("expected first result to have ID 'l', got %s", results[0].ID)
}
if results[0].Score != 99.0 {
t.Errorf("expected highest score to be 99.0, got %f", results[0].Score)
}
minScore := 1000.0
for _, result := range results {
if result.Score < minScore {
minScore = result.Score
}
}
if minScore < 10 {
t.Errorf("expected minimum score to be higher than 10, got %f", minScore)
}
}

28
search/explanation.go Normal file
View File

@ -0,0 +1,28 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"encoding/json"
"fmt"
)
// Explanation describes how a score was computed: a value, a human
// readable message, and optional child explanations for sub-factors.
type Explanation struct {
	Value    float64        `json:"value"`
	Message  string         `json:"message"`
	Children []*Explanation `json:"children,omitempty"`
}

// String renders the explanation tree as indented JSON; if marshaling
// fails it returns an error description instead.
func (expl *Explanation) String() string {
	js, err := json.MarshalIndent(expl, "", " ")
	if err != nil {
		// fixed typo in the message: "explation" -> "explanation"
		return fmt.Sprintf("error serializing explanation to json: %v", err)
	}
	return string(js)
}

19
search/query.go Normal file
View File

@ -0,0 +1,19 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"github.com/couchbaselabs/bleve/index"
)
// Query is a search query that can produce a Searcher over an index.
type Query interface {
	Boost() float64                               // score multiplier for this query
	Searcher(index index.Index) (Searcher, error) // build the searcher
	Validate() error                              // report structural problems
}

32
search/query_term.go Normal file
View File

@ -0,0 +1,32 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"github.com/couchbaselabs/bleve/index"
)
// TermQuery matches documents containing an exact term in a field.
type TermQuery struct {
	Term     string  `json:"term"`
	Field    string  `json:"field,omitempty"`
	BoostVal float64 `json:"boost,omitempty"`
	Explain  bool    `json:"explain,omitempty"` // request score explanations
}

// Boost returns the query's score multiplier.
func (q *TermQuery) Boost() float64 {
	return q.BoostVal
}

// Searcher builds a term searcher for this query over the index.
func (q *TermQuery) Searcher(index index.Index) (Searcher, error) {
	return NewTermSearcher(index, q)
}

// Validate always returns nil; a term query has no invalid states.
func (q *TermQuery) Validate() error {
	return nil
}

172
search/scorer_term.go Normal file
View File

@ -0,0 +1,172 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"fmt"
"math"
"github.com/couchbaselabs/bleve/index"
)
// MAX_SCORE_CACHE bounds the term frequencies for which computed scores
// (and their explanations) are memoized by TermQueryScorer.
const MAX_SCORE_CACHE = 64

// TermQueryScorer computes tf-idf style scores for documents matching a
// single term query, optionally building Explanation trees describing
// how each score was derived.
type TermQueryScorer struct {
	query                  *TermQuery
	docTerm                uint64 // number of documents containing the term
	docTotal               uint64 // total number of documents in the index
	idf                    float64
	explain                bool
	idfExplanation         *Explanation
	scoreCache             map[int]float64 // term freq -> score, for freqs below MAX_SCORE_CACHE
	scoreExplanationCache  map[int]*Explanation
	queryNorm              float64
	queryWeight            float64
	queryWeightExplanation *Explanation
}
// NewTermQueryScorer builds a scorer for query given the total number of
// documents in the index and the number of documents containing the term.
// When explain is true, Explanation structures are produced alongside
// scores.
func NewTermQueryScorer(query *TermQuery, docTotal, docTerm uint64, explain bool) *TermQueryScorer {
	scorer := &TermQueryScorer{
		query:                 query,
		docTerm:               docTerm,
		docTotal:              docTotal,
		idf:                   1.0 + math.Log(float64(docTotal)/float64(docTerm+1)),
		explain:               explain,
		scoreCache:            make(map[int]float64, MAX_SCORE_CACHE),
		scoreExplanationCache: make(map[int]*Explanation, MAX_SCORE_CACHE),
		queryWeight:           1.0,
	}
	if !explain {
		return scorer
	}
	scorer.idfExplanation = &Explanation{
		Value:   scorer.idf,
		Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal),
	}
	return scorer
}
// Weight returns this query's raw weight contribution, the squared
// product of the query boost and the idf, used during query
// normalization.
func (s *TermQueryScorer) Weight() float64 {
	sum := s.query.Boost() * s.idf
	return sum * sum
}
// SetQueryNorm records the normalization factor and refreshes the
// derived query weight (boost * idf * norm), rebuilding its Explanation
// when explanations are enabled.
func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
	s.queryNorm = qnorm

	// update the query weight from its three factors
	s.queryWeight = s.query.Boost() * s.idf * s.queryNorm

	if !s.explain {
		return
	}
	s.queryWeightExplanation = &Explanation{
		Value:   s.queryWeight,
		Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.query.Field, string(s.query.Term), s.query.Boost()),
		Children: []*Explanation{
			{
				Value:   s.query.Boost(),
				Message: "boost",
			},
			s.idfExplanation,
			{
				Value:   s.queryNorm,
				Message: "queryNorm",
			},
		},
	}
}
// Score computes the score for one term/document match, returning a
// DocumentMatch carrying the score, an optional Explanation, and any
// term location information present on the match.
//
// Scores for term frequencies below MAX_SCORE_CACHE are memoized along
// with their explanations. NOTE(review): cached entries bake in the
// current queryWeight; calling SetQueryNorm after scores have been
// cached would leave stale entries — confirm callers set the norm before
// scoring begins.
func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *DocumentMatch {
	var scoreExplanation *Explanation

	// see if the score was cached
	score, ok := s.scoreCache[int(termMatch.Freq)]
	if !ok {
		// need to compute score: tf is sqrt of the term frequency,
		// served from the precomputed table for small frequencies
		var tf float64
		if termMatch.Freq < MAX_SQRT_CACHE {
			tf = SQRT_CACHE[int(termMatch.Freq)]
		} else {
			tf = math.Sqrt(float64(termMatch.Freq))
		}
		score = tf * termMatch.Norm * s.idf
		if s.explain {
			childrenExplanations := make([]*Explanation, 3)
			childrenExplanations[0] = &Explanation{
				Value: tf,
				// NOTE(review): message is missing a closing paren — kept
				// byte-identical to preserve existing output
				Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.query.Field, string(s.query.Term), termMatch.Freq),
			}
			childrenExplanations[1] = &Explanation{
				Value:   termMatch.Norm,
				Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.query.Field, termMatch.ID),
			}
			childrenExplanations[2] = s.idfExplanation
			scoreExplanation = &Explanation{
				Value:    score,
				Message:  fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.query.Field, string(s.query.Term), termMatch.ID),
				Children: childrenExplanations,
			}
		}

		// if the query weight isn't 1, multiply it in and wrap the
		// explanation in a weight node
		if s.queryWeight != 1.0 {
			score = score * s.queryWeight
			if s.explain {
				childExplanations := make([]*Explanation, 2)
				childExplanations[0] = s.queryWeightExplanation
				childExplanations[1] = scoreExplanation
				scoreExplanation = &Explanation{
					Value:    score,
					Message:  fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.query.Field, string(s.query.Term), s.query.Boost(), termMatch.ID),
					Children: childExplanations,
				}
			}
		}

		// memoize final score (and explanation) for small frequencies
		if termMatch.Freq < MAX_SCORE_CACHE {
			s.scoreCache[int(termMatch.Freq)] = score
			if s.explain {
				s.scoreExplanationCache[int(termMatch.Freq)] = scoreExplanation
			}
		}
	}
	// cache hit: reuse the explanation stored alongside the cached score
	if ok && s.explain {
		scoreExplanation = s.scoreExplanationCache[int(termMatch.Freq)]
	}

	rv := DocumentMatch{
		ID:    termMatch.ID,
		Score: score,
	}
	if s.explain {
		rv.Expl = scoreExplanation
	}

	// copy term vector positions into Location structures, if present
	if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 {
		locations := make(Locations, len(termMatch.Vectors))
		for i, v := range termMatch.Vectors {
			loc := Location{
				Pos:   float64(v.Pos),
				Start: float64(v.Start),
				End:   float64(v.End),
			}
			locations[i] = &loc
		}
		tlm := make(TermLocationMap)
		tlm[s.query.Term] = locations
		rv.Locations = make(FieldTermLocationMap)
		rv.Locations[s.query.Field] = tlm
	}
	return &rv
}

39
search/search.go Normal file
View File

@ -0,0 +1,39 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
// Location identifies a single occurrence of a term within a field: its
// token position and the offsets where it starts and ends.
type Location struct {
	Pos   float64 `json:"pos"`
	Start float64 `json:"start"`
	End   float64 `json:"end"`
}

// Locations is a list of term occurrences.
type Locations []*Location

// TermLocationMap maps a term to the locations where it occurs.
type TermLocationMap map[string]Locations

// FieldTermLocationMap maps a field name to the terms (and their
// locations) found in that field.
type FieldTermLocationMap map[string]TermLocationMap

// DocumentMatch describes a single matching document: its ID, score, and
// optionally a scoring explanation and term locations.
type DocumentMatch struct {
	ID        string               `json:"id"`
	Score     float64              `json:"score"`
	Expl      *Explanation         `json:"explanation,omitempty"`
	Locations FieldTermLocationMap `json:"locations,omitempty"`
}

// DocumentMatchCollection is an ordered list of document matches.
type DocumentMatchCollection []*DocumentMatch

// Searcher iterates over the documents matching a query.
type Searcher interface {
	// Next returns the next match, or nil when exhausted.
	Next() (*DocumentMatch, error)
	// Advance skips ahead to the first match whose ID is >= ID.
	Advance(ID string) (*DocumentMatch, error)
	// Close releases any resources held by the searcher.
	Close()
	// Weight returns the searcher's raw weight for query normalization.
	Weight() float64
	// SetQueryNorm supplies the query normalization factor.
	SetQueryNorm(float64)
	// Count returns the number of candidate matches.
	Count() uint64
}

84
search/search_term.go Normal file
View File

@ -0,0 +1,84 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"github.com/couchbaselabs/bleve/index"
)
// TermSearcher finds and scores documents containing a single term,
// pairing a term field reader with a TermQueryScorer.
type TermSearcher struct {
	index  index.Index
	query  *TermQuery
	reader index.TermFieldReader
	scorer *TermQueryScorer
}
// NewTermSearcher opens a term field reader for the query's term and
// field, and wires it to a scorer primed with collection statistics.
func NewTermSearcher(index index.Index, query *TermQuery) (*TermSearcher, error) {
	reader, err := index.TermFieldReader([]byte(query.Term), query.Field)
	if err != nil {
		return nil, err
	}
	rv := TermSearcher{
		index:  index,
		query:  query,
		reader: reader,
		scorer: NewTermQueryScorer(query, index.DocCount(), reader.Count(), query.Explain),
	}
	return &rv, nil
}
// Count returns the number of documents containing the term.
func (s *TermSearcher) Count() uint64 {
	return s.reader.Count()
}

// Weight delegates to the scorer's weight for query normalization.
func (s *TermSearcher) Weight() float64 {
	return s.scorer.Weight()
}

// SetQueryNorm passes the query normalization factor to the scorer.
func (s *TermSearcher) SetQueryNorm(qnorm float64) {
	s.scorer.SetQueryNorm(qnorm)
}
// Next returns the next scored match from the underlying reader, or nil
// when the reader is exhausted.
func (s *TermSearcher) Next() (*DocumentMatch, error) {
	tfd, err := s.reader.Next()
	if err != nil || tfd == nil {
		// propagate reader errors; a nil match with nil error signals
		// end of iteration
		return nil, err
	}
	return s.scorer.Score(tfd), nil
}
// Advance skips the underlying reader ahead to ID (or beyond) and
// returns the scored match found there, or nil when none remain.
func (s *TermSearcher) Advance(ID string) (*DocumentMatch, error) {
	tfd, err := s.reader.Advance(ID)
	if err != nil || tfd == nil {
		// propagate reader errors; a nil match with nil error signals
		// end of iteration
		return nil, err
	}
	return s.scorer.Score(tfd), nil
}
// Close releases the underlying term field reader.
func (s *TermSearcher) Close() {
	s.reader.Close()
}

50
search/search_test.go Normal file
View File

@ -0,0 +1,50 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
// stubSearcher is a canned Searcher for tests: it serves matches from a
// fixed collection in order. NOTE(review): Advance assumes the matches
// are sorted by ID — confirm fixtures are built that way.
type stubSearcher struct {
	index   int // position of the next match to serve
	matches DocumentMatchCollection
}
// Next serves the next canned match, or nil once all are consumed.
func (ss *stubSearcher) Next() (*DocumentMatch, error) {
	if ss.index >= len(ss.matches) {
		return nil, nil
	}
	match := ss.matches[ss.index]
	ss.index++
	return match, nil
}
// Advance skips canned matches with IDs lexicographically below ID and
// serves the first remaining match, or nil when none remain.
func (ss *stubSearcher) Advance(ID string) (*DocumentMatch, error) {
	for ss.index < len(ss.matches) {
		if ss.matches[ss.index].ID >= ID {
			break
		}
		ss.index++
	}
	if ss.index >= len(ss.matches) {
		return nil, nil
	}
	match := ss.matches[ss.index]
	ss.index++
	return match, nil
}
// Close is a no-op for the stub.
func (ss *stubSearcher) Close() {
}

// Weight always returns zero; the stub performs no scoring.
func (ss *stubSearcher) Weight() float64 {
	return 0.0
}

// SetQueryNorm ignores the normalization factor.
func (ss *stubSearcher) SetQueryNorm(float64) {
}

// Count returns the total number of canned matches.
func (ss *stubSearcher) Count() uint64 {
	return uint64(len(ss.matches))
}

24
search/sqrt_cache.go Normal file
View File

@ -0,0 +1,24 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"math"
)
// SQRT_CACHE holds precomputed square roots for small term frequencies,
// indexed by the frequency itself: SQRT_CACHE[i] == math.Sqrt(i) for
// 0 <= i < MAX_SQRT_CACHE.
//
// A dense slice replaces the original map[int]float64: the keys are
// exactly 0..MAX_SQRT_CACHE-1, so slice indexing is cheaper and
// allocation-free, and every existing use already indexes it directly
// (SQRT_CACHE[i]), so access syntax is unchanged.
var SQRT_CACHE []float64

// MAX_SQRT_CACHE is the number of precomputed square roots.
const MAX_SQRT_CACHE = 64

func init() {
	SQRT_CACHE = make([]float64, MAX_SQRT_CACHE)
	for i := 0; i < MAX_SQRT_CACHE; i++ {
		SQRT_CACHE[i] = math.Sqrt(float64(i))
	}
}

64
shredder/json_shredder.go Normal file
View File

@ -0,0 +1,64 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package shredder
import (
"encoding/json"
"strconv"
"github.com/couchbaselabs/bleve/document"
)
// AutoJsonShredder is a simple automatic JSON shredder which parses the
// whole document body. Any strings found anywhere in the JSON are added
// as text fields; other value types (numbers, booleans, null) are
// ignored.
type AutoJsonShredder struct {
}

// NewAutoJsonShredder returns a new automatic JSON shredder.
func NewAutoJsonShredder() *AutoJsonShredder {
	return &AutoJsonShredder{}
}
// Shred parses body as JSON and returns a document with the given id,
// containing a text field for every string value found in the JSON.
func (s *AutoJsonShredder) Shred(id string, body []byte) (*document.Document, error) {
	var parsed interface{}
	if err := json.Unmarshal(body, &parsed); err != nil {
		return nil, err
	}
	doc := document.NewDocument(id)
	shredSection(doc, parsed, "")
	return doc, nil
}
// shredSection recursively walks one parsed JSON value, adding a text
// field to doc for each string encountered. Field names are dotted
// paths; array elements use their index as the path component. Values
// that are not strings, arrays, or objects are ignored.
func shredSection(doc *document.Document, section interface{}, parent string) {
	prefix := parent
	if prefix != "" {
		prefix += "."
	}
	switch v := section.(type) {
	case string:
		doc.AddField(document.NewTextField(parent, []byte(v)))
	case []interface{}:
		for i, child := range v {
			shredSection(doc, child, prefix+strconv.Itoa(i))
		}
	case map[string]interface{}:
		for key, child := range v {
			shredSection(doc, child, prefix+key)
		}
	}
}

View File

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package shredder
import (
"github.com/couchbaselabs/bleve/document"
"github.com/dustin/go-jsonpointer"
)
// JsonPointerShredder extracts only explicitly registered JSON pointer
// paths from a document body, adding each as a named field. (The
// original comment here was copied from AutoJsonShredder; this shredder
// does not walk the whole document automatically.)
type JsonPointerShredder struct {
	fieldPaths map[string]string // field name -> JSON pointer path
	paths      []string          // all registered paths, for batch lookup
}
// NewJsonPointerShredder returns an empty shredder; register paths with
// AddField/AddTextField before calling Shred.
func NewJsonPointerShredder() *JsonPointerShredder {
	return &JsonPointerShredder{
		fieldPaths: make(map[string]string),
		paths:      make([]string, 0),
	}
}
// AddTextField registers a text field to be extracted from the JSON
// pointer path. It currently behaves identically to AddField, so it
// delegates rather than duplicating the body.
func (s *JsonPointerShredder) AddTextField(name string, path string) {
	s.AddField(name, path)
}

// AddField registers a field named name whose value will be extracted
// from the JSON pointer path when Shred is called.
func (s *JsonPointerShredder) AddField(name string, path string) {
	s.fieldPaths[name] = path
	s.paths = append(s.paths, path)
}
// Shred builds a document with the given id containing one text field
// per registered path, extracted from body in a single
// jsonpointer.FindMany pass.
// NOTE(review): assumes FindMany returns a map keyed by path whose
// values are the raw bytes at each pointer — confirm against the
// go-jsonpointer API.
func (s *JsonPointerShredder) Shred(id string, body []byte) (*document.Document, error) {
	rv := document.NewDocument(id)
	values, err := jsonpointer.FindMany(body, s.paths)
	if err != nil {
		return nil, err
	}
	for fieldName, fieldPath := range s.fieldPaths {
		field := document.NewTextField(fieldName, values[fieldPath])
		rv.AddField(field)
	}
	return rv, nil
}

17
shredder/shredder.go Normal file
View File

@ -0,0 +1,17 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package shredder
import (
"github.com/couchbaselabs/bleve/document"
)
// Shredder converts a raw document body into an indexable Document.
type Shredder interface {
	// Shred parses body and returns a Document identified by id.
	// The return type is *document.Document: both shredders in this
	// package (AutoJsonShredder and JsonPointerShredder) return a
	// pointer, so the previous value-typed declaration
	// (document.Document) could not be satisfied by either of them.
	Shred(id string, body []byte) (*document.Document, error)
}

31
utils/bleve_dump/main.go Normal file
View File

@ -0,0 +1,31 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package main
import (
"flag"
"log"
"github.com/couchbaselabs/bleve/index/upside_down"
)
// indexDir is the filesystem path of the index to dump.
var indexDir = flag.String("indexDir", "index", "index directory")

// main opens the upside_down index at -indexDir and dumps its contents,
// exiting with a fatal log message if the index cannot be opened.
func main() {
	flag.Parse()
	index := upside_down.NewUpsideDownCouch(*indexDir)
	err := index.Open()
	if err != nil {
		log.Fatal(err)
	}
	defer index.Close()
	index.Dump()
}