
Merge branch 'master' into documenting

Ben Campbell 2016-03-23 10:48:09 +13:00
commit 4fafb2be3f
90 changed files with 3649 additions and 888 deletions

.gitignore vendored

@@ -4,6 +4,8 @@
.#*
.project
.settings
**/.idea/
**/*.iml
.DS_Store
/analysis/token_filters/cld2/cld2-read-only
/analysis/token_filters/cld2/libcld2_full.a


@@ -1,6 +1,6 @@
# ![bleve](docs/bleve.png) bleve
[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/blevesearch/bleve/badge.png?branch=master)](https://coveralls.io/r/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve) [![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/blevesearch/bleve/badge.png?branch=master)](https://coveralls.io/r/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve) [![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)[![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve)
modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)


@@ -1,37 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package ja
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/token_filters/unicode_normalize"
"github.com/blevesearch/bleve/registry"
)
const AnalyzerName = "ja"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
kagomeTokenizer, err := cache.TokenizerNamed(TokenizerName)
if err != nil {
return nil, err
}
normalizeFilter := unicode_normalize.MustNewUnicodeNormalizeFilter(unicode_normalize.NFKD)
rv := analysis.Analyzer{
Tokenizer: kagomeTokenizer,
TokenFilters: []analysis.TokenFilter{
normalizeFilter,
},
}
return &rv, nil
}
func init() {
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}


@@ -1,69 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package ja
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
)
func TestJaAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
input: []byte("こんにちは世界"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("こんにちは"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 15,
},
&analysis.Token{
Term: []byte("世界"),
Type: analysis.Ideographic,
Position: 2,
Start: 15,
End: 21,
},
},
},
{
input: []byte("カタカナ"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("カタカナ"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 12,
},
},
},
}
cache := registry.NewCache()
for _, test := range tests {
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
actual := analyzer.Analyze(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
}
}


@@ -1,83 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package ja
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
"github.com/ikawaha/kagome/tokenizer"
)
const TokenizerName = "kagome"
type KagomeMorphTokenizer struct {
tok tokenizer.Tokenizer
}
func init() {
_ = tokenizer.SysDic() // prepare system dictionary
}
func NewKagomeMorphTokenizer() *KagomeMorphTokenizer {
return &KagomeMorphTokenizer{
tok: tokenizer.New(),
}
}
func NewKagomeMorphTokenizerWithUserDic(userdic tokenizer.UserDic) *KagomeMorphTokenizer {
k := tokenizer.New()
k.SetUserDic(userdic)
return &KagomeMorphTokenizer{
tok: k,
}
}
func (t *KagomeMorphTokenizer) Tokenize(input []byte) analysis.TokenStream {
var (
morphs []tokenizer.Token
prevstart int
)
rv := make(analysis.TokenStream, 0, len(input))
if len(input) < 1 {
return rv
}
morphs = t.tok.Analyze(string(input), tokenizer.Search)
for i, m := range morphs {
if m.Surface == "EOS" || m.Surface == "BOS" {
continue
}
surfacelen := len(m.Surface)
token := &analysis.Token{
Term: []byte(m.Surface),
Position: i,
Start: prevstart,
End: prevstart + surfacelen,
Type: analysis.Ideographic,
}
prevstart = prevstart + surfacelen
rv = append(rv, token)
}
return rv
}
func KagomeMorphTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
return NewKagomeMorphTokenizer(), nil
}
func init() {
registry.RegisterTokenizer(TokenizerName, KagomeMorphTokenizerConstructor)
}


@@ -1,54 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package ja
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/analysis"
)
func TestKagome(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
[]byte("こんにちは世界"),
analysis.TokenStream{
{
Start: 0,
End: 15,
Term: []byte("こんにちは"),
Position: 1,
Type: analysis.Ideographic,
},
{
Start: 15,
End: 21,
Term: []byte("世界"),
Position: 2,
Type: analysis.Ideographic,
},
},
},
}
tokenizer := NewKagomeMorphTokenizer()
for _, test := range tests {
actuals := tokenizer.Tokenize(test.input)
if !reflect.DeepEqual(actuals, test.output) {
t.Errorf("Expected %v, got %v for %s", test.output, actuals, string(test.input))
}
}
}


@@ -88,6 +88,7 @@ import (
_ "github.com/blevesearch/bleve/index/store/boltdb"
_ "github.com/blevesearch/bleve/index/store/goleveldb"
_ "github.com/blevesearch/bleve/index/store/gtreap"
_ "github.com/blevesearch/bleve/index/store/moss"
// index types
_ "github.com/blevesearch/bleve/index/firestorm"


@@ -12,5 +12,5 @@
package config
import (
_ "github.com/blevesearch/bleve/analysis/language/ja"
_ "github.com/blevesearch/blevex/lang/ja"
)


@@ -55,3 +55,18 @@ func (d *Document) GoString() string {
}
return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID, fields, compositeFields)
}
func (d *Document) NumPlainTextBytes() uint64 {
rv := uint64(0)
for _, field := range d.Fields {
rv += field.NumPlainTextBytes()
}
for _, compositeField := range d.CompositeFields {
for _, field := range d.Fields {
if compositeField.includesField(field.Name()) {
rv += field.NumPlainTextBytes()
}
}
}
return rv
}

document/document_test.go Normal file

@@ -0,0 +1,68 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package document
import (
"testing"
)
func TestDocumentNumPlainTextBytes(t *testing.T) {
tests := []struct {
doc *Document
num uint64
}{
{
doc: NewDocument("a"),
num: 0,
},
{
doc: NewDocument("b").
AddField(NewTextField("name", nil, []byte("hello"))),
num: 5,
},
{
doc: NewDocument("c").
AddField(NewTextField("name", nil, []byte("hello"))).
AddField(NewTextField("desc", nil, []byte("x"))),
num: 6,
},
{
doc: NewDocument("d").
AddField(NewTextField("name", nil, []byte("hello"))).
AddField(NewTextField("desc", nil, []byte("x"))).
AddField(NewNumericField("age", nil, 1.0)),
num: 14,
},
{
doc: NewDocument("e").
AddField(NewTextField("name", nil, []byte("hello"))).
AddField(NewTextField("desc", nil, []byte("x"))).
AddField(NewNumericField("age", nil, 1.0)).
AddField(NewCompositeField("_all", true, nil, nil)),
num: 28,
},
{
doc: NewDocument("e").
AddField(NewTextField("name", nil, []byte("hello"))).
AddField(NewTextField("desc", nil, []byte("x"))).
AddField(NewNumericField("age", nil, 1.0)).
AddField(NewCompositeField("_all", true, nil, []string{"age"})),
num: 20,
},
}
for _, test := range tests {
actual := test.doc.NumPlainTextBytes()
if actual != test.num {
t.Errorf("expected doc '%s' to have %d plain text bytes, got %d", test.doc.ID, test.num, actual)
}
}
}
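(The expected totals above follow directly from the accounting introduced in this commit: a text field reports len(value), so "hello" is 5 bytes and "x" is 1; a numeric field reports the 8-byte placeholder from its constructor; and a composite field counts every field it includes a second time. Doc "e" with a default-inclusive "_all" therefore reports (5 + 1 + 8) + (5 + 1 + 8) = 28, while excluding "age" drops the composite share to 5 + 1, giving 20.)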


@@ -26,4 +26,9 @@ type Field interface {
Options() IndexingOptions
Analyze() (int, analysis.TokenFrequencies)
Value() []byte
// NumPlainTextBytes should return the number of plain text bytes
// that this field represents - this is a common metric for tracking
// the rate of indexing
NumPlainTextBytes() uint64
}
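Since the comment frames this as a rate metric, here is a minimal sketch (not part of the commit) of turning the counter into a throughput figure; it assumes the num_plain_text_bytes_indexed stats key that the firestorm and upside_down stats maps expose later in this commit, plus imports of time and github.com/blevesearch/bleve/index:

func plainTextBytesIndexed(idx index.Index) uint64 {
	// the stats maps store this counter as a uint64
	if v, ok := idx.StatsMap()["num_plain_text_bytes_indexed"].(uint64); ok {
		return v
	}
	return 0
}

func indexingRate(idx index.Index, window time.Duration) float64 {
	before := plainTextBytesIndexed(idx)
	time.Sleep(window)
	// plain text bytes indexed per second over the sampling window
	return float64(plainTextBytesIndexed(idx)-before) / window.Seconds()
}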


@@ -18,10 +18,11 @@ import (
const DefaultBooleanIndexingOptions = StoreField | IndexField
type BooleanField struct {
name string
arrayPositions []uint64
options IndexingOptions
value []byte
name string
arrayPositions []uint64
options IndexingOptions
value []byte
numPlainTextBytes uint64
}
func (b *BooleanField) Name() string {
@@ -66,12 +67,17 @@ func (b *BooleanField) GoString() string {
return fmt.Sprintf("&document.BooleanField{Name:%s, Options: %s, Value: %s}", b.name, b.options, b.value)
}
func (b *BooleanField) NumPlainTextBytes() uint64 {
return b.numPlainTextBytes
}
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField {
return &BooleanField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
@@ -80,14 +86,17 @@ func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField
}
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options IndexingOptions) *BooleanField {
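	// 5 == len("false") and 4 == len("true"): the metric counts the original
	// plain text token, even though the stored value is just "F" or "T"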
numPlainTextBytes := 5
v := []byte("F")
if b {
numPlainTextBytes = 4
v = []byte("T")
}
return &BooleanField{
name: name,
arrayPositions: arrayPositions,
value: v,
options: options,
name: name,
arrayPositions: arrayPositions,
value: v,
options: options,
numPlainTextBytes: uint64(numPlainTextBytes),
}
}


@@ -69,7 +69,11 @@ func (c *CompositeField) Value() []byte {
return []byte{}
}
func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) {
func (c *CompositeField) NumPlainTextBytes() uint64 {
return 0
}
func (c *CompositeField) includesField(field string) bool {
shouldInclude := c.defaultInclude
_, fieldShouldBeIncluded := c.includedFields[field]
if fieldShouldBeIncluded {
@@ -79,8 +83,11 @@ func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFr
if fieldShouldBeExcluded {
shouldInclude = false
}
return shouldInclude
}
if shouldInclude {
func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) {
if c.includesField(field) {
c.totalLength += length
c.compositeFrequencies.MergeAll(field, freq)
}
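A quick sketch of the resulting precedence (illustrative only, inside package document since includesField is unexported), using the constructor arguments the document tests above exercise:

all := NewCompositeField("_all", true, nil, []string{"age"})
_ = all.includesField("name") // true: the default include applies
_ = all.includesField("age")  // false: an explicit exclude overrides the default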


@@ -25,10 +25,11 @@ var MinTimeRepresentable = time.Unix(0, math.MinInt64)
var MaxTimeRepresentable = time.Unix(0, math.MaxInt64)
type DateTimeField struct {
name string
arrayPositions []uint64
options IndexingOptions
value numeric_util.PrefixCoded
name string
arrayPositions []uint64
options IndexingOptions
value numeric_util.PrefixCoded
numPlainTextBytes uint64
}
func (n *DateTimeField) Name() string {
@@ -95,12 +96,17 @@ func (n *DateTimeField) GoString() string {
return fmt.Sprintf("&document.DateField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
func (n *DateTimeField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField {
return &DateTimeField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultDateTimeIndexingOptions,
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultDateTimeIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
@@ -117,6 +123,9 @@ func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, d
arrayPositions: arrayPositions,
value: prefixCoded,
options: options,
// not correct, just a place holder until we revisit how fields are
// represented and can fix this better
numPlainTextBytes: uint64(8),
}, nil
}
return nil, fmt.Errorf("cannot represent %s in this type", dt)


@@ -21,10 +21,11 @@ const DefaultNumericIndexingOptions = StoreField | IndexField
const DefaultPrecisionStep uint = 4
type NumericField struct {
name string
arrayPositions []uint64
options IndexingOptions
value numeric_util.PrefixCoded
name string
arrayPositions []uint64
options IndexingOptions
value numeric_util.PrefixCoded
numPlainTextBytes uint64
}
func (n *NumericField) Name() string {
@@ -91,12 +92,17 @@ func (n *NumericField) GoString() string {
return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
func (n *NumericField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
return &NumericField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
@@ -112,5 +118,8 @@ func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, nu
arrayPositions: arrayPositions,
value: prefixCoded,
options: options,
// not correct, just a place holder until we revisit how fields are
// represented and can fix this better
numPlainTextBytes: uint64(8),
}
}


@@ -18,11 +18,12 @@ import (
const DefaultTextIndexingOptions = IndexField
type TextField struct {
name string
arrayPositions []uint64
options IndexingOptions
analyzer *analysis.Analyzer
value []byte
name string
arrayPositions []uint64
options IndexingOptions
analyzer *analysis.Analyzer
value []byte
numPlainTextBytes uint64
}
func (t *TextField) Name() string {
@@ -72,35 +73,42 @@ func (t *TextField) GoString() string {
return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}", t.name, t.options, t.analyzer, t.value, t.arrayPositions)
}
func (t *TextField) NumPlainTextBytes() uint64 {
return t.numPlainTextBytes
}
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions)
}
func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options IndexingOptions) *TextField {
return &TextField{
name: name,
arrayPositions: arrayPositions,
options: options,
value: value,
name: name,
arrayPositions: arrayPositions,
options: options,
value: value,
numPlainTextBytes: uint64(len(value)),
}
}
func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer *analysis.Analyzer) *TextField {
return &TextField{
name: name,
arrayPositions: arrayPositions,
options: DefaultTextIndexingOptions,
analyzer: analyzer,
value: value,
name: name,
arrayPositions: arrayPositions,
options: DefaultTextIndexingOptions,
analyzer: analyzer,
value: value,
numPlainTextBytes: uint64(len(value)),
}
}
func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options IndexingOptions, analyzer *analysis.Analyzer) *TextField {
return &TextField{
name: name,
arrayPositions: arrayPositions,
options: options,
analyzer: analyzer,
value: value,
name: name,
arrayPositions: arrayPositions,
options: options,
analyzer: analyzer,
value: value,
numPlainTextBytes: uint64(len(value)),
}
}


@@ -26,6 +26,7 @@ const (
ErrorAliasEmpty
ErrorUnknownIndexType
ErrorEmptyID
ErrorIndexReadInconsistency
)
// Error represents a more strongly typed bleve error for detecting
@@ -52,4 +53,5 @@ var errorMessages = map[Error]string{
ErrorAliasEmpty: "cannot perform operation on empty alias",
ErrorUnknownIndexType: "unknown index type",
ErrorEmptyID: "document ID cannot be empty",
ErrorIndexReadInconsistency: "index read inconsistency detected",
}


@@ -28,6 +28,12 @@ func TestMain(m *testing.M) {
panic(err)
}
toRun := m.Run()
if example_index != nil {
err = example_index.Close()
if err != nil {
panic(err)
}
}
err = os.RemoveAll("path_to_index")
if err != nil {
panic(err)


@@ -701,4 +701,15 @@ func TestHandlers(t *testing.T) {
}
}
}
// close indexes
for _, indexName := range IndexNames() {
index := UnregisterIndexByName(indexName)
if index != nil {
err := index.Close()
if err != nil {
t.Errorf("error closing index %s: %v", indexName, err)
}
}
}
}


@@ -13,6 +13,7 @@ import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
"golang.org/x/net/context"
)
// A Batch groups together multiple Index and Delete
@@ -167,6 +168,7 @@ type Index interface {
DocCount() (uint64, error)
Search(req *SearchRequest) (*SearchResult, error)
SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error)
Fields() ([]string, error)
@@ -197,6 +199,7 @@ type Index interface {
Mapping() *IndexMapping
Stats() *IndexStat
StatsMap() map[string]interface{}
GetInternal(key []byte) ([]byte, error)
SetInternal(key, val []byte) error
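With SearchInContext on the public interface, a search can now be bounded by a deadline. A minimal sketch (not from the commit), written as if inside package bleve and using the golang.org/x/net/context package imported above:

// timedSearch runs an illustrative query bounded by a 50ms deadline
func timedSearch(idx Index) (*SearchResult, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()
	return idx.SearchInContext(ctx, NewSearchRequest(NewMatchQuery("hello")))
}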


@@ -7,6 +7,8 @@
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
// +build cznicb
package firestorm
import (


@@ -22,6 +22,9 @@ const DefaultDictUpdateThreshold = 10
var DefaultDictUpdateSleep = 1 * time.Second
type DictUpdater struct {
batchesStarted uint64
batchesFlushed uint64
f *Firestorm
dictUpdateSleep time.Duration
quit chan struct{}
@@ -30,9 +33,6 @@ type DictUpdater struct {
mutex sync.RWMutex
workingSet map[string]int64
closeWait sync.WaitGroup
batchesStarted uint64
batchesFlushed uint64
}
func NewDictUpdater(f *Firestorm) *DictUpdater {
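The counter fields move to the top of DictUpdater (and, below, of Firestorm, Lookuper, and indexStat), presumably to satisfy sync/atomic's documented alignment rule: on 32-bit platforms only the first word of an allocated struct is guaranteed to be 64-bit aligned, so atomically updated uint64s belong first. The pattern in miniature:

type counters struct {
	started uint64 // kept first: 64-bit aligned even on 32-bit platforms
	done    uint64
	mu      sync.RWMutex // non-atomic fields follow the atomic ones
}

func (c *counters) bump() { atomic.AddUint64(&c.started, 1) }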


@@ -91,7 +91,10 @@ func TestDictUpdater(t *testing.T) {
for len(f.(*Firestorm).dictUpdater.incoming) > 0 {
runtime.Gosched()
}
f.(*Firestorm).dictUpdater.update()
err = f.(*Firestorm).dictUpdater.waitTasksDone(5 * time.Second)
if err != nil {
t.Fatal(err)
}
// assert that dictionary rows are correct
reader, err = f.(*Firestorm).store.Reader()
@@ -133,7 +136,10 @@ func TestDictUpdater(t *testing.T) {
for len(f.(*Firestorm).dictUpdater.incoming) > 0 {
runtime.Gosched()
}
f.(*Firestorm).dictUpdater.update()
err = f.(*Firestorm).dictUpdater.waitTasksDone(5 * time.Second)
if err != nil {
t.Fatal(err)
}
// assert that dictionary rows are correct
reader, err = f.(*Firestorm).store.Reader()


@@ -27,14 +27,15 @@ const Name = "firestorm"
var UnsafeBatchUseDetected = fmt.Errorf("bleve.Batch is NOT thread-safe, modification after execution detected")
type Firestorm struct {
highDocNumber uint64
docCount uint64
storeName string
storeConfig map[string]interface{}
store store.KVStore
compensator *Compensator
analysisQueue *index.AnalysisQueue
fieldCache *index.FieldCache
highDocNumber uint64
docCount *uint64
garbageCollector *GarbageCollector
lookuper *Lookuper
dictUpdater *DictUpdater
@@ -42,14 +43,13 @@ type Firestorm struct {
}
func NewFirestorm(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) {
initialCount := uint64(0)
rv := Firestorm{
storeName: storeName,
storeConfig: storeConfig,
compensator: NewCompensator(),
analysisQueue: analysisQueue,
fieldCache: index.NewFieldCache(),
docCount: &initialCount,
docCount: 0,
highDocNumber: 0,
stats: &indexStat{},
}
@@ -130,7 +130,7 @@ func (f *Firestorm) Close() error {
}
func (f *Firestorm) DocCount() (uint64, error) {
count := atomic.LoadUint64(f.docCount)
count := atomic.LoadUint64(&f.docCount)
return count, nil
}
@@ -142,6 +142,7 @@ func (f *Firestorm) Update(doc *document.Document) (err error) {
// do analysis before acquiring write lock
analysisStart := time.Now()
numPlainTextBytes := doc.NumPlainTextBytes()
resultChan := make(chan *index.AnalysisResult)
aw := index.NewAnalysisWork(f, doc, resultChan)
@@ -179,6 +180,7 @@ func (f *Firestorm) Update(doc *document.Document) (err error) {
f.dictUpdater.NotifyBatch(dictionaryDeltas)
atomic.AddUint64(&f.stats.indexTime, uint64(time.Since(indexStart)))
atomic.AddUint64(&f.stats.numPlainTextBytesIndexed, numPlainTextBytes)
return
}
@@ -298,11 +300,13 @@ func (f *Firestorm) Batch(batch *index.Batch) (err error) {
var docsUpdated uint64
var docsDeleted uint64
var numPlainTextBytes uint64
for _, doc := range batch.IndexOps {
if doc != nil {
doc.Number = firstDocNumber // actually assign doc numbers here
firstDocNumber++
docsUpdated++
numPlainTextBytes += doc.NumPlainTextBytes()
} else {
docsDeleted++
}
@@ -407,6 +411,7 @@ func (f *Firestorm) Batch(batch *index.Batch) (err error) {
atomic.AddUint64(&f.stats.updates, docsUpdated)
atomic.AddUint64(&f.stats.deletes, docsDeleted)
atomic.AddUint64(&f.stats.batches, 1)
atomic.AddUint64(&f.stats.numPlainTextBytesIndexed, numPlainTextBytes)
} else {
atomic.AddUint64(&f.stats.errors, 1)
}
@@ -539,13 +544,20 @@ func (f *Firestorm) Reader() (index.IndexReader, error) {
func (f *Firestorm) Stats() json.Marshaler {
return f.stats
}
func (f *Firestorm) StatsMap() map[string]interface{} {
return f.stats.statsMap()
}
func (f *Firestorm) Wait(timeout time.Duration) error {
return f.dictUpdater.waitTasksDone(timeout)
}
func (f *Firestorm) Advanced() (store.KVStore, error) {
return f.store, nil
}
func init() {
registry.RegisterIndexType(Name, NewFirestorm)
}


@@ -19,13 +19,13 @@ import (
const channelBufferSize = 1000
type Lookuper struct {
tasksQueued uint64
tasksDone uint64
f *Firestorm
workChan chan []*InFlightItem
quit chan struct{}
closeWait sync.WaitGroup
tasksQueued uint64
tasksDone uint64
}
func NewLookuper(f *Firestorm) *Lookuper {
@@ -117,10 +117,10 @@ func (l *Lookuper) lookup(item *InFlightItem) {
l.f.compensator.Migrate(item.docID, item.docNum, oldDocNums)
if len(oldDocNums) == 0 && item.docNum != 0 {
// this was an add, not an update
atomic.AddUint64(l.f.docCount, 1)
atomic.AddUint64(&l.f.docCount, 1)
} else if len(oldDocNums) > 0 && item.docNum == 0 {
// this was a delete (and it previously existed)
atomic.AddUint64(l.f.docCount, ^uint64(0))
atomic.AddUint64(&l.f.docCount, ^uint64(0))
}
}
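The delete path uses the standard unsigned-decrement idiom: there is no atomic add with a negative delta for uint64, so adding ^uint64(0) (all bits set) wraps modulo 2^64 and subtracts exactly one:

n := uint64(5)
n += ^uint64(0) // n is now 4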


@@ -11,6 +11,7 @@ package firestorm
import (
"bytes"
"sync/atomic"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
@@ -55,6 +56,7 @@ func newFirestormTermFieldReader(r *firestormReader, field uint16, term []byte)
rv.count = dictionaryRow.Count()
}
atomic.AddUint64(&r.f.stats.termSearchersStarted, uint64(1))
return &rv, nil
}
@@ -135,6 +137,7 @@ func (r *firestormTermFieldReader) Count() uint64 {
}
func (r *firestormTermFieldReader) Close() error {
atomic.AddUint64(&r.r.f.stats.termSearchersFinished, uint64(1))
if r.i != nil {
return r.i.Close()
}


@@ -12,15 +12,20 @@ package firestorm
import (
"encoding/json"
"sync/atomic"
"github.com/blevesearch/bleve/index/store"
)
type indexStat struct {
f *Firestorm
updates, deletes, batches, errors uint64
analysisTime, indexTime uint64
termSearchersStarted uint64
termSearchersFinished uint64
numPlainTextBytesIndexed uint64
f *Firestorm
}
func (i *indexStat) MarshalJSON() ([]byte, error) {
func (i *indexStat) statsMap() map[string]interface{} {
m := map[string]interface{}{}
m["updates"] = atomic.LoadUint64(&i.updates)
m["deletes"] = atomic.LoadUint64(&i.deletes)
@@ -29,5 +34,18 @@ func (i *indexStat) MarshalJSON() ([]byte, error) {
m["analysis_time"] = atomic.LoadUint64(&i.analysisTime)
m["index_time"] = atomic.LoadUint64(&i.indexTime)
m["lookup_queue_len"] = len(i.f.lookuper.workChan)
m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted)
m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished)
m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed)
if o, ok := i.f.store.(store.KVStoreStats); ok {
m["kv"] = o.StatsMap()
}
return m
}
func (i *indexStat) MarshalJSON() ([]byte, error) {
m := i.statsMap()
return json.Marshal(m)
}


@@ -90,7 +90,7 @@ func (f *Firestorm) warmup(reader store.KVReader) error {
lastDocNumbers = append(lastDocNumbers, docNum)
} else {
// new doc id
atomic.AddUint64(f.docCount, 1)
atomic.AddUint64(&f.docCount, 1)
// last docID had multiple doc numbers
if len(lastDocNumbers) > 1 {


@@ -15,6 +15,7 @@ import (
"time"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index/store"
)
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
@@ -41,8 +42,11 @@ type Index interface {
Reader() (IndexReader, error)
Stats() json.Marshaler
StatsMap() map[string]interface{}
Analyze(d *document.Document) *AnalysisResult
Advanced() (store.KVStore, error)
}
// AsyncIndex is an interface for indexes which perform


@@ -0,0 +1,21 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package boltdb
import "encoding/json"
type stats struct {
s *Store
}
func (s *stats) MarshalJSON() ([]byte, error) {
bs := s.s.db.Stats()
return json.Marshal(bs)
}


@@ -18,6 +18,7 @@
package boltdb
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/index/store"
@@ -95,6 +96,12 @@ func (bs *Store) Writer() (store.KVWriter, error) {
}, nil
}
func (bs *Store) Stats() json.Marshaler {
return &stats{
s: bs,
}
}
func init() {
registry.RegisterKVStore(Name, New)
}


@@ -9,6 +9,8 @@
package store
import "encoding/json"
// KVStore is an abstraction for working with KV stores
type KVStore interface {
@@ -154,3 +156,12 @@ type KVBatch interface {
// Close frees resources
Close() error
}
// KVStoreStats is an optional interface that KVStores can implement
// if they're able to report any useful stats
type KVStoreStats interface {
// Stats returns a JSON serializable object representing stats for this KVStore
Stats() json.Marshaler
StatsMap() map[string]interface{}
}
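Because the interface is optional, callers probe for it with a type assertion, exactly as the firestorm, upside_down, and metrics stats code elsewhere in this commit does; condensed:

// kvStatsMap returns nil when the underlying store reports no stats
func kvStatsMap(s store.KVStore) map[string]interface{} {
	if ss, ok := s.(store.KVStoreStats); ok {
		return ss.StatsMap()
	}
	return nil
}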


@@ -0,0 +1,46 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package metrics
import (
"encoding/json"
"github.com/blevesearch/bleve/index/store"
)
type stats struct {
s *Store
}
func (s *stats) statsMap() map[string]interface{} {
ms := map[string]interface{}{}
ms["metrics"] = map[string]interface{}{
"reader_get": TimerMap(s.s.TimerReaderGet),
"reader_multi_get": TimerMap(s.s.TimerReaderMultiGet),
"reader_prefix_iterator": TimerMap(s.s.TimerReaderPrefixIterator),
"reader_range_iterator": TimerMap(s.s.TimerReaderRangeIterator),
"writer_execute_batch": TimerMap(s.s.TimerWriterExecuteBatch),
"iterator_seek": TimerMap(s.s.TimerIteratorSeek),
"iterator_next": TimerMap(s.s.TimerIteratorNext),
"batch_merge": TimerMap(s.s.TimerBatchMerge),
}
if o, ok := s.s.o.(store.KVStoreStats); ok {
ms["kv"] = o.StatsMap()
}
return ms
}
func (s *stats) MarshalJSON() ([]byte, error) {
m := s.statsMap()
return json.Marshal(m)
}


@@ -43,6 +43,8 @@ type Store struct {
m sync.Mutex // Protects the fields that follow.
errors *list.List // Capped list of StoreError's.
s *stats
}
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
@@ -68,7 +70,7 @@ func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore,
return nil, err
}
return &Store{
rv := &Store{
o: kvs,
TimerReaderGet: metrics.NewTimer(),
@@ -81,7 +83,11 @@ func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore,
TimerBatchMerge: metrics.NewTimer(),
errors: list.New(),
}, nil
}
rv.s = &stats{s: rv}
return rv, nil
}
func init() {
@@ -213,6 +219,20 @@ func (s *Store) WriteJSON(w io.Writer) (err error) {
return
}
// see if the underlying implementation has its own stats
if o, ok := s.o.(store.KVStoreStats); ok {
storeStats := o.Stats()
var storeBytes []byte
storeBytes, err = json.Marshal(storeStats)
if err != nil {
return
}
_, err = fmt.Fprintf(w, `, "store": %s`, string(storeBytes))
if err != nil {
return
}
}
_, err = w.Write([]byte(`}`))
if err != nil {
return
@@ -240,3 +260,11 @@ func (s *Store) WriteCSV(w io.Writer) {
WriteTimerCSV(w, s.TimerIteratorNext)
WriteTimerCSV(w, s.TimerBatchMerge)
}
func (s *Store) Stats() json.Marshaler {
return s.s
}
func (s *Store) StatsMap() map[string]interface{} {
return s.s.statsMap()
}


@@ -12,6 +12,36 @@ import (
var timerPercentiles = []float64{0.5, 0.75, 0.95, 0.99, 0.999}
func TimerMap(timer metrics.Timer) map[string]interface{} {
rv := make(map[string]interface{})
t := timer.Snapshot()
p := t.Percentiles(timerPercentiles)
percentiles := make(map[string]interface{})
percentiles["median"] = p[0]
percentiles["75%"] = p[1]
percentiles["95%"] = p[2]
percentiles["99%"] = p[3]
percentiles["99.9%"] = p[4]
rates := make(map[string]interface{})
rates["1-min"] = t.Rate1()
rates["5-min"] = t.Rate5()
rates["15-min"] = t.Rate15()
rates["mean"] = t.RateMean()
rv["count"] = t.Count()
rv["min"] = t.Min()
rv["max"] = t.Max()
rv["mean"] = t.Mean()
rv["stddev"] = t.StdDev()
rv["percentiles"] = percentiles
rv["rates"] = rates
return rv
}
func WriteTimerJSON(w io.Writer, timer metrics.Timer) {
t := timer.Snapshot()
p := t.Percentiles(timerPercentiles)

index/store/moss/batch.go Normal file

@@ -0,0 +1,84 @@
// Copyright (c) 2016 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the
// License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an "AS
// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language
// governing permissions and limitations under the License.
package moss
import (
"github.com/couchbase/moss"
"github.com/blevesearch/bleve/index/store"
)
type Batch struct {
store *Store
merge *store.EmulatedMerge
batch moss.Batch
buf []byte // Non-nil when using pre-alloc'ed / NewBatchEx().
bufUsed int
}
func (b *Batch) Set(key, val []byte) {
var err error
if b.buf != nil {
b.bufUsed += len(key) + len(val)
err = b.batch.AllocSet(key, val)
} else {
err = b.batch.Set(key, val)
}
if err != nil {
b.store.Logf("bleve moss batch.Set err: %v", err)
}
}
func (b *Batch) Delete(key []byte) {
var err error
if b.buf != nil {
b.bufUsed += len(key)
err = b.batch.AllocDel(key)
} else {
err = b.batch.Del(key)
}
if err != nil {
b.store.Logf("bleve moss batch.Delete err: %v", err)
}
}
func (b *Batch) Merge(key, val []byte) {
if b.buf != nil {
b.bufUsed += len(key) + len(val)
}
b.merge.Merge(key, val)
}
func (b *Batch) Reset() {
err := b.Close()
if err != nil {
b.store.Logf("bleve moss batch.Close err: %v", err)
return
}
batch, err := b.store.ms.NewBatch(0, 0)
if err == nil {
b.batch = batch
b.merge = store.NewEmulatedMerge(b.store.mo)
b.buf = nil
b.bufUsed = 0
}
}
func (b *Batch) Close() error {
b.merge = nil
err := b.batch.Close()
b.batch = nil
return err
}


@@ -0,0 +1,134 @@
// Copyright (c) 2016 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the
// License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an "AS
// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language
// governing permissions and limitations under the License.
package moss
import (
"bytes"
"github.com/couchbase/moss"
)
type Iterator struct {
store *Store
ss moss.Snapshot
iter moss.Iterator
prefix []byte
start []byte
end []byte
done bool
k []byte
v []byte
}
func (x *Iterator) Seek(seekToKey []byte) {
x.done = true
x.k = nil
x.v = nil
if bytes.Compare(seekToKey, x.start) < 0 {
seekToKey = x.start
}
iter, err := x.ss.StartIterator(seekToKey, x.end, moss.IteratorOptions{})
if err != nil {
x.store.Logf("bleve moss StartIterator err: %v", err)
return
}
err = x.iter.Close()
if err != nil {
x.store.Logf("bleve moss iterator.Seek err: %v", err)
return
}
x.iter = iter
x.checkDone()
}
func (x *Iterator) Next() {
if x.done {
return
}
x.done = true
x.k = nil
x.v = nil
err := x.iter.Next()
if err != nil {
return
}
x.checkDone()
}
func (x *Iterator) Current() ([]byte, []byte, bool) {
return x.k, x.v, !x.done
}
func (x *Iterator) Key() []byte {
if x.done {
return nil
}
return x.k
}
func (x *Iterator) Value() []byte {
if x.done {
return nil
}
return x.v
}
func (x *Iterator) Valid() bool {
return !x.done
}
func (x *Iterator) Close() error {
var err error
x.ss = nil
if x.iter != nil {
err = x.iter.Close()
x.iter = nil
}
x.prefix = nil
x.done = true
x.k = nil
x.v = nil
return err
}
func (x *Iterator) checkDone() {
x.done = true
x.k = nil
x.v = nil
k, v, err := x.iter.Current()
if err != nil {
return
}
if x.prefix != nil && !bytes.HasPrefix(k, x.prefix) {
return
}
x.done = false
x.k = k
x.v = v
}
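Note how prefix iteration is emulated: moss itself only offers range iterators, so the Reader's PrefixIterator (later in this commit) starts a range at the prefix with no end bound, and checkDone marks the iterator done as soon as the current key no longer carries the prefix.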

index/store/moss/lower.go Normal file

@@ -0,0 +1,404 @@
// Copyright (c) 2016 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the
// License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an "AS
// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language
// governing permissions and limitations under the License.
// Package moss provides a KVStore implementation based on the
// github.com/couchbaselabs/moss library.
package moss
import (
"fmt"
"sync"
"github.com/couchbase/moss"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/registry"
)
func initLowerLevelStore(
mo store.MergeOperator,
config map[string]interface{},
lowerLevelStoreName string,
lowerLevelStoreConfig map[string]interface{},
lowerLevelMaxBatchSize uint64,
logf func(format string, a ...interface{}),
) (moss.Snapshot, moss.LowerLevelUpdate, store.KVStore, error) {
constructor := registry.KVStoreConstructorByName(lowerLevelStoreName)
if constructor == nil {
return nil, nil, nil, fmt.Errorf("moss store, initLowerLevelStore,"+
" could not find lower level store: %s", lowerLevelStoreName)
}
if lowerLevelStoreConfig == nil {
lowerLevelStoreConfig = map[string]interface{}{}
}
for k, v := range config {
_, exists := lowerLevelStoreConfig[k]
if !exists {
lowerLevelStoreConfig[k] = v
}
}
kvStore, err := constructor(mo, lowerLevelStoreConfig)
if err != nil {
return nil, nil, nil, err
}
llStore := &llStore{
refs: 0,
config: config,
llConfig: lowerLevelStoreConfig,
kvStore: kvStore,
logf: logf,
}
llUpdate := func(ssHigher moss.Snapshot) (ssLower moss.Snapshot, err error) {
return llStore.update(ssHigher, lowerLevelMaxBatchSize)
}
llSnapshot, err := llUpdate(nil)
if err != nil {
_ = kvStore.Close()
return nil, nil, nil, err
}
return llSnapshot, llUpdate, kvStore, nil // llStore.refs is now 1.
}
// ------------------------------------------------
// llStore is a lower level store and provides ref-counting around a
// bleve store.KVStore.
type llStore struct {
kvStore store.KVStore
config map[string]interface{}
llConfig map[string]interface{}
logf func(format string, a ...interface{})
m sync.Mutex // Protects fields that follow.
refs int
}
// llSnapshot represents a lower-level snapshot, wrapping a bleve
// store.KVReader, and implements the moss.Snapshot interface.
type llSnapshot struct {
llStore *llStore // Holds 1 refs on the llStore.
kvReader store.KVReader
m sync.Mutex // Protects fields that follow.
refs int
}
// llIterator represents a lower-level iterator, wrapping a bleve
// store.KVIterator, and implements the moss.Iterator interface.
type llIterator struct {
llSnapshot *llSnapshot // Holds 1 refs on the llSnapshot.
// Some lower-level KVReader implementations need a separate
// KVReader clone, due to KVReader single-threaded'ness.
kvReader store.KVReader
kvIterator store.KVIterator
}
type readerSource interface {
Reader() (store.KVReader, error)
}
// ------------------------------------------------
func (s *llStore) addRef() *llStore {
s.m.Lock()
s.refs += 1
s.m.Unlock()
return s
}
func (s *llStore) decRef() {
s.m.Lock()
s.refs -= 1
if s.refs <= 0 {
err := s.kvStore.Close()
if err != nil {
s.logf("llStore kvStore.Close err: %v", err)
}
}
s.m.Unlock()
}
// update() mutates this lower level store with latest data from the
// given higher level moss.Snapshot and returns a new moss.Snapshot
// that the higher level can use which represents this lower level
// store.
func (s *llStore) update(ssHigher moss.Snapshot, maxBatchSize uint64) (
ssLower moss.Snapshot, err error) {
if ssHigher != nil {
iter, err := ssHigher.StartIterator(nil, nil, moss.IteratorOptions{
IncludeDeletions: true,
SkipLowerLevel: true,
})
if err != nil {
return nil, err
}
defer func() {
err = iter.Close()
if err != nil {
s.logf("llStore iter.Close err: %v", err)
}
}()
kvWriter, err := s.kvStore.Writer()
if err != nil {
return nil, err
}
defer func() {
err = kvWriter.Close()
if err != nil {
s.logf("llStore kvWriter.Close err: %v", err)
}
}()
batch := kvWriter.NewBatch()
defer func() {
if batch != nil {
err = batch.Close()
if err != nil {
s.logf("llStore batch.Close err: %v", err)
}
}
}()
var readOptions moss.ReadOptions
i := uint64(0)
for {
if i%1000000 == 0 {
s.logf("llStore.update, i: %d", i)
}
ex, key, val, err := iter.CurrentEx()
if err == moss.ErrIteratorDone {
break
}
if err != nil {
return nil, err
}
switch ex.Operation {
case moss.OperationSet:
batch.Set(key, val)
case moss.OperationDel:
batch.Delete(key)
case moss.OperationMerge:
val, err = ssHigher.Get(key, readOptions)
if err != nil {
return nil, err
}
if val != nil {
batch.Set(key, val)
} else {
batch.Delete(key)
}
default:
return nil, fmt.Errorf("moss store, update,"+
" unexpected operation, ex: %v", ex)
}
i++
err = iter.Next()
if err == moss.ErrIteratorDone {
break
}
if err != nil {
return nil, err
}
if maxBatchSize > 0 && i%maxBatchSize == 0 {
err = kvWriter.ExecuteBatch(batch)
if err != nil {
return nil, err
}
err = batch.Close()
if err != nil {
return nil, err
}
batch = kvWriter.NewBatch()
}
}
if i > 0 {
s.logf("llStore.update, ExecuteBatch,"+
" path: %s, total: %d, start", s.llConfig["path"], i)
err = kvWriter.ExecuteBatch(batch)
if err != nil {
return nil, err
}
s.logf("llStore.update, ExecuteBatch,"+
" path: %s: total: %d, done", s.llConfig["path"], i)
}
}
kvReader, err := s.kvStore.Reader()
if err != nil {
return nil, err
}
s.logf("llStore.update, new reader")
return &llSnapshot{
llStore: s.addRef(),
kvReader: kvReader,
refs: 1,
}, nil
}
// ------------------------------------------------
func (llss *llSnapshot) addRef() *llSnapshot {
llss.m.Lock()
llss.refs += 1
llss.m.Unlock()
return llss
}
func (llss *llSnapshot) decRef() {
llss.m.Lock()
llss.refs -= 1
if llss.refs <= 0 {
if llss.kvReader != nil {
err := llss.kvReader.Close()
if err != nil {
llss.llStore.logf("llSnapshot kvReader.Close err: %v", err)
}
llss.kvReader = nil
}
if llss.llStore != nil {
llss.llStore.decRef()
llss.llStore = nil
}
}
llss.m.Unlock()
}
func (llss *llSnapshot) Close() error {
llss.decRef()
return nil
}
func (llss *llSnapshot) Get(key []byte,
readOptions moss.ReadOptions) ([]byte, error) {
rs, ok := llss.kvReader.(readerSource)
if ok {
r2, err := rs.Reader()
if err != nil {
return nil, err
}
val, err := r2.Get(key)
_ = r2.Close()
return val, err
}
return llss.kvReader.Get(key)
}
func (llss *llSnapshot) StartIterator(
startKeyInclusive, endKeyExclusive []byte,
iteratorOptions moss.IteratorOptions) (moss.Iterator, error) {
rs, ok := llss.kvReader.(readerSource)
if ok {
r2, err := rs.Reader()
if err != nil {
return nil, err
}
i2 := r2.RangeIterator(startKeyInclusive, endKeyExclusive)
return &llIterator{llSnapshot: llss.addRef(), kvReader: r2, kvIterator: i2}, nil
}
i := llss.kvReader.RangeIterator(startKeyInclusive, endKeyExclusive)
return &llIterator{llSnapshot: llss.addRef(), kvReader: nil, kvIterator: i}, nil
}
// ------------------------------------------------
func (lli *llIterator) Close() error {
var err0 error
if lli.kvIterator != nil {
err0 = lli.kvIterator.Close()
lli.kvIterator = nil
}
var err1 error
if lli.kvReader != nil {
err1 = lli.kvReader.Close()
lli.kvReader = nil
}
lli.llSnapshot.decRef()
lli.llSnapshot = nil
if err0 != nil {
return err0
}
if err1 != nil {
return err1
}
return nil
}
func (lli *llIterator) Next() error {
lli.kvIterator.Next()
return nil
}
func (lli *llIterator) Current() (key, val []byte, err error) {
key, val, ok := lli.kvIterator.Current()
if !ok {
return nil, nil, moss.ErrIteratorDone
}
return key, val, nil
}
func (lli *llIterator) CurrentEx() (
entryEx moss.EntryEx, key, val []byte, err error) {
return moss.EntryEx{}, nil, nil, moss.ErrUnimplemented
}


@@ -0,0 +1,82 @@
// Copyright (c) 2016 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the
// License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an "AS
// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language
// governing permissions and limitations under the License.
package moss
import (
"github.com/couchbase/moss"
"github.com/blevesearch/bleve/index/store"
)
type Reader struct {
store *Store
ss moss.Snapshot
}
func (r *Reader) Get(k []byte) (v []byte, err error) {
v, err = r.ss.Get(k, moss.ReadOptions{})
if err != nil {
return nil, err
}
if v != nil {
return append([]byte(nil), v...), nil
}
return nil, nil
}
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
return store.MultiGet(r, keys)
}
func (r *Reader) PrefixIterator(k []byte) store.KVIterator {
iter, err := r.ss.StartIterator(k, nil, moss.IteratorOptions{})
if err != nil {
return nil
}
rv := &Iterator{
store: r.store,
ss: r.ss,
iter: iter,
prefix: k,
start: k,
end: nil,
}
rv.checkDone()
return rv
}
func (r *Reader) RangeIterator(start, end []byte) store.KVIterator {
iter, err := r.ss.StartIterator(start, end, moss.IteratorOptions{})
if err != nil {
return nil
}
rv := &Iterator{
store: r.store,
ss: r.ss,
iter: iter,
prefix: nil,
start: start,
end: end,
}
rv.checkDone()
return rv
}
func (r *Reader) Close() error {
return r.ss.Close()
}

index/store/moss/stats.go Normal file

@@ -0,0 +1,43 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package moss
import (
"encoding/json"
"github.com/blevesearch/bleve/index/store"
)
type stats struct {
s *Store
}
func (s *stats) statsMap() map[string]interface{} {
ms := map[string]interface{}{}
var err error
ms["moss"], err = s.s.ms.Stats()
if err != nil {
return ms
}
if s.s.llstore != nil {
if o, ok := s.s.llstore.(store.KVStoreStats); ok {
ms["kv"] = o.StatsMap()
}
}
return ms
}
func (s *stats) MarshalJSON() ([]byte, error) {
m := s.statsMap()
return json.Marshal(m)
}

index/store/moss/store.go Normal file

@@ -0,0 +1,196 @@
// Copyright (c) 2016 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the
// License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an "AS
// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language
// governing permissions and limitations under the License.
// Package moss provides a KVStore implementation based on the
// github.com/couchbaselabs/moss library.
package moss
import (
"encoding/json"
"fmt"
"sync"
"github.com/couchbase/moss"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/registry"
)
// RegistryCollectionOptions should be treated as read-only after
// process init()'ialization.
var RegistryCollectionOptions = map[string]moss.CollectionOptions{}
const Name = "moss"
type Store struct {
m sync.Mutex
ms moss.Collection
mo store.MergeOperator
llstore store.KVStore
s *stats
}
// New initializes a moss storage with values from the optional
// config["mossCollectionOptions"] (a JSON moss.CollectionOptions).
// Next, values from the RegistryCollectionOptions, named by the
// optional config["mossCollectionOptionsName"], take precedence.
// Finally, base case defaults are taken from
// moss.DefaultCollectionOptions.
func New(mo store.MergeOperator, config map[string]interface{}) (
store.KVStore, error) {
options := moss.DefaultCollectionOptions // Copy.
v, ok := config["mossCollectionOptionsName"]
if ok {
name, ok := v.(string)
if !ok {
return nil, fmt.Errorf("moss store,"+
" could not parse config[mossCollectionOptionsName]: %v", v)
}
options, ok = RegistryCollectionOptions[name] // Copy.
if !ok {
return nil, fmt.Errorf("moss store,"+
" could not find RegistryCollectionOptions, name: %s", name)
}
}
v, ok = config["mossCollectionOptions"]
if ok {
b, err := json.Marshal(v) // Convert from map[string]interface{}.
if err != nil {
return nil, fmt.Errorf("moss store,"+
" could not marshal config[mossCollectionOptions]: %v", v)
}
err = json.Unmarshal(b, &options)
if err != nil {
return nil, fmt.Errorf("moss store,"+
" could not unmarshal config[mossCollectionOptions]: %v", v)
}
}
// --------------------------------------------------
if options.Log == nil || options.Debug <= 0 {
options.Log = func(format string, a ...interface{}) {}
}
// --------------------------------------------------
mossLowerLevelStoreName := ""
v, ok = config["mossLowerLevelStoreName"]
if ok {
mossLowerLevelStoreName, ok = v.(string)
if !ok {
return nil, fmt.Errorf("moss store,"+
" could not parse config[mossLowerLevelStoreName]: %v", v)
}
}
var llStore store.KVStore
if options.LowerLevelInit == nil &&
options.LowerLevelUpdate == nil &&
mossLowerLevelStoreName != "" {
mossLowerLevelStoreConfig := map[string]interface{}{}
v, ok := config["mossLowerLevelStoreConfig"]
if ok {
mossLowerLevelStoreConfig, ok = v.(map[string]interface{})
if !ok {
return nil, fmt.Errorf("moss store, initLowerLevelStore,"+
" could parse mossLowerLevelStoreConfig: %v", v)
}
}
mossLowerLevelMaxBatchSize := uint64(0)
v, ok = config["mossLowerLevelMaxBatchSize"]
if ok {
mossLowerLevelMaxBatchSizeF, ok := v.(float64)
if !ok {
return nil, fmt.Errorf("moss store,"+
" could not parse config[mossLowerLevelMaxBatchSize]: %v", v)
}
mossLowerLevelMaxBatchSize = uint64(mossLowerLevelMaxBatchSizeF)
}
lowerLevelInit, lowerLevelUpdate, lowerLevelStore, err :=
initLowerLevelStore(mo, config,
mossLowerLevelStoreName,
mossLowerLevelStoreConfig,
mossLowerLevelMaxBatchSize,
options.Log)
if err != nil {
return nil, err
}
options.LowerLevelInit = lowerLevelInit
options.LowerLevelUpdate = lowerLevelUpdate
llStore = lowerLevelStore
}
// --------------------------------------------------
options.MergeOperator = mo
ms, err := moss.NewCollection(options)
if err != nil {
return nil, err
}
err = ms.Start()
if err != nil {
return nil, err
}
rv := Store{
ms: ms,
mo: mo,
llstore: llStore,
}
rv.s = &stats{s: &rv}
return &rv, nil
}
func (s *Store) Close() error {
return s.ms.Close()
}
func (s *Store) Reader() (store.KVReader, error) {
ss, err := s.ms.Snapshot()
if err != nil {
return nil, err
}
return &Reader{ss: ss}, nil
}
func (s *Store) Writer() (store.KVWriter, error) {
return &Writer{s: s}, nil
}
func (s *Store) Logf(fmt string, args ...interface{}) {
options := s.ms.Options()
if options.Log != nil {
options.Log(fmt, args...)
}
}
func (s *Store) Stats() json.Marshaler {
return s.s
}
func (s *Store) StatsMap() map[string]interface{} {
return s.s.statsMap()
}
func init() {
registry.RegisterKVStore(Name, New)
}
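A sketch of wiring these knobs together; the map keys are the ones parsed above, while the preset name, import alias, boltdb path, and batch size are illustrative assumptions:

import (
	bleveMoss "github.com/blevesearch/bleve/index/store/moss"
	"github.com/couchbase/moss"
)

func init() {
	// register a named preset during process init, before it is looked up
	bleveMoss.RegistryCollectionOptions["my-moss"] = moss.DefaultCollectionOptions
}

var kvConfig = map[string]interface{}{
	"mossCollectionOptionsName":  "my-moss",
	"mossLowerLevelStoreName":    "boltdb",
	"mossLowerLevelStoreConfig":  map[string]interface{}{"path": "lower.bolt"},
	"mossLowerLevelMaxBatchSize": float64(100000), // note: parsed as a float64
}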


@@ -0,0 +1,88 @@
// Copyright (c) 2016 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the
// License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an "AS
// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language
// governing permissions and limitations under the License.
package moss
import (
"testing"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/index/store/test"
)
func open(t *testing.T, mo store.MergeOperator) store.KVStore {
rv, err := New(mo, nil)
if err != nil {
t.Fatal(err)
}
return rv
}
func cleanup(t *testing.T, s store.KVStore) {
err := s.Close()
if err != nil {
t.Fatal(err)
}
}
func TestMossKVCrud(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestKVCrud(t, s)
}
func TestMossReaderIsolation(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestReaderIsolation(t, s)
}
func TestMossReaderOwnsGetBytes(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestReaderOwnsGetBytes(t, s)
}
func TestMossWriterOwnsBytes(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestWriterOwnsBytes(t, s)
}
func TestMossPrefixIterator(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestPrefixIterator(t, s)
}
func TestMossPrefixIteratorSeek(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestPrefixIteratorSeek(t, s)
}
func TestMossRangeIterator(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestRangeIterator(t, s)
}
func TestMossRangeIteratorSeek(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestRangeIteratorSeek(t, s)
}
func TestMossMerge(t *testing.T) {
s := open(t, &test.TestMergeCounter{})
defer cleanup(t, s)
test.CommonTestMerge(t, s)
}


@@ -0,0 +1,94 @@
// Copyright (c) 2016 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the
// License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an "AS
// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language
// governing permissions and limitations under the License.
package moss
import (
"fmt"
"github.com/blevesearch/bleve/index/store"
"github.com/couchbase/moss"
)
type Writer struct {
s *Store
}
func (w *Writer) NewBatch() store.KVBatch {
b, err := w.s.ms.NewBatch(0, 0)
if err != nil {
return nil
}
return &Batch{
store: w.s,
merge: store.NewEmulatedMerge(w.s.mo),
batch: b,
}
}
func (w *Writer) NewBatchEx(options store.KVBatchOptions) (
[]byte, store.KVBatch, error) {
numOps := options.NumSets + options.NumDeletes + options.NumMerges
b, err := w.s.ms.NewBatch(numOps, options.TotalBytes)
if err != nil {
return nil, nil, err
}
buf, err := b.Alloc(options.TotalBytes)
if err != nil {
return nil, nil, err
}
return buf, &Batch{
store: w.s,
merge: store.NewEmulatedMerge(w.s.mo),
batch: b,
buf: buf,
bufUsed: 0,
}, nil
}
func (w *Writer) ExecuteBatch(b store.KVBatch) (err error) {
batch, ok := b.(*Batch)
if !ok {
return fmt.Errorf("wrong type of batch")
}
for kStr, mergeOps := range batch.merge.Merges {
for _, v := range mergeOps {
if batch.buf != nil {
kLen := len(kStr)
vLen := len(v)
kBuf := batch.buf[batch.bufUsed : batch.bufUsed+kLen]
vBuf := batch.buf[batch.bufUsed+kLen : batch.bufUsed+kLen+vLen]
copy(kBuf, kStr)
copy(vBuf, v)
batch.bufUsed += kLen + vLen
err = batch.batch.AllocMerge(kBuf, vBuf)
} else {
err = batch.batch.Merge([]byte(kStr), v)
}
if err != nil {
return err
}
}
}
return w.s.ms.ExecuteBatch(batch.batch, moss.WriteOptions{})
}
func (w *Writer) Close() error {
w.s = nil
return nil
}
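NewBatchEx exists so a caller can pre-size one buffer for an entire batch, which is how upside_down's batchRows uses it later in this commit. A hedged sketch of the protocol for a single set (setOne is illustrative, not part of the API):

func setOne(w store.KVWriter, k, v []byte) error {
	buf, wb, err := w.NewBatchEx(store.KVBatchOptions{
		NumSets:    1,
		TotalBytes: len(k) + len(v),
	})
	if err != nil {
		return err
	}
	defer func() { _ = wb.Close() }()
	n := copy(buf, k)
	copy(buf[n:], v)
	wb.Set(buf[:n], buf[n:n+len(v)]) // sub-slices of buf: no per-op allocations
	return w.ExecuteBatch(wb)
}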


@@ -10,6 +10,8 @@
package upside_down
import (
"sync/atomic"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
)
@@ -29,6 +31,7 @@ func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, fi
return nil, err
}
if val == nil {
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
return &UpsideDownCouchTermFieldReader{
count: 0,
term: term,
@@ -44,6 +47,7 @@ func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, fi
tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0)
it := indexReader.kvreader.PrefixIterator(tfr.Key())
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
return &UpsideDownCouchTermFieldReader{
indexReader: indexReader,
iterator: it,
@ -163,5 +167,6 @@ func (r *UpsideDownCouchDocIDReader) Advance(docID string) (string, error) {
}
func (r *UpsideDownCouchDocIDReader) Close() error {
atomic.AddUint64(&r.indexReader.index.stats.termSearchersFinished, uint64(1))
return r.iterator.Close()
}

View File

@ -234,6 +234,8 @@ func NewFieldRowKV(key, value []byte) (*FieldRow, error) {
// DICTIONARY
const DictionaryRowMaxValueSize = binary.MaxVarintLen64
type DictionaryRow struct {
field uint16
term []byte
@ -264,7 +266,7 @@ func (dr *DictionaryRow) Value() []byte {
}
func (dr *DictionaryRow) ValueSize() int {
return binary.MaxVarintLen64
return DictionaryRowMaxValueSize
}
func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) {

View File

@ -12,14 +12,20 @@ package upside_down
import (
"encoding/json"
"sync/atomic"
"github.com/blevesearch/bleve/index/store"
)
type indexStat struct {
updates, deletes, batches, errors uint64
analysisTime, indexTime uint64
termSearchersStarted uint64
termSearchersFinished uint64
numPlainTextBytesIndexed uint64
i *UpsideDownCouch
}
func (i *indexStat) MarshalJSON() ([]byte, error) {
func (i *indexStat) statsMap() map[string]interface{} {
m := map[string]interface{}{}
m["updates"] = atomic.LoadUint64(&i.updates)
m["deletes"] = atomic.LoadUint64(&i.deletes)
@ -27,5 +33,18 @@ func (i *indexStat) MarshalJSON() ([]byte, error) {
m["errors"] = atomic.LoadUint64(&i.errors)
m["analysis_time"] = atomic.LoadUint64(&i.analysisTime)
m["index_time"] = atomic.LoadUint64(&i.indexTime)
m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted)
m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished)
m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed)
if o, ok := i.i.store.(store.KVStoreStats); ok {
m["kv"] = o.StatsMap()
}
return m
}
func (i *indexStat) MarshalJSON() ([]byte, error) {
m := i.statsMap()
return json.Marshal(m)
}

View File

@ -68,14 +68,15 @@ type docBackIndexRow struct {
}
func NewUpsideDownCouch(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) {
return &UpsideDownCouch{
rv := &UpsideDownCouch{
version: Version,
fieldCache: index.NewFieldCache(),
storeName: storeName,
storeConfig: storeConfig,
analysisQueue: analysisQueue,
stats: &indexStat{},
}, nil
}
rv.stats = &indexStat{i: rv}
return rv, nil
}
func (udc *UpsideDownCouch) init(kvwriter store.KVWriter) (err error) {
@ -208,7 +209,7 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi
mergeNum := len(dictionaryDeltas)
mergeKeyBytes := 0
mergeValBytes := mergeNum * 8
mergeValBytes := mergeNum * DictionaryRowMaxValueSize
for dictRowKey := range dictionaryDeltas {
mergeKeyBytes += len(dictRowKey)
@ -218,7 +219,7 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi
totBytes := addKeyBytes + addValBytes +
updateKeyBytes + updateValBytes +
deleteKeyBytes +
mergeKeyBytes + mergeValBytes
2*(mergeKeyBytes+mergeValBytes)
buf, wb, err := writer.NewBatchEx(store.KVBatchOptions{
TotalBytes: totBytes,
@ -278,8 +279,8 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi
for dictRowKey, delta := range dictionaryDeltas {
dictRowKeyLen := copy(buf, dictRowKey)
binary.LittleEndian.PutUint64(buf[dictRowKeyLen:], uint64(delta))
wb.Merge(buf[:dictRowKeyLen], buf[dictRowKeyLen:dictRowKeyLen+8])
buf = buf[dictRowKeyLen+8:]
wb.Merge(buf[:dictRowKeyLen], buf[dictRowKeyLen:dictRowKeyLen+DictionaryRowMaxValueSize])
buf = buf[dictRowKeyLen+DictionaryRowMaxValueSize:]
}
// write out the batch
@ -415,6 +416,7 @@ func (udc *UpsideDownCouch) Close() error {
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
// do analysis before acquiring write lock
analysisStart := time.Now()
numPlainTextBytes := doc.NumPlainTextBytes()
resultChan := make(chan *index.AnalysisResult)
aw := index.NewAnalysisWork(udc, doc, resultChan)
@ -489,6 +491,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
if err == nil {
atomic.AddUint64(&udc.stats.updates, 1)
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
} else {
atomic.AddUint64(&udc.stats.errors, 1)
}
@ -791,9 +794,11 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
var numUpdates uint64
var numPlainTextBytes uint64
for _, doc := range batch.IndexOps {
if doc != nil {
numUpdates++
numPlainTextBytes += doc.NumPlainTextBytes()
}
}
@ -959,6 +964,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
atomic.AddUint64(&udc.stats.updates, numUpdates)
atomic.AddUint64(&udc.stats.deletes, docsDeleted)
atomic.AddUint64(&udc.stats.batches, 1)
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
} else {
atomic.AddUint64(&udc.stats.errors, 1)
}
@ -1024,6 +1030,14 @@ func (udc *UpsideDownCouch) Stats() json.Marshaler {
return udc.stats
}
func (udc *UpsideDownCouch) StatsMap() map[string]interface{} {
return udc.stats.statsMap()
}
func (udc *UpsideDownCouch) Advanced() (store.KVStore, error) {
return udc.store, nil
}
func (udc *UpsideDownCouch) fieldIndexOrNewRow(name string) (uint16, *FieldRow) {
index, existed := udc.fieldCache.FieldNamed(name, true)
if !existed {

View File

@ -14,6 +14,8 @@ import (
"sync"
"time"
"golang.org/x/net/context"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
@ -122,16 +124,20 @@ func (i *indexAliasImpl) DocCount() (uint64, error) {
for _, index := range i.indexes {
otherCount, err := index.DocCount()
if err != nil {
return 0, err
if err == nil {
rv += otherCount
}
rv += otherCount
// tolerate errors to produce partial counts
}
return rv, nil
}
func (i *indexAliasImpl) Search(req *SearchRequest) (*SearchResult, error) {
return i.SearchInContext(context.Background(), req)
}
func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error) {
i.mutex.RLock()
defer i.mutex.RUnlock()
@ -145,10 +151,10 @@ func (i *indexAliasImpl) Search(req *SearchRequest) (*SearchResult, error) {
// short circuit the simple case
if len(i.indexes) == 1 {
return i.indexes[0].Search(req)
return i.indexes[0].SearchInContext(ctx, req)
}
return MultiSearch(req, i.indexes...)
return MultiSearch(ctx, req, i.indexes...)
}
func (i *indexAliasImpl) Fields() ([]string, error) {
@ -333,6 +339,22 @@ func (i *indexAliasImpl) Stats() *IndexStat {
return i.indexes[0].Stats()
}
func (i *indexAliasImpl) StatsMap() map[string]interface{} {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return nil
}
err := i.isAliasToSingleIndex()
if err != nil {
return nil
}
return i.indexes[0].StatsMap()
}
func (i *indexAliasImpl) GetInternal(key []byte) ([]byte, error) {
i.mutex.RLock()
defer i.mutex.RUnlock()
@ -456,69 +478,96 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest {
return &rv
}
type asyncSearchResult struct {
Name string
Result *SearchResult
Err error
}
func wrapSearch(ctx context.Context, in Index, req *SearchRequest) *asyncSearchResult {
rv := asyncSearchResult{Name: in.Name()}
rv.Result, rv.Err = in.SearchInContext(ctx, req)
return &rv
}
func wrapSearchTimeout(ctx context.Context, in Index, req *SearchRequest) *asyncSearchResult {
reschan := make(chan *asyncSearchResult, 1) // buffered so the sender cannot leak if the timeout fires first
go func() { reschan <- wrapSearch(ctx, in, req) }()
select {
case res := <-reschan:
return res
case <-ctx.Done():
return &asyncSearchResult{Name: in.Name(), Err: ctx.Err()}
}
}
// MultiSearch executes a SearchRequest across multiple
// Index objects, then merges the results.
func MultiSearch(req *SearchRequest, indexes ...Index) (*SearchResult, error) {
func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) {
searchStart := time.Now()
results := make(chan *SearchResult)
errs := make(chan error)
asyncResults := make(chan *asyncSearchResult)
// run search on each index in separate go routine
var waitGroup sync.WaitGroup
var searchChildIndex = func(waitGroup *sync.WaitGroup, in Index, results chan *SearchResult, errs chan error) {
go func() {
defer waitGroup.Done()
childReq := createChildSearchRequest(req)
searchResult, err := in.Search(childReq)
if err != nil {
errs <- err
} else {
results <- searchResult
}
}()
var searchChildIndex = func(waitGroup *sync.WaitGroup, in Index, asyncResults chan *asyncSearchResult) {
childReq := createChildSearchRequest(req)
if ia, ok := in.(IndexAlias); ok {
// if the child index is another alias, trust that it returns promptly on timeout/cancel
go func() {
defer waitGroup.Done()
asyncResults <- wrapSearch(ctx, ia, childReq)
}()
} else {
// if the child index is not an alias, enforce timeout here
go func() {
defer waitGroup.Done()
asyncResults <- wrapSearchTimeout(ctx, in, childReq)
}()
}
}
for _, in := range indexes {
waitGroup.Add(1)
searchChildIndex(&waitGroup, in, results, errs)
searchChildIndex(&waitGroup, in, asyncResults)
}
// on another go routine, close after finished
go func() {
waitGroup.Wait()
close(results)
close(errs)
close(asyncResults)
}()
var sr *SearchResult
var err error
var result *SearchResult
ok := true
for ok {
select {
case result, ok = <-results:
if ok {
if sr == nil {
// first result
sr = result
} else {
// merge with previous
sr.Merge(result)
}
}
case err, ok = <-errs:
// for now stop on any error
// FIXME offer other behaviors
if err != nil {
return nil, err
indexErrors := make(map[string]error)
for asr := range asyncResults {
if asr.Err == nil {
if sr == nil {
// first result
sr = asr.Result
} else {
// merge with previous
sr.Merge(asr.Result)
}
} else {
indexErrors[asr.Name] = asr.Err
}
}
// merge just concatenated all the hits
// now let's clean it up
// handle case where no results were successful
if sr == nil {
sr = &SearchResult{
Status: &SearchStatus{
Errors: make(map[string]error),
},
}
}
// first sort it by score
sort.Sort(sr.Hits)
@ -544,6 +593,18 @@ func MultiSearch(req *SearchRequest, indexes ...Index) (*SearchResult, error) {
searchDuration := time.Since(searchStart)
sr.Took = searchDuration
// fix up errors
if len(indexErrors) > 0 {
if sr.Status.Errors == nil {
sr.Status.Errors = make(map[string]error)
}
for indexName, indexErr := range indexErrors {
sr.Status.Errors[indexName] = indexErr
sr.Status.Total++
sr.Status.Failed++
}
}
return sr, nil
}
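// Usage sketch (assumes two open indexes idx1 and idx2; not part of this
// commit): per-index failures no longer abort the whole search, they are
// reported in the result status instead:
//
//	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
//	defer cancel()
//	res, err := MultiSearch(ctx, NewSearchRequest(NewTermQuery("test")), idx1, idx2)
//	// err is nil even when children fail; inspect res.Status.Failed and
//	// res.Status.Errors for the per-index outcomes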

View File

@ -6,6 +6,8 @@ import (
"testing"
"time"
"golang.org/x/net/context"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
@ -93,9 +95,12 @@ func TestIndexAliasSingle(t *testing.T) {
t.Errorf("expected %v, got %v", expectedError, err)
}
_, err = alias.DocCount()
if err != expectedError {
t.Errorf("expected %v, got %v", expectedError, err)
count, err := alias.DocCount()
if err != nil {
t.Errorf("expected no error, got %v", err)
}
if count != 0 {
t.Errorf("expected count 0, got %d", count)
}
// now change the def using add/remove
@ -178,9 +183,12 @@ func TestIndexAliasSingle(t *testing.T) {
t.Errorf("expected %v, got %v", expectedError2, err)
}
_, err = alias.DocCount()
if err != expectedError2 {
t.Errorf("expected %v, got %v", expectedError2, err)
count, err = alias.DocCount()
if err != nil {
t.Errorf("expected no error, got %v", err)
}
if count != 0 {
t.Errorf("expected count 0, got %d", count)
}
// now change the def using swap
@ -262,9 +270,12 @@ func TestIndexAliasSingle(t *testing.T) {
t.Errorf("expected %v, got %v", expectedError3, err)
}
_, err = alias.DocCount()
if err != expectedError3 {
t.Errorf("expected %v, got %v", expectedError3, err)
count, err = alias.DocCount()
if err != nil {
t.Errorf("expected no error, got %v", err)
}
if count != 0 {
t.Errorf("expected count 0, got %d", count)
}
}
@ -442,6 +453,11 @@ func TestIndexAliasMulti(t *testing.T) {
err: nil,
docCountResult: &ei1Count,
searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: search.DocumentMatchCollection{
&search.DocumentMatch{
@ -456,6 +472,11 @@ func TestIndexAliasMulti(t *testing.T) {
err: nil,
docCountResult: &ei2Count,
searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: search.DocumentMatchCollection{
&search.DocumentMatch{
@ -537,6 +558,11 @@ func TestIndexAliasMulti(t *testing.T) {
// now a few things that should work
sr := NewSearchRequest(NewTermQuery("test"))
expected := &SearchResult{
Status: &SearchStatus{
Total: 2,
Successful: 2,
Errors: make(map[string]error),
},
Request: sr,
Total: 2,
Hits: search.DocumentMatchCollection{
@ -570,6 +596,11 @@ func TestIndexAliasMulti(t *testing.T) {
// TestMultiSearchNoError
func TestMultiSearchNoError(t *testing.T) {
ei1 := &stubIndex{err: nil, searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: search.DocumentMatchCollection{
&search.DocumentMatch{
@ -581,6 +612,11 @@ func TestMultiSearchNoError(t *testing.T) {
MaxScore: 1.0,
}}
ei2 := &stubIndex{err: nil, searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: search.DocumentMatchCollection{
&search.DocumentMatch{
@ -594,6 +630,11 @@ func TestMultiSearchNoError(t *testing.T) {
sr := NewSearchRequest(NewTermQuery("test"))
expected := &SearchResult{
Status: &SearchStatus{
Total: 2,
Successful: 2,
Errors: make(map[string]error),
},
Request: sr,
Total: 2,
Hits: search.DocumentMatchCollection{
@ -611,7 +652,7 @@ func TestMultiSearchNoError(t *testing.T) {
MaxScore: 2.0,
}
results, err := MultiSearch(sr, ei1, ei2)
results, err := MultiSearch(context.Background(), sr, ei1, ei2)
if err != nil {
t.Error(err)
}
@ -624,7 +665,12 @@ func TestMultiSearchNoError(t *testing.T) {
// TestMultiSearchSomeError
func TestMultiSearchSomeError(t *testing.T) {
ei1 := &stubIndex{err: nil, searchResult: &SearchResult{
ei1 := &stubIndex{name: "ei1", err: nil, searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: search.DocumentMatchCollection{
&search.DocumentMatch{
@ -635,23 +681,56 @@ func TestMultiSearchSomeError(t *testing.T) {
Took: 1 * time.Second,
MaxScore: 1.0,
}}
ei2 := &stubIndex{err: fmt.Errorf("deliberate error")}
ei2 := &stubIndex{name: "ei2", err: fmt.Errorf("deliberate error")}
sr := NewSearchRequest(NewTermQuery("test"))
_, err := MultiSearch(sr, ei1, ei2)
if err == nil {
t.Errorf("expected error, got %v", err)
res, err := MultiSearch(context.Background(), sr, ei1, ei2)
if err != nil {
t.Errorf("expected no error, got %v", err)
}
if res.Status.Total != 2 {
t.Errorf("expected 2 indexes to be queried, got %d", res.Status.Total)
}
if res.Status.Failed != 1 {
t.Errorf("expected 1 index to fail, got %d", res.Status.Failed)
}
if res.Status.Successful != 1 {
t.Errorf("expected 1 index to be successful, got %d", res.Status.Successful)
}
if len(res.Status.Errors) != 1 {
t.Fatalf("expected 1 status error message, got %d", len(res.Status.Errors))
}
if res.Status.Errors["ei2"].Error() != "deliberate error" {
t.Errorf("expected ei2 index error message 'deliberate error', got '%s'", res.Status.Errors["ei2"])
}
}
// TestMultiSearchAllError
// reproduces https://github.com/blevesearch/bleve/issues/126
func TestMultiSearchAllError(t *testing.T) {
ei1 := &stubIndex{err: fmt.Errorf("deliberate error")}
ei2 := &stubIndex{err: fmt.Errorf("deliberate error")}
ei1 := &stubIndex{name: "ei1", err: fmt.Errorf("deliberate error")}
ei2 := &stubIndex{name: "ei2", err: fmt.Errorf("deliberate error")}
sr := NewSearchRequest(NewTermQuery("test"))
_, err := MultiSearch(sr, ei1, ei2)
if err == nil {
t.Errorf("expected error, got %v", err)
res, err := MultiSearch(context.Background(), sr, ei1, ei2)
if err != nil {
t.Errorf("expected no error, got %v", err)
}
if res.Status.Total != 2 {
t.Errorf("expected 2 indexes to be queried, got %d", res.Status.Total)
}
if res.Status.Failed != 2 {
t.Errorf("expected 2 indexes to fail, got %d", res.Status.Failed)
}
if res.Status.Successful != 0 {
t.Errorf("expected 0 indexes to be successful, got %d", res.Status.Successful)
}
if len(res.Status.Errors) != 2 {
t.Fatalf("expected 2 status error messages, got %d", len(res.Status.Errors))
}
if res.Status.Errors["ei1"].Error() != "deliberate error" {
t.Errorf("expected ei1 index error message 'deliberate error', got '%s'", res.Status.Errors["ei1"])
}
if res.Status.Errors["ei2"].Error() != "deliberate error" {
t.Errorf("expected ei2 index error message 'deliberate error', got '%s'", res.Status.Errors["ei2"])
}
}
@ -667,21 +746,403 @@ func TestMultiSearchSecondPage(t *testing.T) {
}
ei1 := &stubIndex{
searchResult: &SearchResult{},
searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
},
checkRequest: checkRequest,
}
ei2 := &stubIndex{
searchResult: &SearchResult{},
searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
},
checkRequest: checkRequest,
}
sr := NewSearchRequestOptions(NewTermQuery("test"), 10, 10, false)
_, err := MultiSearch(sr, ei1, ei2)
_, err := MultiSearch(context.Background(), sr, ei1, ei2)
if err != nil {
t.Errorf("unexpected error %v", err)
}
}
// TestMultiSearchTimeout tests simple timeout cases
// 1. all searches finish successfully before the timeout
// 2. no searches finish before the timeout
// 3. no searches finish before cancellation
func TestMultiSearchTimeout(t *testing.T) {
ei1 := &stubIndex{
name: "ei1",
checkRequest: func(req *SearchRequest) error {
time.Sleep(50 * time.Millisecond)
return nil
},
err: nil,
searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: []*search.DocumentMatch{
&search.DocumentMatch{
Index: "1",
ID: "a",
Score: 1.0,
},
},
MaxScore: 1.0,
}}
ei2 := &stubIndex{
name: "ei2",
checkRequest: func(req *SearchRequest) error {
time.Sleep(50 * time.Millisecond)
return nil
},
err: nil,
searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: []*search.DocumentMatch{
&search.DocumentMatch{
Index: "2",
ID: "b",
Score: 2.0,
},
},
MaxScore: 2.0,
}}
// first run with an absurdly long timeout, should succeed
ctx, _ := context.WithTimeout(context.Background(), 10*time.Second)
query := NewTermQuery("test")
sr := NewSearchRequest(query)
res, err := MultiSearch(ctx, sr, ei1, ei2)
if err != nil {
t.Errorf("expected no error, got %v", err)
}
if res.Status.Total != 2 {
t.Errorf("expected 2 total, got %d", res.Status.Failed)
}
if res.Status.Successful != 2 {
t.Errorf("expected 0 success, got %d", res.Status.Successful)
}
if res.Status.Failed != 0 {
t.Errorf("expected 2 failed, got %d", res.Status.Failed)
}
if len(res.Status.Errors) != 0 {
t.Errorf("expected 0 errors, got %v", res.Status.Errors)
}
// now run a search again with an absurdly low timeout (should timeout)
ctx, _ = context.WithTimeout(context.Background(), 1*time.Microsecond)
res, err = MultiSearch(ctx, sr, ei1, ei2)
if err != nil {
t.Errorf("expected no error, got %v", err)
}
if res.Status.Total != 2 {
t.Errorf("expected 2 failed, got %d", res.Status.Failed)
}
if res.Status.Successful != 0 {
t.Errorf("expected 0 success, got %d", res.Status.Successful)
}
if res.Status.Failed != 2 {
t.Errorf("expected 2 failed, got %d", res.Status.Failed)
}
if len(res.Status.Errors) != 2 {
t.Errorf("expected 2 errors, got %v", res.Status.Errors)
} else {
if res.Status.Errors["ei1"].Error() != context.DeadlineExceeded.Error() {
t.Errorf("expected err for 'ei1' to be '%s' got '%s'", context.DeadlineExceeded.Error(), res.Status.Errors["ei1"])
}
if res.Status.Errors["ei2"].Error() != context.DeadlineExceeded.Error() {
t.Errorf("expected err for 'ei2' to be '%s' got '%s'", context.DeadlineExceeded.Error(), res.Status.Errors["ei2"])
}
}
// now run a search again with a normal timeout, but cancel it first
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
cancel()
res, err = MultiSearch(ctx, sr, ei1, ei2)
if err != nil {
t.Errorf("expected no error, got %v", err)
}
if res.Status.Total != 2 {
t.Errorf("expected 2 failed, got %d", res.Status.Failed)
}
if res.Status.Successful != 0 {
t.Errorf("expected 0 success, got %d", res.Status.Successful)
}
if res.Status.Failed != 2 {
t.Errorf("expected 2 failed, got %d", res.Status.Failed)
}
if len(res.Status.Errors) != 2 {
t.Errorf("expected 2 errors, got %v", res.Status.Errors)
} else {
if res.Status.Errors["ei1"].Error() != context.Canceled.Error() {
t.Errorf("expected err for 'ei1' to be '%s' got '%s'", context.Canceled.Error(), res.Status.Errors["ei1"])
}
if res.Status.Errors["ei2"].Error() != context.Canceled.Error() {
t.Errorf("expected err for 'ei2' to be '%s' got '%s'", context.Canceled.Error(), res.Status.Errors["ei2"])
}
}
}
// TestMultiSearchTimeoutPartial tests the case where some indexes exceed
// the timeout, while others complete successfully
func TestMultiSearchTimeoutPartial(t *testing.T) {
ei1 := &stubIndex{
name: "ei1",
err: nil,
searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: []*search.DocumentMatch{
&search.DocumentMatch{
Index: "1",
ID: "a",
Score: 1.0,
},
},
MaxScore: 1.0,
}}
ei2 := &stubIndex{
name: "ei2",
err: nil,
searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: []*search.DocumentMatch{
&search.DocumentMatch{
Index: "2",
ID: "b",
Score: 2.0,
},
},
MaxScore: 2.0,
}}
ei3 := &stubIndex{
name: "ei3",
checkRequest: func(req *SearchRequest) error {
time.Sleep(50 * time.Millisecond)
return nil
},
err: nil,
searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: []*search.DocumentMatch{
&search.DocumentMatch{
Index: "3",
ID: "c",
Score: 3.0,
},
},
MaxScore: 3.0,
}}
// ei3 is set to take >50ms, so run the search with a shorter
// timeout; this should return partial results
ctx, _ := context.WithTimeout(context.Background(), 25*time.Millisecond)
query := NewTermQuery("test")
sr := NewSearchRequest(query)
expected := &SearchResult{
Status: &SearchStatus{
Total: 3,
Successful: 2,
Failed: 1,
Errors: map[string]error{
"ei3": context.DeadlineExceeded,
},
},
Request: sr,
Total: 2,
Hits: search.DocumentMatchCollection{
&search.DocumentMatch{
Index: "2",
ID: "b",
Score: 2.0,
},
&search.DocumentMatch{
Index: "1",
ID: "a",
Score: 1.0,
},
},
MaxScore: 2.0,
}
res, err := MultiSearch(ctx, sr, ei1, ei2, ei3)
if err != nil {
t.Fatalf("expected no err, got %v", err)
}
expected.Took = res.Took
if !reflect.DeepEqual(res, expected) {
t.Errorf("expected %#v, got %#v", expected, res)
}
}
func TestIndexAliasMultipleLayer(t *testing.T) {
ei1 := &stubIndex{
name: "ei1",
err: nil,
searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: []*search.DocumentMatch{
&search.DocumentMatch{
Index: "1",
ID: "a",
Score: 1.0,
},
},
MaxScore: 1.0,
}}
ei2 := &stubIndex{
name: "ei2",
checkRequest: func(req *SearchRequest) error {
time.Sleep(50 * time.Millisecond)
return nil
},
err: nil,
searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: []*search.DocumentMatch{
&search.DocumentMatch{
Index: "2",
ID: "b",
Score: 2.0,
},
},
MaxScore: 2.0,
}}
ei3 := &stubIndex{
name: "ei3",
checkRequest: func(req *SearchRequest) error {
time.Sleep(50 * time.Millisecond)
return nil
},
err: nil,
searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: []*search.DocumentMatch{
&search.DocumentMatch{
Index: "3",
ID: "c",
Score: 3.0,
},
},
MaxScore: 3.0,
}}
ei4 := &stubIndex{
name: "ei4",
err: nil,
searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: []*search.DocumentMatch{
&search.DocumentMatch{
Index: "4",
ID: "d",
Score: 4.0,
},
},
MaxScore: 4.0,
}}
alias1 := NewIndexAlias(ei1, ei2)
alias2 := NewIndexAlias(ei3, ei4)
aliasTop := NewIndexAlias(alias1, alias2)
// ei2 and ei3 have 50ms delay
// search across aliasTop should still get results from ei1 and ei4
// total should still be 4
ctx, _ := context.WithTimeout(context.Background(), 25*time.Millisecond)
query := NewTermQuery("test")
sr := NewSearchRequest(query)
expected := &SearchResult{
Status: &SearchStatus{
Total: 4,
Successful: 2,
Failed: 2,
Errors: map[string]error{
"ei2": context.DeadlineExceeded,
"ei3": context.DeadlineExceeded,
},
},
Request: sr,
Total: 2,
Hits: search.DocumentMatchCollection{
&search.DocumentMatch{
Index: "4",
ID: "d",
Score: 4.0,
},
&search.DocumentMatch{
Index: "1",
ID: "a",
Score: 1.0,
},
},
MaxScore: 4.0,
}
res, err := aliasTop.SearchInContext(ctx, sr)
if err != nil {
t.Fatalf("expected no err, got %v", err)
}
expected.Took = res.Took
if !reflect.DeepEqual(res, expected) {
t.Errorf("expected %#v, got %#v", expected, res)
}
}
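// Composition sketch (hypothetical indexes a, b, c): nested aliases pass the
// same context downward, so a timeout set at the top bounds every leaf search
// while healthy leaves still contribute partial results:
//
//	inner := NewIndexAlias(a, b)
//	top := NewIndexAlias(inner, c)
//	res, err := top.SearchInContext(ctx, NewSearchRequest(NewTermQuery("test")))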
// stubIndex is an Index impl for which all operations
// return the configured error value, unless the
// corresponding operation result value has been
@ -722,6 +1183,10 @@ func (i *stubIndex) DocCount() (uint64, error) {
}
func (i *stubIndex) Search(req *SearchRequest) (*SearchResult, error) {
return i.SearchInContext(context.Background(), req)
}
func (i *stubIndex) SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error) {
if i.checkRequest != nil {
err := i.checkRequest(req)
if err != nil {
@ -774,6 +1239,10 @@ func (i *stubIndex) Stats() *IndexStat {
return nil
}
func (i *stubIndex) StatsMap() map[string]interface{} {
return nil
}
func (i *stubIndex) GetInternal(key []byte) ([]byte, error) {
return nil, i.err
}

View File

@ -17,6 +17,8 @@ import (
"sync/atomic"
"time"
"golang.org/x/net/context"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
@ -32,7 +34,6 @@ type indexImpl struct {
path string
name string
meta *indexMeta
s store.KVStore
i index.Index
m *IndexMapping
mutex sync.RWMutex
@ -72,7 +73,6 @@ func newMemIndex(indexType string, mapping *IndexMapping) (*indexImpl, error) {
if err != nil {
return nil, err
}
rv.stats.indexStat = rv.i.Stats()
// now persist the mapping
mappingBytes, err := json.Marshal(mapping)
@ -108,12 +108,12 @@ func newIndexUsing(path string, mapping *IndexMapping, indexType string, kvstore
}
rv := indexImpl{
path: path,
name: path,
m: mapping,
meta: newIndexMeta(indexType, kvstore, kvconfig),
stats: &IndexStat{},
path: path,
name: path,
m: mapping,
meta: newIndexMeta(indexType, kvstore, kvconfig),
}
rv.stats = &IndexStat{i: &rv}
// at this point there is hope that we can be successful, so save index meta
err = rv.meta.Save(path)
if err != nil {
@ -140,7 +140,6 @@ func newIndexUsing(path string, mapping *IndexMapping, indexType string, kvstore
}
return nil, err
}
rv.stats.indexStat = rv.i.Stats()
// now persist the mapping
mappingBytes, err := json.Marshal(mapping)
@ -162,10 +161,10 @@ func newIndexUsing(path string, mapping *IndexMapping, indexType string, kvstore
func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *indexImpl, err error) {
rv = &indexImpl{
path: path,
name: path,
stats: &IndexStat{},
path: path,
name: path,
}
rv.stats = &IndexStat{i: rv}
rv.meta, err = openIndexMeta(path)
if err != nil {
@ -206,7 +205,6 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde
}
return nil, err
}
rv.stats.indexStat = rv.i.Stats()
// now load the mapping
indexReader, err := rv.i.Reader()
@ -227,7 +225,7 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde
var im IndexMapping
err = json.Unmarshal(mappingBytes, &im)
if err != nil {
return nil, err
return nil, fmt.Errorf("error parsing mapping JSON: %v\nmapping contents:\n%s", err, string(mappingBytes))
}
// mark the index as open
@ -251,7 +249,11 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde
// Advanced returns implementation internals
// necessary ONLY for advanced usage.
func (i *indexImpl) Advanced() (index.Index, store.KVStore, error) {
return i.i, i.s, nil
s, err := i.i.Advanced()
if err != nil {
return nil, nil, err
}
return i.i, s, nil
}
// Mapping returns the IndexMapping in use by this
@ -361,6 +363,12 @@ func (i *indexImpl) DocCount() (uint64, error) {
// Search executes a search request operation.
// Returns a SearchResult object or an error.
func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
return i.SearchInContext(context.Background(), req)
}
// SearchInContext executes a search request operation within the provided
// Context. Returns a SearchResult object or an error.
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
i.mutex.RLock()
defer i.mutex.RUnlock()
@ -421,7 +429,7 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
collector.SetFacetsBuilder(facetsBuilder)
}
err = collector.Collect(searcher)
err = collector.Collect(ctx, searcher)
if err != nil {
return nil, err
}
@ -462,7 +470,7 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
} else if err == nil {
// unexpected case, a doc ID that was found as a search hit
// was unable to be found during document lookup
panic(fmt.Sprintf("search hit with doc id: '%s' not found in doc lookup", hit.ID))
return nil, ErrorIndexReadInconsistency
}
}
}
@ -472,7 +480,7 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
// FIXME avoid loading doc second time
// if we already loaded it for highlighting
doc, err := indexReader.Document(hit.ID)
if err == nil {
if err == nil && doc != nil {
for _, f := range req.Fields {
for _, docF := range doc.Fields {
if f == "*" || docF.Name() == f {
@ -502,6 +510,10 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
}
}
}
} else if doc == nil {
// unexpected case, a doc ID that was found as a search hit
// was unable to be found during document lookup
return nil, ErrorIndexReadInconsistency
}
}
}
@ -521,6 +533,12 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
}
return &SearchResult{
Status: &SearchStatus{
Total: 1,
Failed: 0,
Successful: 1,
Errors: make(map[string]error),
},
Request: req,
Hits: hits,
Total: collector.Total(),
@ -692,6 +710,10 @@ func (i *indexImpl) Stats() *IndexStat {
return i.stats
}
func (i *indexImpl) StatsMap() map[string]interface{} {
return i.stats.statsMap()
}
func (i *indexImpl) GetInternal(key []byte) (val []byte, err error) {
i.mutex.RLock()
defer i.mutex.RUnlock()

View File

@ -56,9 +56,12 @@ func openIndexMeta(path string) (*indexMeta, error) {
func (i *indexMeta) Save(path string) (err error) {
indexMetaPath := indexMetaPath(path)
// ensure any necessary parent directories exist
err = os.Mkdir(path, 0700)
err = os.MkdirAll(path, 0700)
if err != nil {
return ErrorIndexPathExists
if os.IsExist(err) {
return ErrorIndexPathExists
}
return err
}
metaBytes, err := json.Marshal(i)
if err != nil {

View File

@ -16,16 +16,21 @@ import (
)
type IndexStat struct {
indexStat json.Marshaler
searches uint64
searchTime uint64
i *indexImpl
}
func (is *IndexStat) statsMap() map[string]interface{} {
m := map[string]interface{}{}
m["index"] = is.i.i.StatsMap()
m["searches"] = atomic.LoadUint64(&is.searches)
m["search_time"] = atomic.LoadUint64(&is.searchTime)
return m
}
func (is *IndexStat) MarshalJSON() ([]byte, error) {
m := map[string]interface{}{}
m["index"] = is.indexStat
m["searches"] = atomic.LoadUint64(&is.searches)
m["search_time"] = atomic.LoadUint64(&is.searchTime)
m := is.statsMap()
return json.Marshal(m)
}
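// Shape sketch: StatsMap nests the underlying index stats, and when the KV
// store implements store.KVStoreStats its stats appear under "kv". Roughly
// (exact keys depend on the index and store in use):
//
//	{
//	  "searches": 12,
//	  "search_time": 3456,
//	  "index": {
//	    "updates": 10,
//	    "term_searchers_started": 4,
//	    "kv": { ... }
//	  }
//	}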

View File

@ -21,10 +21,14 @@ import (
"testing"
"time"
"golang.org/x/net/context"
"encoding/json"
"strconv"
"github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
func TestCrud(t *testing.T) {
@ -339,6 +343,36 @@ func TestClosedIndex(t *testing.T) {
}
}
type slowQuery struct {
actual Query
delay time.Duration
}
func (s *slowQuery) Boost() float64 {
return s.actual.Boost()
}
func (s *slowQuery) SetBoost(b float64) Query {
return s.actual.SetBoost(b)
}
func (s *slowQuery) Field() string {
return s.actual.Field()
}
func (s *slowQuery) SetField(f string) Query {
return s.actual.SetField(f)
}
func (s *slowQuery) Searcher(i index.IndexReader, m *IndexMapping, explain bool) (search.Searcher, error) {
time.Sleep(s.delay)
return s.actual.Searcher(i, m, explain)
}
func (s *slowQuery) Validate() error {
return s.actual.Validate()
}
func TestSlowSearch(t *testing.T) {
defer func() {
err := os.RemoveAll("testidx")
@ -379,6 +413,11 @@ func TestSlowSearch(t *testing.T) {
t.Errorf("expected to not see slow query logged, but did")
}
sq := &slowQuery{
actual: query,
delay: 50 * time.Millisecond, // on Windows timer resolution is 15ms
}
req.Query = sq
Config.SlowSearchLogThreshold = 1 * time.Microsecond
_, err = index.Search(req)
if err != nil {
@ -593,6 +632,12 @@ func TestBatchString(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer func() {
err := index.Close()
if err != nil {
t.Fatal(err)
}
}()
batch := index.NewBatch()
err = batch.Index("a", []byte("{}"))
@ -634,12 +679,24 @@ func TestIndexMetadataRaceBug198(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer func() {
err := index.Close()
if err != nil {
t.Fatal(err)
}
}()
done := make(chan struct{})
go func() {
for {
_, err := index.DocCount()
if err != nil {
t.Fatal(err)
select {
case <-done:
return
default:
_, err := index.DocCount()
if err != nil {
t.Fatal(err)
}
}
}
}()
@ -655,7 +712,7 @@ func TestIndexMetadataRaceBug198(t *testing.T) {
t.Fatal(err)
}
}
close(done)
}
func TestIndexCountMatchSearch(t *testing.T) {
@ -1122,6 +1179,12 @@ func TestIndexEmptyDocId(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer func() {
err := index.Close()
if err != nil {
t.Fatal(err)
}
}()
doc := map[string]interface{}{
"body": "nodocid",
@ -1379,3 +1442,69 @@ func TestBooleanFieldMappingIssue109(t *testing.T) {
t.Fatal(err)
}
}
func TestSearchTimeout(t *testing.T) {
defer func() {
err := os.RemoveAll("testidx")
if err != nil {
t.Fatal(err)
}
}()
index, err := New("testidx", NewIndexMapping())
if err != nil {
t.Fatal(err)
}
defer func() {
err := index.Close()
if err != nil {
t.Fatal(err)
}
}()
// first run a search with an absurdly long timeout (should succeed)
ctx, _ := context.WithTimeout(context.Background(), 10*time.Second)
query := NewTermQuery("water")
req := NewSearchRequest(query)
_, err = index.SearchInContext(ctx, req)
if err != nil {
t.Fatal(err)
}
// now run a search again with an absurdly low timeout (should timeout)
ctx, _ = context.WithTimeout(context.Background(), 1*time.Microsecond)
sq := &slowQuery{
actual: query,
delay: 50 * time.Millisecond, // on Windows timer resolution is 15ms
}
req.Query = sq
_, err = index.SearchInContext(ctx, req)
if err != context.DeadlineExceeded {
t.Fatalf("exected %v, got: %v", context.DeadlineExceeded, err)
}
// now run a search with a long timeout but a slow query, and cancel it before searching
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
sq = &slowQuery{
actual: query,
delay: 100 * time.Millisecond, // on Windows timer resolution is 15ms
}
req = NewSearchRequest(sq)
cancel()
_, err = index.SearchInContext(ctx, req)
if err != context.Canceled {
t.Fatalf("exected %v, got: %v", context.Canceled, err)
}
}
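// Caller-facing sketch (assumes an open index idx): budget a search with a
// context and release the timer when done:
//
//	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
//	defer cancel()
//	res, err := idx.SearchInContext(ctx, NewSearchRequest(NewTermQuery("water")))
//	if err == context.DeadlineExceeded {
//		// the search exceeded its budget
//	}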
// TestConfigCache exposes a concurrent map write with Go 1.6
func TestConfigCache(t *testing.T) {
for i := 0; i < 100; i++ {
go func() {
_, err := Config.Cache.HighlighterNamed(Config.DefaultHighlighter)
if err != nil {
t.Error(err)
}
}()
}
}

View File

@ -75,40 +75,93 @@ func (dm *DocumentMapping) validate(cache *registry.Cache) error {
return nil
}
// analyzerNameForPath attempts to first find the field
// described by this path, then returns the analyzer
// configured for that field
func (dm *DocumentMapping) analyzerNameForPath(path string) string {
pathElements := decodePath(path)
last := false
current := dm
OUTER:
for i, pathElement := range pathElements {
if i == len(pathElements)-1 {
last = true
}
for name, subDocMapping := range current.Properties {
for _, field := range subDocMapping.Fields {
if field.Name == "" && name == pathElement {
if last {
return field.Analyzer
}
current = subDocMapping
continue OUTER
} else if field.Name == pathElement {
if last {
return field.Analyzer
}
current = subDocMapping
continue OUTER
}
}
}
return ""
field := dm.fieldDescribedByPath(path)
if field != nil {
return field.Analyzer
}
return ""
}
func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
pathElements := decodePath(path)
if len(pathElements) > 1 {
// easy case, there is more than 1 path element remaining
// the next path element must match a property name
// at this level
for propName, subDocMapping := range dm.Properties {
if propName == pathElements[0] {
return subDocMapping.fieldDescribedByPath(encodePath(pathElements[1:]))
}
}
} else {
// just 1 path element
// first look for property name with empty field
for propName, subDocMapping := range dm.Properties {
if propName == pathElements[0] {
// found property name match, now look at its fields
for _, field := range subDocMapping.Fields {
if field.Name == "" || field.Name == pathElements[0] {
// match
return field
}
}
}
}
// next, walk the properties again, looking for a field overriding the name
for propName, subDocMapping := range dm.Properties {
if propName != pathElements[0] {
// property name isn't a match, but field name could override it
for _, field := range subDocMapping.Fields {
if field.Name == pathElements[0] {
return field
}
}
}
}
}
return nil
}
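// Resolution sketch (hypothetical mapping, in the spirit of TestMappingForPath):
// a field with an empty Name is addressed by its property name, while a field
// with an explicit Name can override it:
//
//	dm := NewDocumentMapping()
//	fm := NewTextFieldMapping()
//	fm.Analyzer = "en"
//	dm.AddFieldMappingsAt("author", fm)
//	_ = dm.analyzerNameForPath("author") // "en", via fieldDescribedByPath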
// documentMappingForPath only returns EXACT matches for a sub document
// or for an explicitly mapped field, if you want to find the
// closest document mapping to a field not explicitly mapped
// use closestDocMapping
func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
pathElements := decodePath(path)
current := dm
OUTER:
for i, pathElement := range pathElements {
for name, subDocMapping := range current.Properties {
if name == pathElement {
current = subDocMapping
continue OUTER
}
}
// no subDocMapping matches this pathElement
// only if this is the last element check for field name
if i == len(pathElements)-1 {
for _, field := range current.Fields {
if field.Name == pathElement {
break
}
}
}
return nil
}
return current
}
// closestDocMapping finds the most specific document mapping that matches
// part of the provided path
func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping {
pathElements := decodePath(path)
current := dm
OUTER:
for _, pathElement := range pathElements {
for name, subDocMapping := range current.Properties {
@ -117,12 +170,6 @@ OUTER:
continue OUTER
}
}
for _, field := range current.Fields {
if field.Name == pathElement {
continue OUTER
}
}
return nil
}
return current
}
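// Contrast sketch: documentMappingForPath only returns exact matches, while
// closestDocMapping returns the nearest mapped ancestor. processProperty uses
// the closest mapping's Dynamic flag, so (as in TestMappingBug353 below)
// disabling Dynamic on "Reviews" suppresses automatic indexing of its
// unmapped children:
//
//	reviews := NewDocumentMapping()
//	reviews.Dynamic = false
//	m := NewIndexMapping()
//	m.DefaultMapping.AddSubDocumentMapping("Reviews", reviews)
//	// "Reviews.ReviewID" has no exact mapping; its closest mapping is
//	// reviews, whose Dynamic=false prevents dynamic indexing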
@ -192,45 +239,56 @@ func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) {
dm.Fields = append(dm.Fields, fm)
}
// UnmarshalJSON deserializes a JSON representation
// of the DocumentMapping.
// UnmarshalJSON offers custom unmarshaling with optional strict validation
func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
var tmp struct {
Enabled *bool `json:"enabled"`
Dynamic *bool `json:"dynamic"`
Properties map[string]*DocumentMapping `json:"properties"`
Fields []*FieldMapping `json:"fields"`
DefaultAnalyzer string `json:"default_analyzer"`
}
var tmp map[string]json.RawMessage
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
// set defaults for fields which might have been omitted
dm.Enabled = true
if tmp.Enabled != nil {
dm.Enabled = *tmp.Enabled
}
dm.Dynamic = true
if tmp.Dynamic != nil {
dm.Dynamic = *tmp.Dynamic
var invalidKeys []string
for k, v := range tmp {
switch k {
case "enabled":
err := json.Unmarshal(v, &dm.Enabled)
if err != nil {
return err
}
case "dynamic":
err := json.Unmarshal(v, &dm.Dynamic)
if err != nil {
return err
}
case "default_analyzer":
err := json.Unmarshal(v, &dm.DefaultAnalyzer)
if err != nil {
return err
}
case "properties":
err := json.Unmarshal(v, &dm.Properties)
if err != nil {
return err
}
case "fields":
err := json.Unmarshal(v, &dm.Fields)
if err != nil {
return err
}
default:
invalidKeys = append(invalidKeys, k)
}
}
dm.DefaultAnalyzer = tmp.DefaultAnalyzer
if MappingJSONStrict && len(invalidKeys) > 0 {
return fmt.Errorf("document mapping contains invalid keys: %v", invalidKeys)
}
if tmp.Properties != nil {
dm.Properties = make(map[string]*DocumentMapping, len(tmp.Properties))
}
for propName, propMapping := range tmp.Properties {
dm.Properties[propName] = propMapping
}
if tmp.Fields != nil {
dm.Fields = make([]*FieldMapping, len(tmp.Fields))
}
for i, field := range tmp.Fields {
dm.Fields[i] = field
}
return nil
}
@ -302,6 +360,7 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
pathString := encodePath(path)
// look to see if there is a mapping for this field
subDocMapping := dm.documentMappingForPath(pathString)
closestDocMapping := dm.closestDocMapping(pathString)
// check to see if we even need to do further processing
if subDocMapping != nil && !subDocMapping.Enabled {
@ -322,7 +381,7 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
}
} else if dm.Dynamic {
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
// first see if it can be parsed by the default date parser
@ -331,25 +390,31 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
if err != nil {
// index as text
fieldMapping := newTextFieldMappingDynamic()
fieldMapping := newTextFieldMappingDynamic(context.im)
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
} else {
// index as datetime
fieldMapping := newDateTimeFieldMappingDynamic()
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)
}
}
}
case reflect.Float64:
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
return
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)
return
case reflect.Float64, reflect.Float32:
propertyValFloat := propertyValue.Float()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
} else if dm.Dynamic {
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newNumericFieldMappingDynamic()
fieldMapping := newNumericFieldMappingDynamic(context.im)
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
case reflect.Bool:
@ -359,9 +424,9 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
} else if dm.Dynamic {
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newBooleanFieldMappingDynamic()
fieldMapping := newBooleanFieldMappingDynamic(context.im)
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
case reflect.Struct:
@ -373,8 +438,8 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processTime(property, pathString, path, indexes, context)
}
} else if dm.Dynamic {
fieldMapping := newDateTimeFieldMappingDynamic()
} else if closestDocMapping.Dynamic {
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(property, pathString, path, indexes, context)
}
default:

View File

@ -10,6 +10,8 @@
package bleve
import (
"encoding/json"
"fmt"
"time"
"github.com/blevesearch/bleve/analysis"
@ -59,10 +61,10 @@ func NewTextFieldMapping() *FieldMapping {
}
}
func newTextFieldMappingDynamic() *FieldMapping {
func newTextFieldMappingDynamic(im *IndexMapping) *FieldMapping {
rv := NewTextFieldMapping()
rv.Store = StoreDynamic
rv.Index = IndexDynamic
rv.Store = im.StoreDynamic
rv.Index = im.IndexDynamic
return rv
}
@ -76,10 +78,10 @@ func NewNumericFieldMapping() *FieldMapping {
}
}
func newNumericFieldMappingDynamic() *FieldMapping {
func newNumericFieldMappingDynamic(im *IndexMapping) *FieldMapping {
rv := NewNumericFieldMapping()
rv.Store = StoreDynamic
rv.Index = IndexDynamic
rv.Store = im.StoreDynamic
rv.Index = im.IndexDynamic
return rv
}
@ -93,10 +95,10 @@ func NewDateTimeFieldMapping() *FieldMapping {
}
}
func newDateTimeFieldMappingDynamic() *FieldMapping {
func newDateTimeFieldMappingDynamic(im *IndexMapping) *FieldMapping {
rv := NewDateTimeFieldMapping()
rv.Store = StoreDynamic
rv.Index = IndexDynamic
rv.Store = im.StoreDynamic
rv.Index = im.IndexDynamic
return rv
}
@ -110,10 +112,10 @@ func NewBooleanFieldMapping() *FieldMapping {
}
}
func newBooleanFieldMappingDynamic() *FieldMapping {
func newBooleanFieldMappingDynamic(im *IndexMapping) *FieldMapping {
rv := NewBooleanFieldMapping()
rv.Store = StoreDynamic
rv.Index = IndexDynamic
rv.Store = im.StoreDynamic
rv.Index = im.IndexDynamic
return rv
}
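// Behavior sketch: dynamic field mappings now take their Store/Index defaults
// from the owning IndexMapping instead of the package-level globals, so the
// behavior can be toggled per index (see
// TestEnablingDisablingStoringDynamicFields):
//
//	mapping := NewIndexMapping()
//	mapping.StoreDynamic = false // dynamically mapped fields are not stored
//	mapping.IndexDynamic = true  // but they are still indexed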
@ -223,3 +225,67 @@ func getFieldName(pathString string, path []string, fieldMapping *FieldMapping)
}
return fieldName
}
// UnmarshalJSON offers custom unmarshaling with optional strict validation
func (fm *FieldMapping) UnmarshalJSON(data []byte) error {
var tmp map[string]json.RawMessage
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
var invalidKeys []string
for k, v := range tmp {
switch k {
case "name":
err := json.Unmarshal(v, &fm.Name)
if err != nil {
return err
}
case "type":
err := json.Unmarshal(v, &fm.Type)
if err != nil {
return err
}
case "analyzer":
err := json.Unmarshal(v, &fm.Analyzer)
if err != nil {
return err
}
case "store":
err := json.Unmarshal(v, &fm.Store)
if err != nil {
return err
}
case "index":
err := json.Unmarshal(v, &fm.Index)
if err != nil {
return err
}
case "include_term_vectors":
err := json.Unmarshal(v, &fm.IncludeTermVectors)
if err != nil {
return err
}
case "include_in_all":
err := json.Unmarshal(v, &fm.IncludeInAll)
if err != nil {
return err
}
case "date_format":
err := json.Unmarshal(v, &fm.DateFormat)
if err != nil {
return err
}
default:
invalidKeys = append(invalidKeys, k)
}
}
if MappingJSONStrict && len(invalidKeys) > 0 {
return fmt.Errorf("field mapping contains invalid keys: %v", invalidKeys)
}
return nil
}
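// Strict-mode sketch: with MappingJSONStrict enabled, unknown keys are
// rejected instead of silently ignored (mirrors TestInvalidFieldMappingStrict):
//
//	MappingJSONStrict = true
//	var fm FieldMapping
//	err := json.Unmarshal([]byte(`{"includeInAll":true}`), &fm)
//	// err: field mapping contains invalid keys: [includeInAll]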

View File

@ -11,6 +11,7 @@ package bleve
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/analyzers/standard_analyzer"
@ -20,6 +21,8 @@ import (
"github.com/blevesearch/bleve/registry"
)
var MappingJSONStrict = false
const defaultTypeField = "_type"
const defaultType = "_default"
const defaultField = "_all"
@ -127,6 +130,8 @@ type IndexMapping struct {
DefaultDateTimeParser string `json:"default_datetime_parser"`
DefaultField string `json:"default_field"`
ByteArrayConverter string `json:"byte_array_converter"`
StoreDynamic bool `json:"store_dynamic"`
IndexDynamic bool `json:"index_dynamic"`
CustomAnalysis *customAnalysis `json:"analysis,omitempty"`
cache *registry.Cache
}
@ -230,6 +235,8 @@ func NewIndexMapping() *IndexMapping {
DefaultDateTimeParser: defaultDateTimeParser,
DefaultField: defaultField,
ByteArrayConverter: defaultByteArrayConverter,
IndexDynamic: IndexDynamic,
StoreDynamic: StoreDynamic,
CustomAnalysis: newCustomAnalysis(),
cache: registry.NewCache(),
}
@ -272,86 +279,94 @@ func (im *IndexMapping) mappingForType(docType string) *DocumentMapping {
return docMapping
}
// UnmarshalJSON deserializes a JSON representation of the IndexMapping
// UnmarshalJSON offers custom unmarshaling with optional strict validation
func (im *IndexMapping) UnmarshalJSON(data []byte) error {
var tmp struct {
TypeMapping map[string]*DocumentMapping `json:"types"`
DefaultMapping *DocumentMapping `json:"default_mapping"`
TypeField string `json:"type_field"`
DefaultType string `json:"default_type"`
DefaultAnalyzer string `json:"default_analyzer"`
DefaultDateTimeParser string `json:"default_datetime_parser"`
DefaultField string `json:"default_field"`
ByteArrayConverter string `json:"byte_array_converter"`
CustomAnalysis *customAnalysis `json:"analysis"`
}
var tmp map[string]json.RawMessage
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
// set defaults for fields which might have been omitted
im.cache = registry.NewCache()
im.CustomAnalysis = newCustomAnalysis()
if tmp.CustomAnalysis != nil {
if tmp.CustomAnalysis.CharFilters != nil {
im.CustomAnalysis.CharFilters = tmp.CustomAnalysis.CharFilters
}
if tmp.CustomAnalysis.Tokenizers != nil {
im.CustomAnalysis.Tokenizers = tmp.CustomAnalysis.Tokenizers
}
if tmp.CustomAnalysis.TokenMaps != nil {
im.CustomAnalysis.TokenMaps = tmp.CustomAnalysis.TokenMaps
}
if tmp.CustomAnalysis.TokenFilters != nil {
im.CustomAnalysis.TokenFilters = tmp.CustomAnalysis.TokenFilters
}
if tmp.CustomAnalysis.Analyzers != nil {
im.CustomAnalysis.Analyzers = tmp.CustomAnalysis.Analyzers
}
if tmp.CustomAnalysis.DateTimeParsers != nil {
im.CustomAnalysis.DateTimeParsers = tmp.CustomAnalysis.DateTimeParsers
}
}
im.TypeField = defaultTypeField
if tmp.TypeField != "" {
im.TypeField = tmp.TypeField
}
im.DefaultType = defaultType
if tmp.DefaultType != "" {
im.DefaultType = tmp.DefaultType
}
im.DefaultAnalyzer = defaultAnalyzer
if tmp.DefaultAnalyzer != "" {
im.DefaultAnalyzer = tmp.DefaultAnalyzer
}
im.DefaultDateTimeParser = defaultDateTimeParser
if tmp.DefaultDateTimeParser != "" {
im.DefaultDateTimeParser = tmp.DefaultDateTimeParser
}
im.DefaultField = defaultField
if tmp.DefaultField != "" {
im.DefaultField = tmp.DefaultField
}
im.ByteArrayConverter = defaultByteArrayConverter
if tmp.ByteArrayConverter != "" {
im.ByteArrayConverter = tmp.ByteArrayConverter
}
im.DefaultMapping = NewDocumentMapping()
if tmp.DefaultMapping != nil {
im.DefaultMapping = tmp.DefaultMapping
im.TypeMapping = make(map[string]*DocumentMapping)
im.StoreDynamic = StoreDynamic
im.IndexDynamic = IndexDynamic
var invalidKeys []string
for k, v := range tmp {
switch k {
case "analysis":
err := json.Unmarshal(v, &im.CustomAnalysis)
if err != nil {
return err
}
case "type_field":
err := json.Unmarshal(v, &im.TypeField)
if err != nil {
return err
}
case "default_type":
err := json.Unmarshal(v, &im.DefaultType)
if err != nil {
return err
}
case "default_analyzer":
err := json.Unmarshal(v, &im.DefaultAnalyzer)
if err != nil {
return err
}
case "default_datetime_parser":
err := json.Unmarshal(v, &im.DefaultDateTimeParser)
if err != nil {
return err
}
case "default_field":
err := json.Unmarshal(v, &im.DefaultField)
if err != nil {
return err
}
case "byte_array_converter":
err := json.Unmarshal(v, &im.ByteArrayConverter)
if err != nil {
return err
}
case "default_mapping":
err := json.Unmarshal(v, &im.DefaultMapping)
if err != nil {
return err
}
case "types":
err := json.Unmarshal(v, &im.TypeMapping)
if err != nil {
return err
}
case "store_dynamic":
err := json.Unmarshal(v, &im.StoreDynamic)
if err != nil {
return err
}
case "index_dynamic":
err := json.Unmarshal(v, &im.IndexDynamic)
if err != nil {
return err
}
default:
invalidKeys = append(invalidKeys, k)
}
}
im.TypeMapping = make(map[string]*DocumentMapping, len(tmp.TypeMapping))
for typeName, typeDocMapping := range tmp.TypeMapping {
im.TypeMapping[typeName] = typeDocMapping
if MappingJSONStrict && len(invalidKeys) > 0 {
return fmt.Errorf("index mapping contains invalid keys: %v", invalidKeys)
}
err = im.CustomAnalysis.registerAll(im)

View File

@ -11,6 +11,7 @@ package bleve
import (
"encoding/json"
"fmt"
"reflect"
"testing"
@ -231,7 +232,13 @@ func TestMappingForPath(t *testing.T) {
customMapping.Analyzer = "xyz"
customMapping.Name = "nameCustom"
subDocMappingB := NewDocumentMapping()
customFieldX := NewTextFieldMapping()
customFieldX.Analyzer = "analyzerx"
subDocMappingB.AddFieldMappingsAt("desc", customFieldX)
docMappingA.AddFieldMappingsAt("author", enFieldMapping, customMapping)
docMappingA.AddSubDocumentMapping("child", subDocMappingB)
mapping := NewIndexMapping()
mapping.AddDocumentMapping("a", docMappingA)
@ -246,6 +253,11 @@ func TestMappingForPath(t *testing.T) {
t.Errorf("expected '%s' got '%s'", customMapping.Analyzer, analyzerName)
}
analyzerName = mapping.analyzerNameForPath("child.desc")
if analyzerName != customFieldX.Analyzer {
t.Errorf("expected '%s' got '%s'", customFieldX.Analyzer, analyzerName)
}
}
func TestMappingWithTokenizerDeps(t *testing.T) {
@ -327,6 +339,7 @@ func TestMappingWithTokenizerDeps(t *testing.T) {
func TestEnablingDisablingStoringDynamicFields(t *testing.T) {
// first verify that with system defaults, dynamic field is stored
data := map[string]interface{}{
"name": "bleve",
}
@ -342,11 +355,13 @@ func TestEnablingDisablingStoringDynamicFields(t *testing.T) {
}
}
// now change system level defaults, verify dynamic field is not stored
StoreDynamic = false
defer func() {
StoreDynamic = true
}()
mapping = NewIndexMapping()
doc = document.NewDocument("y")
err = mapping.mapDocument(doc, data)
if err != nil {
@ -357,6 +372,20 @@ func TestEnablingDisablingStoringDynamicFields(t *testing.T) {
t.Errorf("expected field 'name' to be not stored, is")
}
}
// now override the system level defaults inside the index mapping
mapping = NewIndexMapping()
mapping.StoreDynamic = true
doc = document.NewDocument("y")
err = mapping.mapDocument(doc, data)
if err != nil {
t.Fatal(err)
}
for _, field := range doc.Fields {
if field.Name() == "name" && !field.Options().IsStored() {
t.Errorf("expected field 'name' to be stored, isn't")
}
}
}
func TestMappingBool(t *testing.T) {
@ -421,3 +450,156 @@ func TestDisableDefaultMapping(t *testing.T) {
t.Errorf("expected no fields, got %d", len(doc.Fields))
}
}
func TestInvalidFieldMappingStrict(t *testing.T) {
mappingBytes := []byte(`{"includeInAll":true,"name":"a parsed name"}`)
// first unmarshal it without strict
var fm FieldMapping
err := json.Unmarshal(mappingBytes, &fm)
if err != nil {
t.Fatal(err)
}
if fm.Name != "a parsed name" {
t.Fatalf("expect to find field mapping name 'a parsed name', got '%s'", fm.Name)
}
// reset
fm.Name = ""
// now enable strict
MappingJSONStrict = true
defer func() {
MappingJSONStrict = false
}()
expectedInvalidKeys := []string{"includeInAll"}
expectedErr := fmt.Errorf("field mapping contains invalid keys: %v", expectedInvalidKeys)
err = json.Unmarshal(mappingBytes, &fm)
if err.Error() != expectedErr.Error() {
t.Fatalf("expected err: %v, got err: %v", expectedErr, err)
}
if fm.Name != "a parsed name" {
t.Fatalf("expect to find field mapping name 'a parsed name', got '%s'", fm.Name)
}
}
func TestInvalidDocumentMappingStrict(t *testing.T) {
mappingBytes := []byte(`{"defaultAnalyzer":true,"enabled":false}`)
// first unmarshal it without strict
var dm DocumentMapping
err := json.Unmarshal(mappingBytes, &dm)
if err != nil {
t.Fatal(err)
}
if dm.Enabled != false {
t.Fatalf("expect to find document mapping enabled false, got '%t'", dm.Enabled)
}
// reset
dm.Enabled = true
// now enable strict
MappingJSONStrict = true
defer func() {
MappingJSONStrict = false
}()
expectedInvalidKeys := []string{"defaultAnalyzer"}
expectedErr := fmt.Errorf("document mapping contains invalid keys: %v", expectedInvalidKeys)
err = json.Unmarshal(mappingBytes, &dm)
if err.Error() != expectedErr.Error() {
t.Fatalf("expected err: %v, got err: %v", expectedErr, err)
}
if dm.Enabled != false {
t.Fatalf("expect to find document mapping enabled false, got '%t'", dm.Enabled)
}
}
func TestInvalidIndexMappingStrict(t *testing.T) {
mappingBytes := []byte(`{"typeField":"type","default_field":"all"}`)
// first unmarshal it without strict
var im IndexMapping
err := json.Unmarshal(mappingBytes, &im)
if err != nil {
t.Fatal(err)
}
if im.DefaultField != "all" {
t.Fatalf("expect to find index mapping default field 'all', got '%s'", im.DefaultField)
}
// reset
im.DefaultField = "_all"
// now enable strict
MappingJSONStrict = true
defer func() {
MappingJSONStrict = false
}()
expectedInvalidKeys := []string{"typeField"}
expectedErr := fmt.Errorf("index mapping contains invalid keys: %v", expectedInvalidKeys)
err = json.Unmarshal(mappingBytes, &im)
if err.Error() != expectedErr.Error() {
t.Fatalf("expected err: %v, got err: %v", expectedErr, err)
}
if im.DefaultField != "all" {
t.Fatalf("expect to find index mapping default field 'all', got '%s'", im.DefaultField)
}
}
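All three strict-mode tests follow one recipe: unmarshal leniently first, then flip the package-level MappingJSONStrict flag so unknown keys surface as errors. A minimal sketch, assuming the same package scope ("bogusKey" is a hypothetical unknown key used for illustration):
MappingJSONStrict = true
defer func() { MappingJSONStrict = false }()
var fm FieldMapping
err := json.Unmarshal([]byte(`{"bogusKey":true}`), &fm)
// err.Error() == "field mapping contains invalid keys: [bogusKey]"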
func TestMappingBug353(t *testing.T) {
dataBytes := `{
"Reviews": [
{
"ReviewID": "RX16692001",
"Content": "Usually stay near the airport..."
}
],
"Other": {
"Inside": "text"
},
"Name": "The Inn at Baltimore White Marsh"
}`
var data map[string]interface{}
err := json.Unmarshal([]byte(dataBytes), &data)
if err != nil {
t.Fatal(err)
}
reviewContentFieldMapping := NewTextFieldMapping()
reviewContentFieldMapping.Analyzer = "crazy"
reviewsMapping := NewDocumentMapping()
reviewsMapping.Dynamic = false
reviewsMapping.AddFieldMappingsAt("Content", reviewContentFieldMapping)
otherMapping := NewDocumentMapping()
otherMapping.Dynamic = false
mapping := NewIndexMapping()
mapping.DefaultMapping.AddSubDocumentMapping("Reviews", reviewsMapping)
mapping.DefaultMapping.AddSubDocumentMapping("Other", otherMapping)
doc := document.NewDocument("x")
err = mapping.mapDocument(doc, data)
if err != nil {
t.Fatal(err)
}
// expect doc to have only 2 fields
if len(doc.Fields) != 2 {
t.Errorf("expected doc with 2 fields, got: %d", len(doc.Fields))
for _, f := range doc.Fields {
t.Logf("field named: %s", f.Name())
}
}
}

View File

@ -192,6 +192,9 @@ func ParseQuery(input []byte) (Query, error) {
if err != nil {
return nil, err
}
if rv.Boost() == 0 {
rv.SetBoost(1)
}
return &rv, nil
}
_, hasWildcard := tmp["wildcard"]
@ -201,6 +204,9 @@ func ParseQuery(input []byte) (Query, error) {
if err != nil {
return nil, err
}
if rv.Boost() == 0 {
rv.SetBoost(1)
}
return &rv, nil
}
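Both guards give JSON-parsed wildcard and regexp queries a default boost of 1 when the key is absent, instead of leaking the float zero value. A minimal sketch of the effect:
// no "boost" key in the JSON, so the parsed query reports Boost() == 1
q, err := ParseQuery([]byte(`{"field":"name","wildcard":"mar*"}`))
if err == nil && q.Boost() == 1 {
	// boost was defaulted rather than left at zero
}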
_, hasMatchAll := tmp["match_all"]

View File

@ -25,40 +25,47 @@ func RegisterAnalyzer(name string, constructor AnalyzerConstructor) {
type AnalyzerConstructor func(config map[string]interface{}, cache *Cache) (*analysis.Analyzer, error)
type AnalyzerRegistry map[string]AnalyzerConstructor
type AnalyzerCache map[string]*analysis.Analyzer
func (c AnalyzerCache) AnalyzerNamed(name string, cache *Cache) (*analysis.Analyzer, error) {
analyzer, cached := c[name]
if cached {
return analyzer, nil
type AnalyzerCache struct {
*ConcurrentCache
}
func NewAnalyzerCache() *AnalyzerCache {
return &AnalyzerCache{
NewConcurrentCache(),
}
analyzerConstructor, registered := analyzers[name]
}
func AnalyzerBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
cons, registered := analyzers[name]
if !registered {
return nil, fmt.Errorf("no analyzer with name or type '%s' registered", name)
}
analyzer, err := analyzerConstructor(nil, cache)
analyzer, err := cons(config, cache)
if err != nil {
return nil, fmt.Errorf("error building analyzer: %v", err)
}
c[name] = analyzer
return analyzer, nil
}
func (c AnalyzerCache) DefineAnalyzer(name string, typ string, config map[string]interface{}, cache *Cache) (*analysis.Analyzer, error) {
_, cached := c[name]
if cached {
return nil, fmt.Errorf("analyzer named '%s' already defined", name)
}
analyzerConstructor, registered := analyzers[typ]
if !registered {
return nil, fmt.Errorf("no analyzer type '%s' registered", typ)
}
analyzer, err := analyzerConstructor(config, cache)
func (c *AnalyzerCache) AnalyzerNamed(name string, cache *Cache) (*analysis.Analyzer, error) {
item, err := c.ItemNamed(name, cache, AnalyzerBuild)
if err != nil {
return nil, fmt.Errorf("error building analyzer: %v", err)
return nil, err
}
c[name] = analyzer
return analyzer, nil
return item.(*analysis.Analyzer), nil
}
func (c *AnalyzerCache) DefineAnalyzer(name string, typ string, config map[string]interface{}, cache *Cache) (*analysis.Analyzer, error) {
item, err := c.DefineItem(name, typ, config, cache, AnalyzerBuild)
if err != nil {
if err == ErrAlreadyDefined {
return nil, fmt.Errorf("analyzer named '%s' already defined", name)
} else {
return nil, err
}
}
return item.(*analysis.Analyzer), nil
}
func AnalyzerTypesAndInstances() ([]string, []string) {

82
registry/cache.go Normal file
View File

@ -0,0 +1,82 @@
// Copyright (c) 2016 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package registry
import (
"fmt"
"sync"
)
var ErrAlreadyDefined = fmt.Errorf("item already defined")
type CacheBuild func(name string, config map[string]interface{}, cache *Cache) (interface{}, error)
type ConcurrentCache struct {
mutex sync.RWMutex
data map[string]interface{}
}
func NewConcurrentCache() *ConcurrentCache {
return &ConcurrentCache{
data: make(map[string]interface{}),
}
}
func (c *ConcurrentCache) ItemNamed(name string, cache *Cache, build CacheBuild) (interface{}, error) {
c.mutex.RLock()
item, cached := c.data[name]
if cached {
c.mutex.RUnlock()
return item, nil
}
// give up read lock
c.mutex.RUnlock()
// try to build it
newItem, err := build(name, nil, cache)
if err != nil {
return nil, err
}
// acquire write lock
c.mutex.Lock()
defer c.mutex.Unlock()
// check again because it could have been created while trading locks
item, cached = c.data[name]
if cached {
return item, nil
}
c.data[name] = newItem
return newItem, nil
}
func (c *ConcurrentCache) DefineItem(name string, typ string, config map[string]interface{}, cache *Cache, build CacheBuild) (interface{}, error) {
c.mutex.RLock()
_, cached := c.data[name]
if cached {
c.mutex.RUnlock()
return nil, ErrAlreadyDefined
}
// give up read lock so other lookups can proceed
c.mutex.RUnlock()
// really not there, try to build it
newItem, err := build(typ, config, cache)
if err != nil {
return nil, err
}
// now we've built it, acquire lock
c.mutex.Lock()
defer c.mutex.Unlock()
// check again because it could have been created while trading locks
_, cached = c.data[name]
if cached {
return nil, ErrAlreadyDefined
}
c.data[name] = newItem
return newItem, nil
}
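ItemNamed and DefineItem both use double-checked locking: take the read lock for the fast path, build outside any lock, then re-check under the write lock so a concurrent builder's result wins benignly. A minimal sketch of how a new component cache would reuse this; WidgetCache, widgets, and WidgetBuild are hypothetical stand-ins for the real caches below:
type WidgetCache struct {
	*ConcurrentCache
}
func NewWidgetCache() *WidgetCache {
	return &WidgetCache{NewConcurrentCache()}
}
// WidgetBuild adapts a hypothetical widgets registry map to the CacheBuild signature.
func WidgetBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
	cons, registered := widgets[name]
	if !registered {
		return nil, fmt.Errorf("no widget with name or type '%s' registered", name)
	}
	return cons(config, cache)
}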

View File

@ -25,40 +25,47 @@ func RegisterCharFilter(name string, constructor CharFilterConstructor) {
type CharFilterConstructor func(config map[string]interface{}, cache *Cache) (analysis.CharFilter, error)
type CharFilterRegistry map[string]CharFilterConstructor
type CharFilterCache map[string]analysis.CharFilter
func (c CharFilterCache) CharFilterNamed(name string, cache *Cache) (analysis.CharFilter, error) {
charFilter, cached := c[name]
if cached {
return charFilter, nil
type CharFilterCache struct {
*ConcurrentCache
}
func NewCharFilterCache() *CharFilterCache {
return &CharFilterCache{
NewConcurrentCache(),
}
charFilterConstructor, registered := charFilters[name]
}
func CharFilterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
cons, registered := charFilters[name]
if !registered {
return nil, fmt.Errorf("no char filter with name or type '%s' registered", name)
}
charFilter, err := charFilterConstructor(nil, cache)
charFilter, err := cons(config, cache)
if err != nil {
return nil, fmt.Errorf("error building char filter: %v", err)
}
c[name] = charFilter
return charFilter, nil
}
func (c CharFilterCache) DefineCharFilter(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.CharFilter, error) {
_, cached := c[name]
if cached {
return nil, fmt.Errorf("char filter named '%s' already defined", name)
}
charFilterConstructor, registered := charFilters[typ]
if !registered {
return nil, fmt.Errorf("no char filter type '%s' registered", typ)
}
charFilter, err := charFilterConstructor(config, cache)
func (c *CharFilterCache) CharFilterNamed(name string, cache *Cache) (analysis.CharFilter, error) {
item, err := c.ItemNamed(name, cache, CharFilterBuild)
if err != nil {
return nil, fmt.Errorf("error building char filter: %v", err)
return nil, err
}
c[name] = charFilter
return charFilter, nil
return item.(analysis.CharFilter), nil
}
func (c *CharFilterCache) DefineCharFilter(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.CharFilter, error) {
item, err := c.DefineItem(name, typ, config, cache, CharFilterBuild)
if err != nil {
if err == ErrAlreadyDefined {
return nil, fmt.Errorf("char filter named '%s' already defined", name)
} else {
return nil, err
}
}
return item.(analysis.CharFilter), nil
}
func CharFilterTypesAndInstances() ([]string, []string) {

View File

@ -25,42 +25,49 @@ func RegisterDateTimeParser(name string, constructor DateTimeParserConstructor)
type DateTimeParserConstructor func(config map[string]interface{}, cache *Cache) (analysis.DateTimeParser, error)
type DateTimeParserRegistry map[string]DateTimeParserConstructor
type DateTimeParserCache map[string]analysis.DateTimeParser
func (c DateTimeParserCache) DateTimeParserNamed(name string, cache *Cache) (analysis.DateTimeParser, error) {
dateTimeParser, cached := c[name]
if cached {
return dateTimeParser, nil
type DateTimeParserCache struct {
*ConcurrentCache
}
func NewDateTimeParserCache() *DateTimeParserCache {
return &DateTimeParserCache{
NewConcurrentCache(),
}
dateTimeParserConstructor, registered := dateTimeParsers[name]
}
func DateTimeParserBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
cons, registered := dateTimeParsers[name]
if !registered {
return nil, fmt.Errorf("no date time parser with name or type '%s' registered", name)
}
dateTimeParser, err := dateTimeParserConstructor(nil, cache)
if err != nil {
return nil, fmt.Errorf("error building date time parse: %v", err)
}
c[name] = dateTimeParser
return dateTimeParser, nil
}
func (c DateTimeParserCache) DefineDateTimeParser(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.DateTimeParser, error) {
_, cached := c[name]
if cached {
return nil, fmt.Errorf("date time parser named '%s' already defined", name)
}
dateTimeParserConstructor, registered := dateTimeParsers[typ]
if !registered {
return nil, fmt.Errorf("no date time parser type '%s' registered", typ)
}
dateTimeParser, err := dateTimeParserConstructor(config, cache)
dateTimeParser, err := cons(config, cache)
if err != nil {
return nil, fmt.Errorf("error building date time parser: %v", err)
}
c[name] = dateTimeParser
return dateTimeParser, nil
}
func (c *DateTimeParserCache) DateTimeParserNamed(name string, cache *Cache) (analysis.DateTimeParser, error) {
item, err := c.ItemNamed(name, cache, DateTimeParserBuild)
if err != nil {
return nil, err
}
return item.(analysis.DateTimeParser), nil
}
func (c *DateTimeParserCache) DefineDateTimeParser(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.DateTimeParser, error) {
item, err := c.DefineItem(name, typ, config, cache, DateTimeParserBuild)
if err != nil {
if err == ErrAlreadyDefined {
return nil, fmt.Errorf("date time parser named '%s' already defined", name)
} else {
return nil, err
}
}
return item.(analysis.DateTimeParser), nil
}
func DateTimeParserTypesAndInstances() ([]string, []string) {
emptyConfig := map[string]interface{}{}
emptyCache := NewCache()

View File

@ -25,40 +25,47 @@ func RegisterFragmentFormatter(name string, constructor FragmentFormatterConstru
type FragmentFormatterConstructor func(config map[string]interface{}, cache *Cache) (highlight.FragmentFormatter, error)
type FragmentFormatterRegistry map[string]FragmentFormatterConstructor
type FragmentFormatterCache map[string]highlight.FragmentFormatter
func (c FragmentFormatterCache) FragmentFormatterNamed(name string, cache *Cache) (highlight.FragmentFormatter, error) {
fragmentFormatter, cached := c[name]
if cached {
return fragmentFormatter, nil
type FragmentFormatterCache struct {
*ConcurrentCache
}
func NewFragmentFormatterCache() *FragmentFormatterCache {
return &FragmentFormatterCache{
NewConcurrentCache(),
}
fragmentFormatterConstructor, registered := fragmentFormatters[name]
}
func FragmentFormatterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
cons, registered := fragmentFormatters[name]
if !registered {
return nil, fmt.Errorf("no fragment formatter with name or type '%s' registered", name)
}
fragmentFormatter, err := fragmentFormatterConstructor(nil, cache)
fragmentFormatter, err := cons(config, cache)
if err != nil {
return nil, fmt.Errorf("error building fragment formatter: %v", err)
}
c[name] = fragmentFormatter
return fragmentFormatter, nil
}
func (c FragmentFormatterCache) DefineFragmentFormatter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.FragmentFormatter, error) {
_, cached := c[name]
if cached {
return nil, fmt.Errorf("fragment formatter named '%s' already defined", name)
}
fragmentFormatterConstructor, registered := fragmentFormatters[typ]
if !registered {
return nil, fmt.Errorf("no fragment formatter type '%s' registered", typ)
}
fragmentFormatter, err := fragmentFormatterConstructor(config, cache)
func (c *FragmentFormatterCache) FragmentFormatterNamed(name string, cache *Cache) (highlight.FragmentFormatter, error) {
item, err := c.ItemNamed(name, cache, FragmentFormatterBuild)
if err != nil {
return nil, fmt.Errorf("error building fragment formatter: %v", err)
return nil, err
}
c[name] = fragmentFormatter
return fragmentFormatter, nil
return item.(highlight.FragmentFormatter), nil
}
func (c *FragmentFormatterCache) DefineFragmentFormatter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.FragmentFormatter, error) {
item, err := c.DefineItem(name, typ, config, cache, FragmentFormatterBuild)
if err != nil {
if err == ErrAlreadyDefined {
return nil, fmt.Errorf("fragment formatter named '%s' already defined", name)
} else {
return nil, err
}
}
return item.(highlight.FragmentFormatter), nil
}
func FragmentFormatterTypesAndInstances() ([]string, []string) {

View File

@ -25,40 +25,47 @@ func RegisterFragmenter(name string, constructor FragmenterConstructor) {
type FragmenterConstructor func(config map[string]interface{}, cache *Cache) (highlight.Fragmenter, error)
type FragmenterRegistry map[string]FragmenterConstructor
type FragmenterCache map[string]highlight.Fragmenter
func (c FragmenterCache) FragmenterNamed(name string, cache *Cache) (highlight.Fragmenter, error) {
fragmenter, cached := c[name]
if cached {
return fragmenter, nil
type FragmenterCache struct {
*ConcurrentCache
}
func NewFragmenterCache() *FragmenterCache {
return &FragmenterCache{
NewConcurrentCache(),
}
fragmenterConstructor, registered := fragmenters[name]
}
func FragmenterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
cons, registered := fragmenters[name]
if !registered {
return nil, fmt.Errorf("no fragmenter with name or type '%s' registered", name)
}
fragmenter, err := fragmenterConstructor(nil, cache)
fragmenter, err := cons(config, cache)
if err != nil {
return nil, fmt.Errorf("error building fragmenter: %v", err)
}
c[name] = fragmenter
return fragmenter, nil
}
func (c FragmenterCache) DefineFragmenter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.Fragmenter, error) {
_, cached := c[name]
if cached {
return nil, fmt.Errorf("fragmenter named '%s' already defined", name)
}
fragmenterConstructor, registered := fragmenters[typ]
if !registered {
return nil, fmt.Errorf("no fragmenter type '%s' registered", typ)
}
fragmenter, err := fragmenterConstructor(config, cache)
func (c *FragmenterCache) FragmenterNamed(name string, cache *Cache) (highlight.Fragmenter, error) {
item, err := c.ItemNamed(name, cache, FragmenterBuild)
if err != nil {
return nil, fmt.Errorf("error building fragmenter: %v", err)
return nil, err
}
c[name] = fragmenter
return fragmenter, nil
return item.(highlight.Fragmenter), nil
}
func (c *FragmenterCache) DefineFragmenter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.Fragmenter, error) {
item, err := c.DefineItem(name, typ, config, cache, FragmenterBuild)
if err != nil {
if err == ErrAlreadyDefined {
return nil, fmt.Errorf("fragmenter named '%s' already defined", name)
} else {
return nil, err
}
}
return item.(highlight.Fragmenter), nil
}
func FragmenterTypesAndInstances() ([]string, []string) {

View File

@ -25,40 +25,47 @@ func RegisterHighlighter(name string, constructor HighlighterConstructor) {
type HighlighterConstructor func(config map[string]interface{}, cache *Cache) (highlight.Highlighter, error)
type HighlighterRegistry map[string]HighlighterConstructor
type HighlighterCache map[string]highlight.Highlighter
func (c HighlighterCache) HighlighterNamed(name string, cache *Cache) (highlight.Highlighter, error) {
highlighter, cached := c[name]
if cached {
return highlighter, nil
type HighlighterCache struct {
*ConcurrentCache
}
func NewHighlighterCache() *HighlighterCache {
return &HighlighterCache{
NewConcurrentCache(),
}
highlighterConstructor, registered := highlighters[name]
}
func HighlighterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
cons, registered := highlighters[name]
if !registered {
return nil, fmt.Errorf("no highlighter with name or type '%s' registered", name)
}
highlighter, err := highlighterConstructor(nil, cache)
highlighter, err := cons(config, cache)
if err != nil {
return nil, fmt.Errorf("error building highlighter: %v", err)
}
c[name] = highlighter
return highlighter, nil
}
func (c HighlighterCache) DefineHighlighter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.Highlighter, error) {
_, cached := c[name]
if cached {
return nil, fmt.Errorf("highlighter named '%s' already defined", name)
}
highlighterConstructor, registered := highlighters[typ]
if !registered {
return nil, fmt.Errorf("no highlighter type '%s' registered", typ)
}
highlighter, err := highlighterConstructor(config, cache)
func (c *HighlighterCache) HighlighterNamed(name string, cache *Cache) (highlight.Highlighter, error) {
item, err := c.ItemNamed(name, cache, HighlighterBuild)
if err != nil {
return nil, fmt.Errorf("error building highlighter: %v", err)
return nil, err
}
c[name] = highlighter
return highlighter, nil
return item.(highlight.Highlighter), nil
}
func (c *HighlighterCache) DefineHighlighter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.Highlighter, error) {
item, err := c.DefineItem(name, typ, config, cache, HighlighterBuild)
if err != nil {
if err == ErrAlreadyDefined {
return nil, fmt.Errorf("highlighter named '%s' already defined", name)
} else {
return nil, err
}
}
return item.(highlight.Highlighter), nil
}
func HighlighterTypesAndInstances() ([]string, []string) {

View File

@ -35,28 +35,28 @@ var analyzers = make(AnalyzerRegistry, 0)
var dateTimeParsers = make(DateTimeParserRegistry, 0)
type Cache struct {
CharFilters CharFilterCache
Tokenizers TokenizerCache
TokenMaps TokenMapCache
TokenFilters TokenFilterCache
Analyzers AnalyzerCache
DateTimeParsers DateTimeParserCache
FragmentFormatters FragmentFormatterCache
Fragmenters FragmenterCache
Highlighters HighlighterCache
CharFilters *CharFilterCache
Tokenizers *TokenizerCache
TokenMaps *TokenMapCache
TokenFilters *TokenFilterCache
Analyzers *AnalyzerCache
DateTimeParsers *DateTimeParserCache
FragmentFormatters *FragmentFormatterCache
Fragmenters *FragmenterCache
Highlighters *HighlighterCache
}
func NewCache() *Cache {
return &Cache{
CharFilters: make(CharFilterCache, 0),
Tokenizers: make(TokenizerCache, 0),
TokenMaps: make(TokenMapCache, 0),
TokenFilters: make(TokenFilterCache, 0),
Analyzers: make(AnalyzerCache, 0),
DateTimeParsers: make(DateTimeParserCache, 0),
FragmentFormatters: make(FragmentFormatterCache, 0),
Fragmenters: make(FragmenterCache, 0),
Highlighters: make(HighlighterCache, 0),
CharFilters: NewCharFilterCache(),
Tokenizers: NewTokenizerCache(),
TokenMaps: NewTokenMapCache(),
TokenFilters: NewTokenFilterCache(),
Analyzers: NewAnalyzerCache(),
DateTimeParsers: NewDateTimeParserCache(),
FragmentFormatters: NewFragmentFormatterCache(),
Fragmenters: NewFragmenterCache(),
Highlighters: NewHighlighterCache(),
}
}
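With every field now a pointer to a ConcurrentCache-backed type, one Cache can be shared across goroutines. A minimal sketch, assuming the caller imports the registry package and the named components are registered by the usual imports:
cache := registry.NewCache()
go func() { _, _ = cache.Analyzers.AnalyzerNamed("standard", cache) }()
_, _ = cache.Tokenizers.TokenizerNamed("unicode", cache)
// concurrent lookups are safe; each component is built at most once per name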

View File

@ -25,40 +25,47 @@ func RegisterTokenFilter(name string, constructor TokenFilterConstructor) {
type TokenFilterConstructor func(config map[string]interface{}, cache *Cache) (analysis.TokenFilter, error)
type TokenFilterRegistry map[string]TokenFilterConstructor
type TokenFilterCache map[string]analysis.TokenFilter
func (c TokenFilterCache) TokenFilterNamed(name string, cache *Cache) (analysis.TokenFilter, error) {
tokenFilter, cached := c[name]
if cached {
return tokenFilter, nil
type TokenFilterCache struct {
*ConcurrentCache
}
func NewTokenFilterCache() *TokenFilterCache {
return &TokenFilterCache{
NewConcurrentCache(),
}
tokenFilterConstructor, registered := tokenFilters[name]
}
func TokenFilterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
cons, registered := tokenFilters[name]
if !registered {
return nil, fmt.Errorf("no token filter with name or type '%s' registered", name)
}
tokenFilter, err := tokenFilterConstructor(nil, cache)
tokenFilter, err := cons(config, cache)
if err != nil {
return nil, fmt.Errorf("error building token filter: %v", err)
}
c[name] = tokenFilter
return tokenFilter, nil
}
func (c TokenFilterCache) DefineTokenFilter(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.TokenFilter, error) {
_, cached := c[name]
if cached {
return nil, fmt.Errorf("token filter named '%s' already defined", name)
}
tokenFilterConstructor, registered := tokenFilters[typ]
if !registered {
return nil, fmt.Errorf("no token filter type '%s' registered", typ)
}
tokenFilter, err := tokenFilterConstructor(config, cache)
func (c *TokenFilterCache) TokenFilterNamed(name string, cache *Cache) (analysis.TokenFilter, error) {
item, err := c.ItemNamed(name, cache, TokenFilterBuild)
if err != nil {
return nil, fmt.Errorf("error building token filter: %v", err)
return nil, err
}
c[name] = tokenFilter
return tokenFilter, nil
return item.(analysis.TokenFilter), nil
}
func (c *TokenFilterCache) DefineTokenFilter(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.TokenFilter, error) {
item, err := c.DefineItem(name, typ, config, cache, TokenFilterBuild)
if err != nil {
if err == ErrAlreadyDefined {
return nil, fmt.Errorf("token filter named '%s' already defined", name)
} else {
return nil, err
}
}
return item.(analysis.TokenFilter), nil
}
func TokenFilterTypesAndInstances() ([]string, []string) {

View File

@ -25,40 +25,47 @@ func RegisterTokenMap(name string, constructor TokenMapConstructor) {
type TokenMapConstructor func(config map[string]interface{}, cache *Cache) (analysis.TokenMap, error)
type TokenMapRegistry map[string]TokenMapConstructor
type TokenMapCache map[string]analysis.TokenMap
func (c TokenMapCache) TokenMapNamed(name string, cache *Cache) (analysis.TokenMap, error) {
tokenMap, cached := c[name]
if cached {
return tokenMap, nil
type TokenMapCache struct {
*ConcurrentCache
}
func NewTokenMapCache() *TokenMapCache {
return &TokenMapCache{
NewConcurrentCache(),
}
tokenMapConstructor, registered := tokenMaps[name]
}
func TokenMapBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
cons, registered := tokenMaps[name]
if !registered {
return nil, fmt.Errorf("no token map with name or type '%s' registered", name)
}
tokenMap, err := tokenMapConstructor(nil, cache)
tokenMap, err := cons(config, cache)
if err != nil {
return nil, fmt.Errorf("error building token map: %v", err)
}
c[name] = tokenMap
return tokenMap, nil
}
func (c TokenMapCache) DefineTokenMap(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.TokenMap, error) {
_, cached := c[name]
if cached {
return nil, fmt.Errorf("token map named '%s' already defined", name)
}
tokenMapConstructor, registered := tokenMaps[typ]
if !registered {
return nil, fmt.Errorf("no token map type '%s' registered", typ)
}
tokenMap, err := tokenMapConstructor(config, cache)
func (c *TokenMapCache) TokenMapNamed(name string, cache *Cache) (analysis.TokenMap, error) {
item, err := c.ItemNamed(name, cache, TokenMapBuild)
if err != nil {
return nil, fmt.Errorf("error building token map: %v", err)
return nil, err
}
c[name] = tokenMap
return tokenMap, nil
return item.(analysis.TokenMap), nil
}
func (c *TokenMapCache) DefineTokenMap(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.TokenMap, error) {
item, err := c.DefineItem(name, typ, config, cache, TokenMapBuild)
if err != nil {
if err == ErrAlreadyDefined {
return nil, fmt.Errorf("token map named '%s' already defined", name)
} else {
return nil, err
}
}
return item.(analysis.TokenMap), nil
}
func TokenMapTypesAndInstances() ([]string, []string) {

View File

@ -25,40 +25,47 @@ func RegisterTokenizer(name string, constructor TokenizerConstructor) {
type TokenizerConstructor func(config map[string]interface{}, cache *Cache) (analysis.Tokenizer, error)
type TokenizerRegistry map[string]TokenizerConstructor
type TokenizerCache map[string]analysis.Tokenizer
func (c TokenizerCache) TokenizerNamed(name string, cache *Cache) (analysis.Tokenizer, error) {
tokenizer, cached := c[name]
if cached {
return tokenizer, nil
type TokenizerCache struct {
*ConcurrentCache
}
func NewTokenizerCache() *TokenizerCache {
return &TokenizerCache{
NewConcurrentCache(),
}
tokenizerConstructor, registered := tokenizers[name]
}
func TokenizerBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
cons, registered := tokenizers[name]
if !registered {
return nil, fmt.Errorf("no tokenizer with name or type '%s' registered", name)
}
tokenizer, err := tokenizerConstructor(nil, cache)
tokenizer, err := cons(config, cache)
if err != nil {
return nil, fmt.Errorf("error building tokenizer '%s': %v", name, err)
return nil, fmt.Errorf("error building tokenizer: %v", err)
}
c[name] = tokenizer
return tokenizer, nil
}
func (c TokenizerCache) DefineTokenizer(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.Tokenizer, error) {
_, cached := c[name]
if cached {
return nil, fmt.Errorf("tokenizer named '%s' already defined", name)
}
tokenizerConstructor, registered := tokenizers[typ]
if !registered {
return nil, fmt.Errorf("no tokenizer type '%s' registered", typ)
}
tokenizer, err := tokenizerConstructor(config, cache)
func (c *TokenizerCache) TokenizerNamed(name string, cache *Cache) (analysis.Tokenizer, error) {
item, err := c.ItemNamed(name, cache, TokenizerBuild)
if err != nil {
return nil, fmt.Errorf("error building tokenizer '%s': %v", name, err)
return nil, err
}
c[name] = tokenizer
return tokenizer, nil
return item.(analysis.Tokenizer), nil
}
func (c *TokenizerCache) DefineTokenizer(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.Tokenizer, error) {
item, err := c.DefineItem(name, typ, config, cache, TokenizerBuild)
if err != nil {
if err == ErrAlreadyDefined {
return nil, fmt.Errorf("tokenizer named '%s' already defined", name)
} else {
return nil, err
}
}
return item.(analysis.Tokenizer), nil
}
func TokenizerTypesAndInstances() ([]string, []string) {

View File

@ -236,9 +236,47 @@ func NewSearchRequestOptions(q Query, size, from int, explain bool) *SearchReque
}
}
// IndexErrMap tracks errors by the name of the index where each occurred
type IndexErrMap map[string]error
// MarshalJSON serializes each error into a string for JSON consumption
func (iem IndexErrMap) MarshalJSON() ([]byte, error) {
tmp := make(map[string]string, len(iem))
for k, v := range iem {
tmp[k] = v.Error()
}
return json.Marshal(tmp)
}
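MarshalJSON flattens each error value to its message string so the status section can round-trip through JSON. A minimal sketch:
iem := IndexErrMap{"idx-a": fmt.Errorf("store closed")}
b, _ := json.Marshal(iem)
// string(b) == `{"idx-a":"store closed"}`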
// SearchStatus is a section in the SearchResult reporting how many
// underlying indexes were queried, how many were successful/failed,
// and a map of any errors that were encountered
type SearchStatus struct {
Total int `json:"total"`
Failed int `json:"failed"`
Successful int `json:"successful"`
Errors IndexErrMap `json:"errors,omitempty"`
}
// Merge will merge together multiple SearchStatuses during a MultiSearch
func (ss *SearchStatus) Merge(other *SearchStatus) {
ss.Total += other.Total
ss.Failed += other.Failed
ss.Successful += other.Successful
if len(other.Errors) > 0 {
if ss.Errors == nil {
ss.Errors = make(map[string]error)
}
for otherIndex, otherError := range other.Errors {
ss.Errors[otherIndex] = otherError
}
}
}
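Merge accumulates the counters and copies per-index errors across, allocating the destination map lazily. A minimal sketch with invented values:
a := &SearchStatus{Total: 2, Successful: 1, Failed: 1}
b := &SearchStatus{Total: 1, Successful: 1,
	Errors: IndexErrMap{"idx-b": fmt.Errorf("timeout")}}
a.Merge(b)
// a.Total == 3, a.Successful == 2, a.Failed == 1, and a.Errors now carries "idx-b"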
// A SearchResult describes the results of executing
// a SearchRequest.
type SearchResult struct {
Status *SearchStatus `json:"status"`
Request *SearchRequest `json:"request"`
Hits search.DocumentMatchCollection `json:"hits"`
Total uint64 `json:"total_hits"`
@ -288,7 +326,9 @@ func (sr *SearchResult) String() string {
return rv
}
// Merge will merge together multiple SearchResults during a MultiSearch
func (sr *SearchResult) Merge(other *SearchResult) {
sr.Status.Merge(other.Status)
sr.Hits = append(sr.Hits, other.Hits...)
sr.Total += other.Total
if other.MaxScore > sr.MaxScore {

View File

@ -11,10 +11,12 @@ package search
import (
"time"
"golang.org/x/net/context"
)
type Collector interface {
Collect(searcher Searcher) error
Collect(ctx context.Context, searcher Searcher) error
Results() DocumentMatchCollection
Total() uint64
MaxScore() float64

View File

@ -13,6 +13,8 @@ import (
"container/list"
"time"
"golang.org/x/net/context"
"github.com/blevesearch/bleve/search"
)
@ -54,19 +56,31 @@ func (tksc *TopScoreCollector) Took() time.Duration {
return tksc.took
}
func (tksc *TopScoreCollector) Collect(searcher search.Searcher) error {
func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Searcher) error {
startTime := time.Now()
next, err := searcher.Next()
for err == nil && next != nil {
tksc.collectSingle(next)
if tksc.facetsBuilder != nil {
err = tksc.facetsBuilder.Update(next)
if err != nil {
break
}
}
var err error
var next *search.DocumentMatch
select {
case <-ctx.Done():
return ctx.Err()
default:
next, err = searcher.Next()
}
for err == nil && next != nil {
select {
case <-ctx.Done():
return ctx.Err()
default:
tksc.collectSingle(next)
if tksc.facetsBuilder != nil {
err = tksc.facetsBuilder.Update(next)
if err != nil {
break
}
}
next, err = searcher.Next()
}
}
// compute search duration
tksc.took = time.Since(startTime)
if err != nil {
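Because Collect now checks ctx.Done() before the first Next and on every iteration, callers can bound a collection. A minimal sketch, assuming a searcher is already in hand:
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
defer cancel()
collector := NewTopScorerCollector(10)
if err := collector.Collect(ctx, searcher); err == context.DeadlineExceeded {
	// the collection was abandoned mid-iteration
}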

View File

@ -14,6 +14,8 @@ import (
"strconv"
"testing"
"golang.org/x/net/context"
"github.com/blevesearch/bleve/search"
)
@ -84,7 +86,7 @@ func TestTop10Scores(t *testing.T) {
}
collector := NewTopScorerCollector(10)
err := collector.Collect(searcher)
err := collector.Collect(context.Background(), searcher)
if err != nil {
t.Fatal(err)
}
@ -192,7 +194,7 @@ func TestTop10ScoresSkip10(t *testing.T) {
}
collector := NewTopScorerSkipCollector(10, 10)
err := collector.Collect(searcher)
err := collector.Collect(context.Background(), searcher)
if err != nil {
t.Fatal(err)
}
@ -238,7 +240,7 @@ func BenchmarkTop10of100000Scores(b *testing.B) {
collector := NewTopScorerCollector(10)
b.ResetTimer()
err := collector.Collect(searcher)
err := collector.Collect(context.Background(), searcher)
if err != nil {
b.Fatal(err)
}

View File

@ -10,7 +10,7 @@
package facets
import (
"container/list"
"sort"
"time"
"github.com/blevesearch/bleve/index"
@ -90,11 +90,8 @@ func (fb *DateTimeFacetBuilder) Result() *search.FacetResult {
Missing: fb.missing,
}
// FIXME: better implementation needed here; this is quick and dirty
topN := list.New()
rv.DateRanges = make([]*search.DateRangeFacet, 0, len(fb.termsCount))
// walk entries and find top N
OUTER:
for term, count := range fb.termsCount {
dateRange := fb.ranges[term]
tf := &search.DateRangeFacet{
@ -109,37 +106,19 @@ OUTER:
end := dateRange.end.Format(time.RFC3339Nano)
tf.End = &end
}
for e := topN.Front(); e != nil; e = e.Next() {
curr := e.Value.(*search.DateRangeFacet)
if tf.Count < curr.Count {
topN.InsertBefore(tf, e)
// if we just made the list too long
if topN.Len() > fb.size {
// remove the head
topN.Remove(topN.Front())
}
continue OUTER
}
}
// if we got to the end, we still have to add it
topN.PushBack(tf)
if topN.Len() > fb.size {
// remove the head
topN.Remove(topN.Front())
}
rv.DateRanges = append(rv.DateRanges, tf)
}
sort.Sort(rv.DateRanges)
// we now have the list of the top N facets
rv.DateRanges = make([]*search.DateRangeFacet, topN.Len())
i := 0
if fb.size < len(rv.DateRanges) {
rv.DateRanges = rv.DateRanges[:fb.size]
}
notOther := 0
for e := topN.Back(); e != nil; e = e.Prev() {
rv.DateRanges[i] = e.Value.(*search.DateRangeFacet)
i++
notOther += e.Value.(*search.DateRangeFacet).Count
for _, nr := range rv.DateRanges {
notOther += nr.Count
}
rv.Other = fb.total - notOther

View File

@ -66,9 +66,14 @@ func (tf TermFacets) Add(termFacet *TermFacet) TermFacets {
return tf
}
func (tf TermFacets) Len() int { return len(tf) }
func (tf TermFacets) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
func (tf TermFacets) Less(i, j int) bool { return tf[i].Count > tf[j].Count }
func (tf TermFacets) Len() int { return len(tf) }
func (tf TermFacets) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
func (tf TermFacets) Less(i, j int) bool {
if tf[i].Count == tf[j].Count {
return tf[i].Term < tf[j].Term
}
return tf[i].Count > tf[j].Count
}
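The secondary comparison on the term makes ties deterministic, which matters once facets are merged across indexes and truncated. A minimal sketch:
tf := TermFacets{
	&TermFacet{Term: "beer", Count: 2},
	&TermFacet{Term: "ale", Count: 2},
}
sort.Sort(tf)
// equal counts fall back to lexicographic order: "ale" sorts before "beer"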
type NumericRangeFacet struct {
Name string `json:"name"`
@ -91,9 +96,14 @@ func (nrf NumericRangeFacets) Add(numericRangeFacet *NumericRangeFacet) NumericR
return nrf
}
func (nrf NumericRangeFacets) Len() int { return len(nrf) }
func (nrf NumericRangeFacets) Swap(i, j int) { nrf[i], nrf[j] = nrf[j], nrf[i] }
func (nrf NumericRangeFacets) Less(i, j int) bool { return nrf[i].Count > nrf[j].Count }
func (nrf NumericRangeFacets) Len() int { return len(nrf) }
func (nrf NumericRangeFacets) Swap(i, j int) { nrf[i], nrf[j] = nrf[j], nrf[i] }
func (nrf NumericRangeFacets) Less(i, j int) bool {
if nrf[i].Count == nrf[j].Count {
return nrf[i].Name < nrf[j].Name
}
return nrf[i].Count > nrf[j].Count
}
type DateRangeFacet struct {
Name string `json:"name"`
@ -102,11 +112,34 @@ type DateRangeFacet struct {
Count int `json:"count"`
}
func (drf *DateRangeFacet) Same(other *DateRangeFacet) bool {
if drf.Start == nil && other.Start != nil {
return false
}
if drf.Start != nil && other.Start == nil {
return false
}
if drf.Start != nil && other.Start != nil && *drf.Start != *other.Start {
return false
}
if drf.End == nil && other.End != nil {
return false
}
if drf.End != nil && other.End == nil {
return false
}
if drf.End != nil && other.End != nil && *drf.End != *other.End {
return false
}
return true
}
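Same compares the pointed-to Start/End values rather than the pointers themselves, which Add below relies on; the merge test later in this change shows why pointer equality is not enough. A minimal sketch:
s1, s2 := "2010-01-01", "2010-01-01" // distinct allocations, equal values
a := &DateRangeFacet{Name: "low", End: &s1, Count: 1}
b := &DateRangeFacet{Name: "low", End: &s2, Count: 2}
// &s1 != &s2, yet a.Same(b) is true because the values match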
type DateRangeFacets []*DateRangeFacet
func (drf DateRangeFacets) Add(dateRangeFacet *DateRangeFacet) DateRangeFacets {
for _, existingDr := range drf {
if dateRangeFacet.Start == existingDr.Start && dateRangeFacet.End == existingDr.End {
if dateRangeFacet.Same(existingDr) {
existingDr.Count += dateRangeFacet.Count
return drf
}
@ -116,9 +149,14 @@ func (drf DateRangeFacets) Add(dateRangeFacet *DateRangeFacet) DateRangeFacets {
return drf
}
func (drf DateRangeFacets) Len() int { return len(drf) }
func (drf DateRangeFacets) Swap(i, j int) { drf[i], drf[j] = drf[j], drf[i] }
func (drf DateRangeFacets) Less(i, j int) bool { return drf[i].Count > drf[j].Count }
func (drf DateRangeFacets) Len() int { return len(drf) }
func (drf DateRangeFacets) Swap(i, j int) { drf[i], drf[j] = drf[j], drf[i] }
func (drf DateRangeFacets) Less(i, j int) bool {
if drf[i].Count == drf[j].Count {
return drf[i].Name < drf[j].Name
}
return drf[i].Count > drf[j].Count
}
type FacetResult struct {
Field string `json:"field"`

View File

@ -208,6 +208,12 @@ func TestDateFacetResultsMerge(t *testing.T) {
medhi := "2011-01-01"
hihigher := "2012-01-01"
// why a second copy? the pointers are to strings created by date time parsing
// inside the facet generation, so comparing pointers will not work
lowmed2 := "2010-01-01"
medhi2 := "2011-01-01"
hihigher2 := "2012-01-01"
fr1 := &FacetResult{
Field: "birthday",
Total: 100,
@ -245,18 +251,18 @@ func TestDateFacetResultsMerge(t *testing.T) {
DateRanges: []*DateRangeFacet{
&DateRangeFacet{
Name: "low",
End: &lowmed,
End: &lowmed2,
Count: 25,
},
&DateRangeFacet{
Name: "med",
Start: &lowmed,
End: &medhi,
Start: &lowmed2,
End: &medhi2,
Count: 22,
},
&DateRangeFacet{
Name: "highest",
Start: &hihigher,
Start: &hihigher2,
Count: 3,
},
},

View File

@ -35,6 +35,10 @@ func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations h
if termLocation == nil {
continue
}
// make sure the array positions match
if !highlight.SameArrayPositions(f.ArrayPositions, termLocation.ArrayPositions) {
continue
}
if termLocation.Start < curr {
continue
}

View File

@ -38,6 +38,10 @@ func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations h
if termLocation == nil {
continue
}
// make sure the array positions match
if !highlight.SameArrayPositions(f.ArrayPositions, termLocation.ArrayPositions) {
continue
}
if termLocation.Start < curr {
continue
}

View File

@ -32,7 +32,7 @@ func (s *FragmentScorer) Score(f *highlight.Fragment) {
OUTER:
for _, locations := range s.tlm {
for _, location := range locations {
if sameArrayPositions(f.ArrayPositions, location.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End {
if highlight.SameArrayPositions(f.ArrayPositions, location.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End {
score += 1.0
// once we find a term in the fragment
// don't care about additional matches

View File

@ -80,10 +80,9 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *docume
if f.Name() == field {
_, ok := f.(*document.TextField)
if ok {
termLocationsSameArrayPosition := make(highlight.TermLocations, 0)
for _, otl := range orderedTermLocations {
if sameArrayPositions(f.ArrayPositions(), otl.ArrayPositions) {
if highlight.SameArrayPositions(f.ArrayPositions(), otl.ArrayPositions) {
termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl)
}
}
@ -152,18 +151,6 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *docume
return formattedFragments
}
func sameArrayPositions(fieldArrayPositions []uint64, termLocationArrayPositions []float64) bool {
if len(fieldArrayPositions) != len(termLocationArrayPositions) {
return false
}
for i := 0; i < len(fieldArrayPositions); i++ {
if fieldArrayPositions[i] != uint64(termLocationArrayPositions[i]) {
return false
}
}
return true
}
// FragmentQueue implements heap.Interface and holds Items.
type FragmentQueue []*highlight.Fragment

View File

@ -98,3 +98,15 @@ func OrderTermLocations(tlm search.TermLocationMap) TermLocations {
sort.Sort(rv)
return rv
}
func SameArrayPositions(fieldArrayPositions []uint64, termLocationArrayPositions []float64) bool {
if len(fieldArrayPositions) != len(termLocationArrayPositions) {
return false
}
for i := 0; i < len(fieldArrayPositions); i++ {
if fieldArrayPositions[i] != uint64(termLocationArrayPositions[i]) {
return false
}
}
return true
}
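Exporting SameArrayPositions lets the formatters, fragment scorer, and highlighter above all apply one rule for matching a term location to a field's array positions. A minimal sketch of its behavior:
ok := SameArrayPositions([]uint64{0, 2}, []float64{0, 2}) // true: equal length and values
ok = SameArrayPositions([]uint64{0}, []float64{0, 1})     // false: length mismatch
_ = ok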

View File

@ -10,6 +10,7 @@
package searchers
import (
"fmt"
"math"
"sort"
@ -18,6 +19,11 @@ import (
"github.com/blevesearch/bleve/search/scorers"
)
// DisjunctionMaxClauseCount is a compile time setting that applications can
// adjust to a non-zero value to cause the DisjunctionSearcher to return an
// error instead of executing searches when the number of clauses exceeds this value.
var DisjunctionMaxClauseCount = 0
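A minimal sketch of opting in to the limit from application code (zero, the default, disables the check):
// cap disjunction fan-out the way Lucene caps boolean clauses
searchers.DisjunctionMaxClauseCount = 1024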
type DisjunctionSearcher struct {
initialized bool
indexReader index.IndexReader
@ -30,6 +36,9 @@ type DisjunctionSearcher struct {
}
func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, explain bool) (*DisjunctionSearcher, error) {
if DisjunctionMaxClauseCount != 0 && len(qsearchers) > DisjunctionMaxClauseCount {
return nil, fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", DisjunctionMaxClauseCount)
}
// build the downstream searchers
searchers := make(OrderedSearcherList, len(qsearchers))
for i, searcher := range qsearchers {

View File

@ -166,3 +166,41 @@ func TestDisjunctionAdvance(t *testing.T) {
t.Errorf("expected 3, got nil")
}
}
func TestDisjunctionSearchTooMany(t *testing.T) {
// set to max to a low non-zero value
DisjunctionMaxClauseCount = 2
defer func() {
// reset it after the test
DisjunctionMaxClauseCount = 0
}()
twoDocIndexReader, err := twoDocIndex.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := twoDocIndexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
steveTermSearcher, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
_, err = NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher, steveTermSearcher}, 0, true)
if err == nil {
t.Fatal("expected error for too many disjunction clauses, got nil")
}
}

View File

@ -55,11 +55,16 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzzin
return nil, err
}
err = fieldDict.Close()
if err != nil {
return nil, err
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, 25)
for _, cterm := range candidateTerms {
qsearcher, err := NewTermSearcher(indexReader, cterm, field, 1.0, explain)
qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
if err != nil {
return nil, err
}

View File

@ -61,7 +61,7 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, min *float64, max *f
qsearchers := make([]search.Searcher, len(terms))
for i, term := range terms {
var err error
qsearchers[i], err = NewTermSearcher(indexReader, string(term), field, 1.0, explain)
qsearchers[i], err = NewTermSearcher(indexReader, string(term), field, boost, explain)
if err != nil {
return nil, err
}

View File

@ -51,13 +51,18 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, fi
if err != nil {
return nil, err
}
err = fieldDict.Close()
if err != nil {
return nil, err
}
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, 25)
for _, cterm := range candidateTerms {
qsearcher, err := NewTermSearcher(indexReader, cterm, field, 1.0, explain)
qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
if err != nil {
return nil, err
}

View File

@ -37,6 +37,12 @@ func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, field s
qsearchers = append(qsearchers, qsearcher)
tfd, err = fieldDict.Next()
}
err = fieldDict.Close()
if err != nil {
return nil, err
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
if err != nil {

View File

@ -73,6 +73,11 @@ func TestSearchResultString(t *testing.T) {
func TestSearchResultMerge(t *testing.T) {
l := &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
MaxScore: 1,
Hits: search.DocumentMatchCollection{
@ -84,6 +89,11 @@ func TestSearchResultMerge(t *testing.T) {
}
r := &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
MaxScore: 2,
Hits: search.DocumentMatchCollection{
@ -95,6 +105,11 @@ func TestSearchResultMerge(t *testing.T) {
}
expected := &SearchResult{
Status: &SearchStatus{
Total: 2,
Successful: 2,
Errors: make(map[string]error),
},
Total: 2,
MaxScore: 2,
Hits: search.DocumentMatchCollection{

View File

@ -13,6 +13,7 @@ import (
"encoding/json"
"flag"
"io/ioutil"
"math"
"os"
"path/filepath"
"reflect"
@ -25,19 +26,25 @@ import (
// we must explicitly include any functionality we plan on testing
_ "github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer"
// allow choosing alternate kvstores
_ "github.com/blevesearch/bleve/config"
)
var dataset = flag.String("dataset", "", "only test datasets matching this regex")
var onlynum = flag.Int("testnum", -1, "only run the test with this number")
var keepIndex = flag.Bool("keepIndex", false, "keep the index after testing")
var indexType = flag.String("indexType", bleve.Config.DefaultIndexType, "index type to build")
var kvType = flag.String("kvType", bleve.Config.DefaultKVStore, "kv store type to build")
func TestIntegration(t *testing.T) {
flag.Parse()
bleve.Config.DefaultIndexType = *indexType
t.Logf("using index type %s", *indexType)
bleve.Config.DefaultKVStore = *kvType
t.Logf("using index type %s and kv type %s", *indexType, *kvType)
var err error
var datasetRegexp *regexp.Regexp
@ -152,48 +159,54 @@ func runTestDir(t *testing.T, dir, datasetName string) {
// run the searches
for testNum, search := range searches {
res, err := index.Search(search.Search)
if err != nil {
t.Errorf("error running search: %v", err)
}
if res.Total != search.Result.Total {
t.Errorf("test %d - expected total: %d got %d", testNum, search.Result.Total, res.Total)
continue
}
if len(res.Hits) != len(search.Result.Hits) {
t.Errorf("test %d - expected hits len: %d got %d", testNum, len(search.Result.Hits), len(res.Hits))
continue
}
for hi, hit := range search.Result.Hits {
if hit.ID != res.Hits[hi].ID {
t.Errorf("test %d - expected hit %d to have ID %s got %s", testNum, hi, hit.ID, res.Hits[hi].ID)
if *onlynum < 0 || testNum == *onlynum {
res, err := index.Search(search.Search)
if err != nil {
t.Errorf("error running search: %v", err)
}
if hit.Fields != nil {
if !reflect.DeepEqual(hit.Fields, res.Hits[hi].Fields) {
t.Errorf("test %d - expected hit %d to have fields %#v got %#v", testNum, hi, hit.Fields, res.Hits[hi].Fields)
if res.Total != search.Result.Total {
t.Errorf("test %d - expected total: %d got %d", testNum, search.Result.Total, res.Total)
continue
}
if len(res.Hits) != len(search.Result.Hits) {
t.Errorf("test %d - expected hits len: %d got %d", testNum, len(search.Result.Hits), len(res.Hits))
continue
}
for hi, hit := range search.Result.Hits {
if hit.ID != res.Hits[hi].ID {
t.Errorf("test %d - expected hit %d to have ID %s got %s", testNum, hi, hit.ID, res.Hits[hi].ID)
}
if hit.Fields != nil {
if !reflect.DeepEqual(hit.Fields, res.Hits[hi].Fields) {
t.Errorf("test %d - expected hit %d to have fields %#v got %#v", testNum, hi, hit.Fields, res.Hits[hi].Fields)
}
}
if hit.Fragments != nil {
if !reflect.DeepEqual(hit.Fragments, res.Hits[hi].Fragments) {
t.Errorf("test %d - expected hit %d to have fragments %#v got %#v", testNum, hi, hit.Fragments, res.Hits[hi].Fragments)
}
}
if hit.Locations != nil {
if !reflect.DeepEqual(hit.Locations, res.Hits[hi].Locations) {
t.Errorf("test %d - expected hit %d to have locations %v got %v", testNum, hi, hit.Locations, res.Hits[hi].Locations)
}
}
// assert that none of the scores were NaN,+Inf,-Inf
if math.IsInf(res.Hits[hi].Score, 0) || math.IsNaN(res.Hits[hi].Score) {
t.Errorf("test %d - invalid score %f", testNum, res.Hits[hi].Score)
}
}
if hit.Fragments != nil {
if !reflect.DeepEqual(hit.Fragments, res.Hits[hi].Fragments) {
t.Errorf("test %d - expected hit %d to have fragments %#v got %#v", testNum, hi, hit.Fragments, res.Hits[hi].Fragments)
if search.Result.Facets != nil {
if !reflect.DeepEqual(search.Result.Facets, res.Facets) {
t.Errorf("test %d - expected facets: %#v got %#v", testNum, search.Result.Facets, res.Facets)
}
}
if hit.Locations != nil {
if !reflect.DeepEqual(hit.Locations, res.Hits[hi].Locations) {
t.Errorf("test %d - expected hit %d to have locations %v got %v", testNum, hi, hit.Locations, res.Hits[hi].Locations)
}
}
}
if search.Result.Facets != nil {
if !reflect.DeepEqual(search.Result.Facets, res.Facets) {
t.Errorf("test %d - expected facets: %#v got %#v", testNum, search.Result.Facets, res.Facets)
}
}
// check that custom index name is in results
for _, hit := range res.Hits {
if hit.Index != datasetName {
t.Fatalf("expected name: %s, got: %s", datasetName, hit.Index)
// check that custom index name is in results
for _, hit := range res.Hits {
if hit.Index != datasetName {
t.Fatalf("expected name: %s, got: %s", datasetName, hit.Index)
}
}
}
}
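Together the new flags let one dataset, one test number, and any registered index/KV combination be exercised in isolation; a hedged invocation sketch (values are illustrative):
go test -run TestIntegration -args -dataset beer -testnum 3 -indexType upside_down -kvType boltdb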

View File

@ -616,5 +616,154 @@
"total_hits": 0,
"hits": []
}
},
{
"comment": "test wildcard matching term",
"search": {
"from": 0,
"size": 10,
"query": {
"field": "name",
"wildcard": "mar*"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"comment": "test boost - term query",
"search": {
"from": 0,
"size": 10,
"query": {
"disjuncts": [
{
"field": "name",
"term": "marti",
"boost": 1.0
},
{
"field": "name",
"term": "steve",
"boost": 5.0
}
]
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "b"
},
{
"id": "a"
}
]
}
},
{
"comment": "test boost - term query",
"search": {
"from": 0,
"size": 10,
"query": {
"disjuncts": [
{
"field": "name",
"term": "marti",
"boost": 1.0
},
{
"fuzziness": 1,
"field": "name",
"term": "steve",
"boost": 5.0
}
]
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "b"
},
{
"id": "a"
}
]
}
},
{
"comment": "test boost - numeric range query",
"search": {
"from": 0,
"size": 10,
"query": {
"disjuncts": [
{
"field": "name",
"term": "marti",
"boost": 1.0
},
{
"field": "age",
"min": 25,
"max": 29,
"boost": 50.0
}
]
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "b"
},
{
"id": "a"
}
]
}
},
{
"comment": "test boost - regexp query",
"search": {
"from": 0,
"size": 10,
"query": {
"disjuncts": [
{
"field": "name",
"term": "marti",
"boost": 1.0
},
{
"field": "name",
"regexp": "stev.*",
"boost": 5.0
}
]
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "b"
},
{
"id": "a"
}
]
}
}
]