Merge branch 'master' into documenting
This commit is contained in:
commit
47dbd85551
|
@ -16,3 +16,4 @@
|
|||
/utils/bleve_registry/bleve_registry
|
||||
/y.output
|
||||
*.test
|
||||
tags
|
||||
|
|
|
@ -1,16 +1,18 @@
|
|||
sudo: false
|
||||
|
||||
language: go
|
||||
|
||||
go:
|
||||
- 1.4
|
||||
- 1.5
|
||||
|
||||
script:
|
||||
- go get golang.org/x/tools/cmd/vet
|
||||
- go get golang.org/x/tools/cmd/cover
|
||||
- go get github.com/mattn/goveralls
|
||||
- go get github.com/kisielk/errcheck
|
||||
- go test -v ./...
|
||||
- go test -v ./test -indexType=firestorm
|
||||
- go vet ./...
|
||||
- errcheck $(go list ./... | grep -v bleve/http/mapping | grep -v bleve/index/store/metrics)
|
||||
- errcheck ./...
|
||||
- docs/project-code-coverage.sh
|
||||
- docs/build_children.sh
|
||||
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package web
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/analysis/language/en"
|
||||
"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
|
||||
webt "github.com/blevesearch/bleve/analysis/tokenizers/web"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "web"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
tokenizer, err := cache.TokenizerNamed(webt.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopEnFilter, err := cache.TokenFilterNamed(en.StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: tokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
stopEnFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
}
|
|
@ -18,7 +18,7 @@ func BenchmarkAnalysis(b *testing.B) {
|
|||
}
|
||||
|
||||
ts := analyzer.Analyze(bleveWikiArticle)
|
||||
freqs := analysis.TokenFrequency(ts, nil)
|
||||
freqs := analysis.TokenFrequency(ts, nil, true)
|
||||
if len(freqs) != 511 {
|
||||
b.Errorf("expected %d freqs, got %d", 511, len(freqs))
|
||||
}
|
||||
|
|
|
@ -26,6 +26,11 @@ type TokenLocation struct {
|
|||
type TokenFreq struct {
|
||||
Term []byte
|
||||
Locations []*TokenLocation
|
||||
frequency int
|
||||
}
|
||||
|
||||
func (tf *TokenFreq) Frequency() int {
|
||||
return tf.frequency
|
||||
}
|
||||
|
||||
// TokenFrequencies maps document terms to their combined frequencies from all
|
||||
|
@ -42,35 +47,57 @@ func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies)
|
|||
existingTf, exists := tfs[tfk]
|
||||
if exists {
|
||||
existingTf.Locations = append(existingTf.Locations, tf.Locations...)
|
||||
existingTf.frequency = existingTf.frequency + tf.frequency
|
||||
} else {
|
||||
tfs[tfk] = tf
|
||||
tfs[tfk] = &TokenFreq{
|
||||
Term: tf.Term,
|
||||
frequency: tf.frequency,
|
||||
Locations: make([]*TokenLocation, len(tf.Locations)),
|
||||
}
|
||||
copy(tfs[tfk].Locations, tf.Locations)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TokenFrequency(tokens TokenStream, arrayPositions []uint64) TokenFrequencies {
|
||||
func TokenFrequency(tokens TokenStream, arrayPositions []uint64, includeTermVectors bool) TokenFrequencies {
|
||||
rv := make(map[string]*TokenFreq, len(tokens))
|
||||
|
||||
for _, token := range tokens {
|
||||
curr, ok := rv[string(token.Term)]
|
||||
if ok {
|
||||
curr.Locations = append(curr.Locations, &TokenLocation{
|
||||
if includeTermVectors {
|
||||
tls := make([]TokenLocation, len(tokens))
|
||||
tlNext := 0
|
||||
|
||||
for _, token := range tokens {
|
||||
tls[tlNext] = TokenLocation{
|
||||
ArrayPositions: arrayPositions,
|
||||
Start: token.Start,
|
||||
End: token.End,
|
||||
Position: token.Position,
|
||||
})
|
||||
} else {
|
||||
rv[string(token.Term)] = &TokenFreq{
|
||||
Term: token.Term,
|
||||
Locations: []*TokenLocation{
|
||||
&TokenLocation{
|
||||
ArrayPositions: arrayPositions,
|
||||
Start: token.Start,
|
||||
End: token.End,
|
||||
Position: token.Position,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
curr, ok := rv[string(token.Term)]
|
||||
if ok {
|
||||
curr.Locations = append(curr.Locations, &tls[tlNext])
|
||||
curr.frequency++
|
||||
} else {
|
||||
rv[string(token.Term)] = &TokenFreq{
|
||||
Term: token.Term,
|
||||
Locations: []*TokenLocation{&tls[tlNext]},
|
||||
frequency: 1,
|
||||
}
|
||||
}
|
||||
|
||||
tlNext++
|
||||
}
|
||||
} else {
|
||||
for _, token := range tokens {
|
||||
curr, exists := rv[string(token.Term)]
|
||||
if exists {
|
||||
curr.frequency++
|
||||
} else {
|
||||
rv[string(token.Term)] = &TokenFreq{
|
||||
Term: token.Term,
|
||||
frequency: 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,9 +44,10 @@ func TestTokenFrequency(t *testing.T) {
|
|||
End: 11,
|
||||
},
|
||||
},
|
||||
frequency: 2,
|
||||
},
|
||||
}
|
||||
result := TokenFrequency(tokens, nil)
|
||||
result := TokenFrequency(tokens, nil, true)
|
||||
if !reflect.DeepEqual(result, expectedResult) {
|
||||
t.Errorf("expected %#v, got %#v", expectedResult, result)
|
||||
}
|
||||
|
|
|
@ -26,29 +26,82 @@ func NewUnicodeTokenizer() *UnicodeTokenizer {
|
|||
}
|
||||
|
||||
func (rt *UnicodeTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||
rvx := make([]analysis.TokenStream, 0, 10) // When rv gets full, append to rvx.
|
||||
rv := make(analysis.TokenStream, 0, 1)
|
||||
|
||||
rv := make(analysis.TokenStream, 0)
|
||||
ta := []analysis.Token(nil)
|
||||
taNext := 0
|
||||
|
||||
segmenter := segment.NewWordSegmenterDirect(input)
|
||||
start := 0
|
||||
pos := 1
|
||||
|
||||
guessRemaining := func(end int) int {
|
||||
avgSegmentLen := end / (len(rv) + 1)
|
||||
if avgSegmentLen < 1 {
|
||||
avgSegmentLen = 1
|
||||
}
|
||||
|
||||
remainingLen := len(input) - end
|
||||
|
||||
return remainingLen / avgSegmentLen
|
||||
}
|
||||
|
||||
for segmenter.Segment() {
|
||||
segmentBytes := segmenter.Bytes()
|
||||
end := start + len(segmentBytes)
|
||||
if segmenter.Type() != segment.None {
|
||||
token := analysis.Token{
|
||||
Term: segmentBytes,
|
||||
Start: start,
|
||||
End: end,
|
||||
Position: pos,
|
||||
Type: convertType(segmenter.Type()),
|
||||
if taNext >= len(ta) {
|
||||
remainingSegments := guessRemaining(end)
|
||||
if remainingSegments > 1000 {
|
||||
remainingSegments = 1000
|
||||
}
|
||||
if remainingSegments < 1 {
|
||||
remainingSegments = 1
|
||||
}
|
||||
|
||||
ta = make([]analysis.Token, remainingSegments)
|
||||
taNext = 0
|
||||
}
|
||||
rv = append(rv, &token)
|
||||
|
||||
token := &ta[taNext]
|
||||
taNext++
|
||||
|
||||
token.Term = segmentBytes
|
||||
token.Start = start
|
||||
token.End = end
|
||||
token.Position = pos
|
||||
token.Type = convertType(segmenter.Type())
|
||||
|
||||
if len(rv) >= cap(rv) { // When rv is full, save it into rvx.
|
||||
rvx = append(rvx, rv)
|
||||
|
||||
rvCap := cap(rv) * 2
|
||||
if rvCap > 256 {
|
||||
rvCap = 256
|
||||
}
|
||||
|
||||
rv = make(analysis.TokenStream, 0, rvCap) // Next rv cap is bigger.
|
||||
}
|
||||
|
||||
rv = append(rv, token)
|
||||
pos++
|
||||
}
|
||||
start = end
|
||||
|
||||
}
|
||||
|
||||
if len(rvx) > 0 {
|
||||
n := len(rv)
|
||||
for _, r := range rvx {
|
||||
n += len(r)
|
||||
}
|
||||
rall := make(analysis.TokenStream, 0, n)
|
||||
for _, r := range rvx {
|
||||
rall = append(rall, r...)
|
||||
}
|
||||
return append(rall, rv...)
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package web
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/analysis/tokenizers/exception"
|
||||
"github.com/blevesearch/bleve/analysis/tokenizers/unicode"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "web"
|
||||
|
||||
var email = `(?:[a-z0-9!#$%&'*+/=?^_` + "`" + `{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_` + "`" + `{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])`
|
||||
var url = `(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s` + "`" + `!()\[\]{};:'".,<>?«»“”‘’]))`
|
||||
var twitterHandle = `@([a-zA-Z0-9_]){1,15}`
|
||||
var twitterHashtag = `#([a-zA-Z0-9_])+`
|
||||
var exceptions = []string{email, url, twitterHandle, twitterHashtag}
|
||||
|
||||
var exceptionsRegexp = regexp.MustCompile(strings.Join(exceptions, "|"))
|
||||
|
||||
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
|
||||
remainingTokenizer, err := cache.TokenizerNamed(unicode.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return exception.NewExceptionsTokenizer(exceptionsRegexp, remainingTokenizer), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenizer(Name, TokenizerConstructor)
|
||||
}
|
|
@ -0,0 +1,143 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package web
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
)
|
||||
|
||||
func TestWeb(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
input []byte
|
||||
output analysis.TokenStream
|
||||
}{
|
||||
{
|
||||
[]byte("Hello info@blevesearch.com"),
|
||||
analysis.TokenStream{
|
||||
{
|
||||
Start: 0,
|
||||
End: 5,
|
||||
Term: []byte("Hello"),
|
||||
Position: 1,
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
{
|
||||
Start: 6,
|
||||
End: 26,
|
||||
Term: []byte("info@blevesearch.com"),
|
||||
Position: 2,
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]byte("That http://blevesearch.com"),
|
||||
analysis.TokenStream{
|
||||
{
|
||||
Start: 0,
|
||||
End: 4,
|
||||
Term: []byte("That"),
|
||||
Position: 1,
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
{
|
||||
Start: 5,
|
||||
End: 27,
|
||||
Term: []byte("http://blevesearch.com"),
|
||||
Position: 2,
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]byte("Hey @blevesearch"),
|
||||
analysis.TokenStream{
|
||||
{
|
||||
Start: 0,
|
||||
End: 3,
|
||||
Term: []byte("Hey"),
|
||||
Position: 1,
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
{
|
||||
Start: 4,
|
||||
End: 16,
|
||||
Term: []byte("@blevesearch"),
|
||||
Position: 2,
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]byte("This #bleve"),
|
||||
analysis.TokenStream{
|
||||
{
|
||||
Start: 0,
|
||||
End: 4,
|
||||
Term: []byte("This"),
|
||||
Position: 1,
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
{
|
||||
Start: 5,
|
||||
End: 11,
|
||||
Term: []byte("#bleve"),
|
||||
Position: 2,
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]byte("What about @blevesearch?"),
|
||||
analysis.TokenStream{
|
||||
{
|
||||
Start: 0,
|
||||
End: 4,
|
||||
Term: []byte("What"),
|
||||
Position: 1,
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
{
|
||||
Start: 5,
|
||||
End: 10,
|
||||
Term: []byte("about"),
|
||||
Position: 2,
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
{
|
||||
Start: 11,
|
||||
End: 23,
|
||||
Term: []byte("@blevesearch"),
|
||||
Position: 3,
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cache := registry.NewCache()
|
||||
tokenizer, err := cache.TokenizerNamed(Name)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
||||
actual := tokenizer.Tokenize(test.input)
|
||||
if !reflect.DeepEqual(actual, test.output) {
|
||||
t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
|
||||
}
|
||||
}
|
||||
}
|
|
@ -28,6 +28,7 @@ const (
|
|||
Shingle
|
||||
Single
|
||||
Double
|
||||
Boolean
|
||||
)
|
||||
|
||||
// Token represents one occurrence of a term at a particular location in a
|
||||
|
|
|
@ -20,6 +20,8 @@ import (
|
|||
"github.com/blevesearch/bleve/index/upside_down"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
"github.com/blevesearch/bleve/search/highlight/highlighters/html"
|
||||
|
||||
_ "github.com/blevesearch/bleve/index/firestorm"
|
||||
)
|
||||
|
||||
var bleveExpVar = expvar.NewMap("bleve")
|
||||
|
@ -64,6 +66,8 @@ func init() {
|
|||
|
||||
bootDuration := time.Since(bootStart)
|
||||
bleveExpVar.Add("bootDuration", int64(bootDuration))
|
||||
indexStats = NewIndexStats()
|
||||
bleveExpVar.Set("indexes", indexStats)
|
||||
}
|
||||
|
||||
var logger = log.New(ioutil.Discard, "bleve", log.LstdFlags)
|
||||
|
|
|
@ -35,6 +35,7 @@ import (
|
|||
_ "github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer"
|
||||
_ "github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer"
|
||||
_ "github.com/blevesearch/bleve/analysis/analyzers/standard_analyzer"
|
||||
_ "github.com/blevesearch/bleve/analysis/analyzers/web"
|
||||
|
||||
// token filters
|
||||
_ "github.com/blevesearch/bleve/analysis/token_filters/apostrophe_filter"
|
||||
|
@ -55,6 +56,7 @@ import (
|
|||
_ "github.com/blevesearch/bleve/analysis/tokenizers/regexp_tokenizer"
|
||||
_ "github.com/blevesearch/bleve/analysis/tokenizers/single_token"
|
||||
_ "github.com/blevesearch/bleve/analysis/tokenizers/unicode"
|
||||
_ "github.com/blevesearch/bleve/analysis/tokenizers/web"
|
||||
_ "github.com/blevesearch/bleve/analysis/tokenizers/whitespace_tokenizer"
|
||||
|
||||
// date time parsers
|
||||
|
@ -88,6 +90,7 @@ import (
|
|||
_ "github.com/blevesearch/bleve/index/store/gtreap"
|
||||
|
||||
// index types
|
||||
_ "github.com/blevesearch/bleve/index/firestorm"
|
||||
_ "github.com/blevesearch/bleve/index/upside_down"
|
||||
|
||||
// byte array converters
|
||||
|
|
|
@ -37,6 +37,7 @@ cat acc.out integration-acc.out | go run docs/merge-coverprofile.go > merged.out
|
|||
|
||||
if [ -n "$COVERALLS" ]
|
||||
then
|
||||
export GIT_BRANCH=$TRAVIS_BRANCH
|
||||
goveralls -service drone.io -coverprofile=merged.out -repotoken $COVERALLS
|
||||
fi
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@ type Document struct {
|
|||
ID string `json:"id"`
|
||||
Fields []Field `json:"fields"`
|
||||
CompositeFields []*CompositeField
|
||||
Number uint64 `json:"-"`
|
||||
}
|
||||
|
||||
func NewDocument(id string) *Document {
|
||||
|
|
|
@ -0,0 +1,93 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
)
|
||||
|
||||
const DefaultBooleanIndexingOptions = StoreField | IndexField
|
||||
|
||||
type BooleanField struct {
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options IndexingOptions
|
||||
value []byte
|
||||
}
|
||||
|
||||
func (b *BooleanField) Name() string {
|
||||
return b.name
|
||||
}
|
||||
|
||||
func (b *BooleanField) ArrayPositions() []uint64 {
|
||||
return b.arrayPositions
|
||||
}
|
||||
|
||||
func (b *BooleanField) Options() IndexingOptions {
|
||||
return b.options
|
||||
}
|
||||
|
||||
func (b *BooleanField) Analyze() (int, analysis.TokenFrequencies) {
|
||||
tokens := make(analysis.TokenStream, 0)
|
||||
tokens = append(tokens, &analysis.Token{
|
||||
Start: 0,
|
||||
End: len(b.value),
|
||||
Term: b.value,
|
||||
Position: 1,
|
||||
Type: analysis.Boolean,
|
||||
})
|
||||
|
||||
fieldLength := len(tokens)
|
||||
tokenFreqs := analysis.TokenFrequency(tokens, b.arrayPositions, b.options.IncludeTermVectors())
|
||||
return fieldLength, tokenFreqs
|
||||
}
|
||||
|
||||
func (b *BooleanField) Value() []byte {
|
||||
return b.value
|
||||
}
|
||||
|
||||
func (b *BooleanField) Boolean() (bool, error) {
|
||||
if len(b.value) == 1 {
|
||||
return b.value[0] == 'T', nil
|
||||
}
|
||||
return false, fmt.Errorf("boolean field has %d bytes", len(b.value))
|
||||
}
|
||||
|
||||
func (b *BooleanField) GoString() string {
|
||||
return fmt.Sprintf("&document.BooleanField{Name:%s, Options: %s, Value: %s}", b.name, b.options, b.value)
|
||||
}
|
||||
|
||||
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField {
|
||||
return &BooleanField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultNumericIndexingOptions,
|
||||
}
|
||||
}
|
||||
|
||||
func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField {
|
||||
return NewBooleanFieldWithIndexingOptions(name, arrayPositions, b, DefaultNumericIndexingOptions)
|
||||
}
|
||||
|
||||
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options IndexingOptions) *BooleanField {
|
||||
v := []byte("F")
|
||||
if b {
|
||||
v = []byte("T")
|
||||
}
|
||||
return &BooleanField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: v,
|
||||
options: options,
|
||||
}
|
||||
}
|
|
@ -75,7 +75,7 @@ func (n *DateTimeField) Analyze() (int, analysis.TokenFrequencies) {
|
|||
}
|
||||
|
||||
fieldLength := len(tokens)
|
||||
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions)
|
||||
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors())
|
||||
return fieldLength, tokenFreqs
|
||||
}
|
||||
|
||||
|
|
|
@ -71,7 +71,7 @@ func (n *NumericField) Analyze() (int, analysis.TokenFrequencies) {
|
|||
}
|
||||
|
||||
fieldLength := len(tokens)
|
||||
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions)
|
||||
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors())
|
||||
return fieldLength, tokenFreqs
|
||||
}
|
||||
|
||||
|
|
|
@ -60,7 +60,7 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
|
|||
}
|
||||
}
|
||||
fieldLength := len(tokens) // number of tokens in this doc field
|
||||
tokenFreqs := analysis.TokenFrequency(tokens, t.arrayPositions)
|
||||
tokenFreqs := analysis.TokenFrequency(tokens, t.arrayPositions, t.options.IncludeTermVectors())
|
||||
return fieldLength, tokenFreqs
|
||||
}
|
||||
|
||||
|
|
|
@ -64,6 +64,7 @@ func (h *CreateIndexHandler) ServeHTTP(w http.ResponseWriter, req *http.Request)
|
|||
showError(w, req, fmt.Sprintf("error creating index: %v", err), 500)
|
||||
return
|
||||
}
|
||||
newIndex.SetName(indexName)
|
||||
RegisterIndexName(indexName, newIndex)
|
||||
rv := struct {
|
||||
Status string `json:"status"`
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,428 +0,0 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the
|
||||
// License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0 Unless required by
|
||||
// applicable law or agreed to in writing, software distributed under
|
||||
// the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
|
||||
// OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mapping
|
||||
|
||||
//go:generate go-bindata-assetfs -pkg=mapping ./mapping_static/...
|
||||
//go:generate go fmt .
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
|
||||
"github.com/elazarl/go-bindata-assetfs"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
|
||||
"github.com/blevesearch/bleve"
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
)
|
||||
|
||||
func AssetFS() *assetfs.AssetFS {
|
||||
return assetFS()
|
||||
}
|
||||
|
||||
// RegisterHandlers registers mapping handlers on a router at the
|
||||
// given pathBase, such as at "/api".
|
||||
func RegisterHandlers(router *mux.Router, pathBase string) {
|
||||
router.HandleFunc(pathBase+"/_analyzerNames", ListAnalyzerNames).Methods("POST")
|
||||
router.HandleFunc(pathBase+"/_datetimeParserNames", ListDateTimeParserNames).Methods("POST")
|
||||
router.HandleFunc(pathBase+"/_charFilterNames", ListCharFilterNames).Methods("POST")
|
||||
router.HandleFunc(pathBase+"/_charFilterTypes", ListCharFilterTypes).Methods("GET")
|
||||
router.HandleFunc(pathBase+"/_tokenizerNames", ListTokenizerNames).Methods("POST")
|
||||
router.HandleFunc(pathBase+"/_tokenizerTypes", ListTokenizerTypes).Methods("GET")
|
||||
router.HandleFunc(pathBase+"/_tokenFilterNames", ListTokenFilterNames).Methods("POST")
|
||||
router.HandleFunc(pathBase+"/_tokenFilterTypes", ListTokenFilterTypes).Methods("GET")
|
||||
router.HandleFunc(pathBase+"/_tokenMapNames", ListTokenMapNames).Methods("POST")
|
||||
router.HandleFunc(pathBase+"/_analyze", AnalyzerText).Methods("POST")
|
||||
router.HandleFunc(pathBase+"/_validateMapping", ValidateMapping).Methods("POST")
|
||||
}
|
||||
|
||||
func ListAnalyzerNames(w http.ResponseWriter, req *http.Request) {
|
||||
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
|
||||
// read the request body
|
||||
requestBody, err := ioutil.ReadAll(req.Body)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error reading request body: %v", err), 400)
|
||||
return
|
||||
}
|
||||
|
||||
// interpret request body as index mapping
|
||||
if len(requestBody) > 0 {
|
||||
err := json.Unmarshal(requestBody, &indexMapping)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error parsing index mapping: %v", err), 400)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// built in analyzer names
|
||||
_, analyzerNames := registry.AnalyzerTypesAndInstances()
|
||||
// add custom analyzer names
|
||||
for name := range indexMapping.CustomAnalysis.Analyzers {
|
||||
analyzerNames = append(analyzerNames, name)
|
||||
}
|
||||
|
||||
sort.Strings(analyzerNames)
|
||||
|
||||
rv := struct {
|
||||
Status string `json:"status"`
|
||||
Analyzers []string `json:"analyzers"`
|
||||
}{
|
||||
Status: "ok",
|
||||
Analyzers: analyzerNames,
|
||||
}
|
||||
mustEncode(w, rv)
|
||||
}
|
||||
|
||||
func AnalyzerText(w http.ResponseWriter, req *http.Request) {
|
||||
// read the request body
|
||||
requestBody, err := ioutil.ReadAll(req.Body)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error reading request body: %v", err), 400)
|
||||
return
|
||||
}
|
||||
|
||||
mapping := bleve.NewIndexMapping()
|
||||
var analyzeRequest = struct {
|
||||
Analyzer string `json:"analyzer"`
|
||||
Text string `json:"text"`
|
||||
Mapping *bleve.IndexMapping `json:"mapping"`
|
||||
}{}
|
||||
|
||||
err = json.Unmarshal(requestBody, &analyzeRequest)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error parsing index mapping: %v", err), 400)
|
||||
return
|
||||
}
|
||||
|
||||
if analyzeRequest.Mapping != nil {
|
||||
mapping = analyzeRequest.Mapping
|
||||
}
|
||||
|
||||
ts, err := mapping.AnalyzeText(analyzeRequest.Analyzer, []byte(analyzeRequest.Text))
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error analyzing text: %v", err), 400)
|
||||
return
|
||||
}
|
||||
|
||||
rv := struct {
|
||||
Status string `json:"status"`
|
||||
Text string `json:"text"`
|
||||
TokenStream analysis.TokenStream `json:"token_stream"`
|
||||
}{
|
||||
Status: "ok",
|
||||
Text: analyzeRequest.Text,
|
||||
TokenStream: ts,
|
||||
}
|
||||
mustEncode(w, rv)
|
||||
}
|
||||
|
||||
func ListDateTimeParserNames(w http.ResponseWriter, req *http.Request) {
|
||||
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
|
||||
// read the request body
|
||||
requestBody, err := ioutil.ReadAll(req.Body)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error reading request body: %v", err), 400)
|
||||
return
|
||||
}
|
||||
|
||||
// interpret request body as index mapping
|
||||
if len(requestBody) > 0 {
|
||||
err := json.Unmarshal(requestBody, &indexMapping)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error parsing index mapping: %v", err), 400)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// built in char filter names
|
||||
_, dateTimeParserNames := registry.DateTimeParserTypesAndInstances()
|
||||
// add custom date time parser names
|
||||
for name := range indexMapping.CustomAnalysis.DateTimeParsers {
|
||||
dateTimeParserNames = append(dateTimeParserNames, name)
|
||||
}
|
||||
|
||||
sort.Strings(dateTimeParserNames)
|
||||
|
||||
rv := struct {
|
||||
Status string `json:"status"`
|
||||
DateTimeParsers []string `json:"datetime_parsers"`
|
||||
}{
|
||||
Status: "ok",
|
||||
DateTimeParsers: dateTimeParserNames,
|
||||
}
|
||||
mustEncode(w, rv)
|
||||
}
|
||||
|
||||
func ListCharFilterNames(w http.ResponseWriter, req *http.Request) {
|
||||
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
|
||||
// read the request body
|
||||
requestBody, err := ioutil.ReadAll(req.Body)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error reading request body: %v", err), 400)
|
||||
return
|
||||
}
|
||||
|
||||
// interpret request body as index mapping
|
||||
if len(requestBody) > 0 {
|
||||
err := json.Unmarshal(requestBody, &indexMapping)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error parsing index mapping: %v", err), 400)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// built in char filter names
|
||||
_, charFilterNames := registry.CharFilterTypesAndInstances()
|
||||
// add custom char filter names
|
||||
for name := range indexMapping.CustomAnalysis.CharFilters {
|
||||
charFilterNames = append(charFilterNames, name)
|
||||
}
|
||||
|
||||
sort.Strings(charFilterNames)
|
||||
|
||||
rv := struct {
|
||||
Status string `json:"status"`
|
||||
CharFilters []string `json:"char_filters"`
|
||||
}{
|
||||
Status: "ok",
|
||||
CharFilters: charFilterNames,
|
||||
}
|
||||
mustEncode(w, rv)
|
||||
}
|
||||
|
||||
func ListCharFilterTypes(w http.ResponseWriter, req *http.Request) {
|
||||
|
||||
// built in char filter names
|
||||
charFilterTypes, _ := registry.CharFilterTypesAndInstances()
|
||||
|
||||
sort.Strings(charFilterTypes)
|
||||
|
||||
rv := struct {
|
||||
Status string `json:"status"`
|
||||
CharFilterTypes []string `json:"char_filter_types"`
|
||||
}{
|
||||
Status: "ok",
|
||||
CharFilterTypes: charFilterTypes,
|
||||
}
|
||||
mustEncode(w, rv)
|
||||
}
|
||||
|
||||
func ListTokenizerNames(w http.ResponseWriter, req *http.Request) {
|
||||
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
|
||||
// read the request body
|
||||
requestBody, err := ioutil.ReadAll(req.Body)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error reading request body: %v", err), 400)
|
||||
return
|
||||
}
|
||||
|
||||
// interpret request body as index mapping
|
||||
if len(requestBody) > 0 {
|
||||
err := json.Unmarshal(requestBody, &indexMapping)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error parsing index mapping: %v", err), 400)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// built in char filter names
|
||||
_, tokenizerNames := registry.TokenizerTypesAndInstances()
|
||||
// add custom char filter names
|
||||
for name := range indexMapping.CustomAnalysis.Tokenizers {
|
||||
tokenizerNames = append(tokenizerNames, name)
|
||||
}
|
||||
|
||||
sort.Strings(tokenizerNames)
|
||||
|
||||
rv := struct {
|
||||
Status string `json:"status"`
|
||||
Tokenizers []string `json:"tokenizers"`
|
||||
}{
|
||||
Status: "ok",
|
||||
Tokenizers: tokenizerNames,
|
||||
}
|
||||
mustEncode(w, rv)
|
||||
}
|
||||
|
||||
func ListTokenizerTypes(w http.ResponseWriter, req *http.Request) {
|
||||
|
||||
// built in char filter names
|
||||
tokenizerTypes, _ := registry.TokenizerTypesAndInstances()
|
||||
|
||||
sort.Strings(tokenizerTypes)
|
||||
|
||||
rv := struct {
|
||||
Status string `json:"status"`
|
||||
TokenizerTypes []string `json:"tokenizer_types"`
|
||||
}{
|
||||
Status: "ok",
|
||||
TokenizerTypes: tokenizerTypes,
|
||||
}
|
||||
mustEncode(w, rv)
|
||||
}
|
||||
|
||||
func ListTokenFilterNames(w http.ResponseWriter, req *http.Request) {
|
||||
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
|
||||
// read the request body
|
||||
requestBody, err := ioutil.ReadAll(req.Body)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error reading request body: %v", err), 400)
|
||||
return
|
||||
}
|
||||
|
||||
// interpret request body as index mapping
|
||||
if len(requestBody) > 0 {
|
||||
err := json.Unmarshal(requestBody, &indexMapping)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error parsing index mapping: %v", err), 400)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// built in char filter names
|
||||
_, tokenFilterNames := registry.TokenFilterTypesAndInstances()
|
||||
// add custom char filter names
|
||||
for name := range indexMapping.CustomAnalysis.TokenFilters {
|
||||
tokenFilterNames = append(tokenFilterNames, name)
|
||||
}
|
||||
|
||||
sort.Strings(tokenFilterNames)
|
||||
|
||||
rv := struct {
|
||||
Status string `json:"status"`
|
||||
TokenFilters []string `json:"token_filters"`
|
||||
}{
|
||||
Status: "ok",
|
||||
TokenFilters: tokenFilterNames,
|
||||
}
|
||||
mustEncode(w, rv)
|
||||
}
|
||||
|
||||
func ListTokenFilterTypes(w http.ResponseWriter, req *http.Request) {
|
||||
|
||||
// built in char filter names
|
||||
tokenFilterTypes, _ := registry.TokenFilterTypesAndInstances()
|
||||
|
||||
sort.Strings(tokenFilterTypes)
|
||||
|
||||
rv := struct {
|
||||
Status string `json:"status"`
|
||||
TokenFilterTypes []string `json:"token_filter_types"`
|
||||
}{
|
||||
Status: "ok",
|
||||
TokenFilterTypes: tokenFilterTypes,
|
||||
}
|
||||
mustEncode(w, rv)
|
||||
}
|
||||
|
||||
func ListTokenMapNames(w http.ResponseWriter, req *http.Request) {
|
||||
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
|
||||
// read the request body
|
||||
requestBody, err := ioutil.ReadAll(req.Body)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error reading request body: %v", err), 400)
|
||||
return
|
||||
}
|
||||
|
||||
// interpret request body as index mapping
|
||||
if len(requestBody) > 0 {
|
||||
err := json.Unmarshal(requestBody, &indexMapping)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error parsing index mapping: %v", err), 400)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// built in char filter names
|
||||
_, tokenMapNames := registry.TokenMapTypesAndInstances()
|
||||
// add custom char map names
|
||||
for name := range indexMapping.CustomAnalysis.TokenMaps {
|
||||
tokenMapNames = append(tokenMapNames, name)
|
||||
}
|
||||
|
||||
sort.Strings(tokenMapNames)
|
||||
|
||||
rv := struct {
|
||||
Status string `json:"status"`
|
||||
TokenMaps []string `json:"token_maps"`
|
||||
}{
|
||||
Status: "ok",
|
||||
TokenMaps: tokenMapNames,
|
||||
}
|
||||
mustEncode(w, rv)
|
||||
}
|
||||
|
||||
func ValidateMapping(w http.ResponseWriter, req *http.Request) {
|
||||
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
|
||||
// read the request body
|
||||
requestBody, err := ioutil.ReadAll(req.Body)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error reading request body: %v", err), 400)
|
||||
return
|
||||
}
|
||||
|
||||
// interpret request body as index mapping
|
||||
if len(requestBody) > 0 {
|
||||
err := json.Unmarshal(requestBody, &indexMapping)
|
||||
if err != nil {
|
||||
showError(w, req, fmt.Sprintf("error parsing index mapping: %v", err), 400)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
rv := struct {
|
||||
Status string `json:"status"`
|
||||
}{
|
||||
Status: "ok",
|
||||
}
|
||||
mustEncode(w, rv)
|
||||
|
||||
}
|
||||
|
||||
func showError(w http.ResponseWriter, r *http.Request,
|
||||
msg string, code int) {
|
||||
log.Printf("Reporting error %v/%v", code, msg)
|
||||
http.Error(w, msg, code)
|
||||
}
|
||||
|
||||
func mustEncode(w io.Writer, i interface{}) {
|
||||
if headered, ok := w.(http.ResponseWriter); ok {
|
||||
headered.Header().Set("Cache-Control", "no-cache")
|
||||
headered.Header().Set("Content-type", "application/json")
|
||||
}
|
||||
|
||||
e := json.NewEncoder(w)
|
||||
if err := e.Encode(i); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
|
@ -1,144 +0,0 @@
|
|||
var AnalyzerModalCtrl = function ($scope, $modalInstance, $http, name, value, mapping) {
|
||||
$scope.origName = name;
|
||||
$scope.name = name;
|
||||
$scope.errorMessage = "";
|
||||
$scope.formpath = "";
|
||||
$scope.mapping = mapping;
|
||||
|
||||
$scope.analyzer = {};
|
||||
// copy in value for editing
|
||||
for (var k in value) {
|
||||
// need deeper copy of nested arrays
|
||||
if (k == "char_filters") {
|
||||
newcharfilters = [];
|
||||
for (var cfi in value.char_filters) {
|
||||
newcharfilters.push(value.char_filters[cfi]);
|
||||
}
|
||||
$scope.analyzer.char_filters = newcharfilters;
|
||||
} else if (k == "token_filters") {
|
||||
newtokenfilters = [];
|
||||
for (var tfi in value.token_filters) {
|
||||
newtokenfilters.push(value.token_filters[tfi]);
|
||||
}
|
||||
$scope.analyzer.token_filters = newtokenfilters;
|
||||
} else {
|
||||
$scope.analyzer[k] = value[k];
|
||||
}
|
||||
}
|
||||
|
||||
$scope.tokenizerNames = [];
|
||||
|
||||
$scope.loadTokenizerNames = function() {
|
||||
$http.post('/api/_tokenizerNames',mapping).success(function(data) {
|
||||
$scope.tokenizerNames = data.tokenizers;
|
||||
}).
|
||||
error(function(data, code) {
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
};
|
||||
|
||||
$scope.loadTokenizerNames();
|
||||
|
||||
$scope.charFilterNames = [];
|
||||
|
||||
$scope.loadCharFilterNames = function() {
|
||||
$http.post('/api/_charFilterNames',mapping).success(function(data) {
|
||||
$scope.charFilterNames = data.char_filters;
|
||||
}).
|
||||
error(function(data, code) {
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
};
|
||||
|
||||
$scope.loadCharFilterNames();
|
||||
|
||||
$scope.addCharFilter = function(scope) {
|
||||
filter = scope.addCharacterFilterName;
|
||||
if (filter !== undefined && filter !== "") {
|
||||
$scope.selectedAnalyzer.char_filters.push(filter);
|
||||
}
|
||||
console.log($scope.selectedAnalyzer.char_filters);
|
||||
};
|
||||
|
||||
$scope.removeCharFilter = function(index) {
|
||||
$scope.selectedAnalyzer.char_filters.splice(index, 1);
|
||||
};
|
||||
|
||||
$scope.tokenFilterNames = [];
|
||||
|
||||
$scope.loadTokenFilterNames = function() {
|
||||
$http.post('/api/_tokenFilterNames',mapping).success(function(data) {
|
||||
$scope.tokenFilterNames = data.token_filters;
|
||||
}).
|
||||
error(function(data, code) {
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
};
|
||||
|
||||
$scope.loadTokenFilterNames();
|
||||
|
||||
$scope.addCharFilter = function(scope) {
|
||||
filter = scope.addCharacterFilterName;
|
||||
if (filter !== undefined && filter !== "") {
|
||||
$scope.analyzer.char_filters.push(filter);
|
||||
}
|
||||
console.log($scope.analyzer.char_filters);
|
||||
};
|
||||
|
||||
$scope.removeCharFilter = function(index) {
|
||||
$scope.analyzer.char_filters.splice(index, 1);
|
||||
};
|
||||
|
||||
$scope.addTokenFilter = function(scope) {
|
||||
filter = scope.addTokenFilterName;
|
||||
if (filter !== undefined && filter !== "") {
|
||||
$scope.analyzer.token_filters.push(filter);
|
||||
}
|
||||
console.log($scope.analyzer.token_filters);
|
||||
};
|
||||
|
||||
$scope.removeTokenFilter = function(index) {
|
||||
$scope.analyzer.token_filters.splice(index, 1);
|
||||
};
|
||||
|
||||
$scope.cancel = function () {
|
||||
$modalInstance.dismiss('cancel');
|
||||
};
|
||||
|
||||
$scope.build = function() {
|
||||
// must have a name
|
||||
if (!$scope.name) {
|
||||
$scope.errorMessage = "Name is required";
|
||||
return;
|
||||
}
|
||||
|
||||
// name must not already be used
|
||||
if ($scope.name != $scope.origName && $scope.mapping.analysis.analyzers[$scope.name]) {
|
||||
$scope.errorMessage = "Analyzer named '" + $scope.name + "' already exists";
|
||||
return;
|
||||
}
|
||||
|
||||
// ensure that this new mapping component is valid
|
||||
analysis = {};
|
||||
for (var ak in $scope.mapping.analysis) {
|
||||
analysis[ak] = $scope.mapping.analysis[ak];
|
||||
}
|
||||
analyzers = {};
|
||||
analyzers[$scope.name] = $scope.analyzer;
|
||||
analysis["analyzers"] = analyzers;
|
||||
testMapping = {
|
||||
"analysis": analysis
|
||||
};
|
||||
$http.post('/api/_validateMapping',testMapping).success(function(data) {
|
||||
// if its valid return it
|
||||
result = {};
|
||||
result[$scope.name] = $scope.analyzer;
|
||||
$modalInstance.close(result);
|
||||
}).
|
||||
error(function(data, code) {
|
||||
// otherwise display error
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
|
||||
};
|
||||
};
|
|
@ -1,105 +0,0 @@
|
|||
var CharFilterModalCtrl = function ($scope, $modalInstance, $http, name, value, mapping) {
|
||||
$scope.origName = name;
|
||||
$scope.name = name;
|
||||
$scope.errorMessage = "";
|
||||
$scope.formpath = "";
|
||||
$scope.mapping = mapping;
|
||||
|
||||
$scope.charfilter = {};
|
||||
// copy in value for editing
|
||||
for (var k in value) {
|
||||
$scope.charfilter[k] = value[k];
|
||||
}
|
||||
|
||||
$scope.unknownCharFilterTypeTemplate = "/static/partials/analysis/charfilters/generic.html";
|
||||
$scope.charFilterTypeTemplates = {
|
||||
"regexp": "/static/partials/analysis/charfilters/regexp.html",
|
||||
};
|
||||
$scope.charFilterTypeDefaults = {
|
||||
"regexp": function() {
|
||||
return {
|
||||
"regexp": "",
|
||||
"replace": ""
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
$scope.charFilterTypes = [];
|
||||
|
||||
updateCharFilterTypes = function() {
|
||||
$http.get('/api/_charFilterTypes').success(function(data) {
|
||||
$scope.charFilterTypes = data.char_filter_types;
|
||||
}).
|
||||
error(function(data, code) {
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
};
|
||||
|
||||
updateCharFilterTypes();
|
||||
|
||||
if (!$scope.charfilter.type) {
|
||||
defaultType = "regexp";
|
||||
if ($scope.charFilterTypeDefaults[defaultType]) {
|
||||
$scope.charfilter = $scope.charFilterTypeDefaults[defaultType]();
|
||||
}
|
||||
else {
|
||||
$scope.charfilter = {};
|
||||
}
|
||||
$scope.charfilter.type = defaultType;
|
||||
|
||||
}
|
||||
$scope.formpath = $scope.charFilterTypeTemplates[$scope.charfilter.type];
|
||||
|
||||
$scope.charFilterTypeChange = function() {
|
||||
newType = $scope.charfilter.type;
|
||||
if ($scope.charFilterTypeDefaults[$scope.charfilter.type]) {
|
||||
$scope.charfilter = $scope.charFilterTypeDefaults[$scope.charfilter.type]();
|
||||
} else {
|
||||
$scope.charfilter = {};
|
||||
}
|
||||
$scope.charfilter.type = newType;
|
||||
if ($scope.charFilterTypeTemplates[$scope.charfilter.type]) {
|
||||
$scope.formpath = $scope.charFilterTypeTemplates[$scope.charfilter.type];
|
||||
} else {
|
||||
$scope.formpath = unknownCharFilterTypeTemplate;
|
||||
}
|
||||
};
|
||||
|
||||
$scope.cancel = function () {
|
||||
$modalInstance.dismiss('cancel');
|
||||
};
|
||||
|
||||
$scope.build = function() {
|
||||
// must have a name
|
||||
if (!$scope.name) {
|
||||
$scope.errorMessage = "Name is required";
|
||||
return;
|
||||
}
|
||||
|
||||
// name must not already be used
|
||||
if ($scope.name != $scope.origName && $scope.mapping.analysis.char_filters[$scope.name]) {
|
||||
$scope.errorMessage = "Character filter named '" + $scope.name + "' already exists";
|
||||
return;
|
||||
}
|
||||
|
||||
// ensure that this new mapping component is valid
|
||||
charFilters = {};
|
||||
charFilters[$scope.name] = $scope.charfilter;
|
||||
testMapping = {
|
||||
"analysis": {
|
||||
"char_filters": charFilters
|
||||
}
|
||||
};
|
||||
$http.post('/api/_validateMapping',testMapping).success(function(data) {
|
||||
// if its valid return it
|
||||
result = {};
|
||||
result[$scope.name] = $scope.charfilter;
|
||||
$modalInstance.close(result);
|
||||
}).
|
||||
error(function(data, code) {
|
||||
// otherwise display error
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
|
||||
};
|
||||
};
|
|
@ -1,179 +0,0 @@
|
|||
var TokenFilterModalCtrl = function ($scope, $modalInstance, $http, name, value, mapping) {
|
||||
$scope.origName = name;
|
||||
$scope.name = name;
|
||||
$scope.errorMessage = "";
|
||||
$scope.formpath = "";
|
||||
$scope.mapping = mapping;
|
||||
|
||||
$scope.tokenfilter = {};
|
||||
// copy in value for editing
|
||||
for (var k in value) {
|
||||
$scope.tokenfilter[k] = value[k];
|
||||
}
|
||||
|
||||
$scope.tokenMapNames = [];
|
||||
|
||||
$scope.loadTokenMapNames = function() {
|
||||
$http.post('/api/_tokenMapNames',mapping).success(function(data) {
|
||||
$scope.tokenMapNames = data.token_maps;
|
||||
}).
|
||||
error(function(data, code) {
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
};
|
||||
|
||||
$scope.loadTokenMapNames();
|
||||
|
||||
$scope.unknownTokenFilterTypeTemplate = "/static/partials/analysis/tokenfilters/generic.html";
|
||||
$scope.tokenFilterTypeTemplates = {
|
||||
"dict_compound": "/static/partials/analysis/tokenfilters/dict_compound.html",
|
||||
"edge_ngram": "/static/partials/analysis/tokenfilters/edge_ngram.html",
|
||||
"elision": "/static/partials/analysis/tokenfilters/elision.html",
|
||||
"keyword_marker": "/static/partials/analysis/tokenfilters/keyword_marker.html",
|
||||
"length": "/static/partials/analysis/tokenfilters/length.html",
|
||||
"ngram": "/static/partials/analysis/tokenfilters/ngram.html",
|
||||
"normalize_unicode": "/static/partials/analysis/tokenfilters/normalize_unicode.html",
|
||||
"shingle": "/static/partials/analysis/tokenfilters/shingle.html",
|
||||
"stop_tokens": "/static/partials/analysis/tokenfilters/stop_tokens.html",
|
||||
"truncate_token": "/static/partials/analysis/tokenfilters/truncate_token.html",
|
||||
};
|
||||
$scope.tokenFilterTypeDefaults = {
|
||||
"dict_compound": function() {
|
||||
return {
|
||||
"dict_token_map": $scope.tokenMapNames[0]
|
||||
};
|
||||
},
|
||||
"edge_ngram": function() {
|
||||
return {
|
||||
"edge": "front",
|
||||
"min": 3,
|
||||
"max": 3,
|
||||
};
|
||||
},
|
||||
"elision": function() {
|
||||
return {
|
||||
"articles_token_map": $scope.tokenMapNames[0]
|
||||
};
|
||||
},
|
||||
"keyword_marker": function() {
|
||||
return {
|
||||
"keywords_token_map": $scope.tokenMapNames[0]
|
||||
};
|
||||
},
|
||||
"length": function() {
|
||||
return {
|
||||
"min": 3,
|
||||
"max": 255
|
||||
};
|
||||
},
|
||||
"ngram": function() {
|
||||
return {
|
||||
"min": 3,
|
||||
"max": 3
|
||||
};
|
||||
},
|
||||
"normalize_unicode": function() {
|
||||
return {
|
||||
"form": "nfc"
|
||||
};
|
||||
},
|
||||
"shingle": function() {
|
||||
return {
|
||||
"min": 2,
|
||||
"max": 2,
|
||||
"output_original": false,
|
||||
"separator": "",
|
||||
"filler": ""
|
||||
};
|
||||
},
|
||||
"stop_tokens": function() {
|
||||
return {
|
||||
"stop_token_map": $scope.tokenMapNames[0]
|
||||
};
|
||||
},
|
||||
"truncate_token": function() {
|
||||
return {
|
||||
"length": 25
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
$scope.tokenFilterTypes = [];
|
||||
|
||||
updateTokenFilterTypes = function() {
|
||||
$http.get('/api/_tokenFilterTypes').success(function(data) {
|
||||
$scope.tokenFilterTypes = data.token_filter_types;
|
||||
}).
|
||||
error(function(data, code) {
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
};
|
||||
|
||||
updateTokenFilterTypes();
|
||||
|
||||
if (!$scope.tokenfilter.type) {
|
||||
defaultType = "length";
|
||||
if ($scope.tokenFilterTypeDefaults[defaultType]) {
|
||||
$scope.tokenfilter = $scope.tokenFilterTypeDefaults[defaultType]();
|
||||
}
|
||||
else {
|
||||
$scope.tokenfilter = {};
|
||||
}
|
||||
$scope.tokenfilter.type = defaultType;
|
||||
}
|
||||
$scope.formpath = $scope.tokenFilterTypeTemplates[$scope.tokenfilter.type];
|
||||
|
||||
$scope.tokenFilterTypeChange = function() {
|
||||
newType = $scope.tokenfilter.type;
|
||||
if ($scope.tokenFilterTypeDefaults[$scope.tokenfilter.type]) {
|
||||
$scope.tokenfilter = $scope.tokenFilterTypeDefaults[$scope.tokenfilter.type]();
|
||||
} else {
|
||||
$scope.tokenfilter = {};
|
||||
}
|
||||
$scope.tokenfilter.type = newType;
|
||||
if ($scope.tokenFilterTypeTemplates[$scope.tokenfilter.type]) {
|
||||
$scope.formpath = $scope.tokenFilterTypeTemplates[$scope.tokenfilter.type];
|
||||
} else {
|
||||
$scope.formpath = $scope.unknownTokenFilterTypeTemplate;
|
||||
}
|
||||
};
|
||||
|
||||
$scope.cancel = function () {
|
||||
$modalInstance.dismiss('cancel');
|
||||
};
|
||||
|
||||
$scope.build = function() {
|
||||
// must have a name
|
||||
if (!$scope.name) {
|
||||
$scope.errorMessage = "Name is required";
|
||||
return;
|
||||
}
|
||||
|
||||
// name must not already be used
|
||||
if ($scope.name != $scope.origName && $scope.mapping.analysis.token_filters[$scope.name]) {
|
||||
$scope.errorMessage = "Token filter named '" + $scope.name + "' already exists";
|
||||
return;
|
||||
}
|
||||
|
||||
// ensure that this new mapping component is valid
|
||||
tokenfilters = {};
|
||||
tokenfilters[$scope.name] = $scope.tokenfilter;
|
||||
testMapping = {
|
||||
"analysis": {
|
||||
"token_filters": tokenfilters,
|
||||
"token_maps": $scope.mapping.analysis.token_maps
|
||||
}
|
||||
};
|
||||
$http.post('/api/_validateMapping',testMapping).success(function(data) {
|
||||
// if its valid return it
|
||||
result = {};
|
||||
result[$scope.name] = $scope.tokenfilter;
|
||||
$modalInstance.close(result);
|
||||
}).
|
||||
error(function(data, code) {
|
||||
// otherwise display error
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
|
||||
};
|
||||
};
|
|
@ -1,138 +0,0 @@
|
|||
var TokenizerModalCtrl = function ($scope, $modalInstance, $http, name, value, mapping) {
|
||||
$scope.origName = name;
|
||||
$scope.name = name;
|
||||
$scope.errorMessage = "";
|
||||
$scope.formpath = "";
|
||||
$scope.mapping = mapping;
|
||||
|
||||
$scope.tokenizer = {};
|
||||
// copy in value for editing
|
||||
for (var k in value) {
|
||||
$scope.tokenizer[k] = value[k];
|
||||
}
|
||||
|
||||
$scope.tokenizerNames = [];
|
||||
|
||||
$scope.loadTokenizerNames = function() {
|
||||
$http.post('/api/_tokenizerNames',mapping).success(function(data) {
|
||||
$scope.tokenizerNames = data.tokenizers;
|
||||
}).
|
||||
error(function(data, code) {
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
};
|
||||
|
||||
$scope.loadTokenizerNames();
|
||||
|
||||
$scope.unknownTokenizerTypeTemplate = "/static/partials/analysis/tokenizers/generic.html";
|
||||
$scope.tokenizerTypeTemplates = {
|
||||
"regexp": "/static/partials/analysis/tokenizers/regexp.html",
|
||||
"exception": "/static/partials/analysis/tokenizers/exception.html"
|
||||
};
|
||||
$scope.tokenizerTypeDefaults = {
|
||||
"regexp": function() {
|
||||
return {
|
||||
"regexp": ""
|
||||
};
|
||||
},
|
||||
"exception": function() {
|
||||
return {
|
||||
"exceptions": [],
|
||||
"tokenizer": "unicode"
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
$scope.tokenizerTypes = [];
|
||||
|
||||
updateTokenizerTypes = function() {
|
||||
$http.get('/api/_tokenizerTypes').success(function(data) {
|
||||
$scope.tokenizerTypes = data.tokenizer_types;
|
||||
}).
|
||||
error(function(data, code) {
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
};
|
||||
|
||||
updateTokenizerTypes();
|
||||
|
||||
if (!$scope.tokenizer.type) {
|
||||
defaultType = "regexp";
|
||||
if ($scope.tokenizerTypeDefaults[defaultType]) {
|
||||
$scope.tokenizer = $scope.tokenizerTypeDefaults[defaultType]();
|
||||
}
|
||||
else {
|
||||
$scope.tokenizer = {};
|
||||
}
|
||||
$scope.tokenizer.type = defaultType;
|
||||
}
|
||||
$scope.formpath = $scope.tokenizerTypeTemplates[$scope.tokenizer.type];
|
||||
|
||||
$scope.tokenizerTypeChange = function() {
|
||||
newType = $scope.tokenizer.type;
|
||||
if ($scope.tokenizerTypeDefaults[$scope.tokenizer.type]) {
|
||||
$scope.tokenizer = $scope.tokenizerTypeDefaults[$scope.tokenizer.type]();
|
||||
} else {
|
||||
$scope.tokenizer = {};
|
||||
}
|
||||
$scope.tokenizer.type = newType;
|
||||
if ($scope.tokenizerTypeTemplates[$scope.tokenizer.type]) {
|
||||
$scope.formpath = $scope.tokenizerTypeTemplates[$scope.tokenizer.type];
|
||||
} else {
|
||||
$scope.formpath = $scope.unknownTokenizerTypeTemplate;
|
||||
}
|
||||
};
|
||||
|
||||
$scope.addException = function(scope) {
|
||||
if (scope.newregexp) {
|
||||
$scope.tokenizer.exceptions.push(scope.newregexp);
|
||||
scope.newregexp = "";
|
||||
}
|
||||
};
|
||||
|
||||
$scope.removeException = function(index) {
|
||||
$scope.tokenizer.exceptions.splice(index, 1);
|
||||
};
|
||||
|
||||
$scope.cancel = function () {
|
||||
$modalInstance.dismiss('cancel');
|
||||
};
|
||||
|
||||
$scope.build = function() {
|
||||
// must have a name
|
||||
if (!$scope.name) {
|
||||
$scope.errorMessage = "Name is required";
|
||||
return;
|
||||
}
|
||||
|
||||
// name must not already be used
|
||||
if ($scope.name != $scope.origName && $scope.mapping.analysis.tokenizers[$scope.name]) {
|
||||
$scope.errorMessage = "Tokenizer named '" + $scope.name + "' already exists";
|
||||
return;
|
||||
}
|
||||
|
||||
// ensure that this new mapping component is valid
|
||||
tokenizers = {};
|
||||
tokenizers[$scope.name] = $scope.tokenizer;
|
||||
// add in all the existing tokenizers, since we might be referencing them
|
||||
for (var t in $scope.mapping.analysis.tokenizers) {
|
||||
tokenizers[t] = $scope.mapping.analysis.tokenizers[t];
|
||||
}
|
||||
testMapping = {
|
||||
"analysis": {
|
||||
"tokenizers": tokenizers
|
||||
}
|
||||
};
|
||||
$http.post('/api/_validateMapping',testMapping).success(function(data) {
|
||||
// if its valid return it
|
||||
result = {};
|
||||
result[$scope.name] = $scope.tokenizer;
|
||||
$modalInstance.close(result);
|
||||
}).
|
||||
error(function(data, code) {
|
||||
// otherwise display error
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
|
||||
};
|
||||
};
|
|
@ -1,51 +0,0 @@
|
|||
var WordListModalCtrl = function ($scope, $modalInstance, name, words, mapping) {
|
||||
$scope.name = name;
|
||||
$scope.origName = name;
|
||||
$scope.errorMessage = "";
|
||||
$scope.newWord = "";
|
||||
$scope.words = words.slice(0); // create copy
|
||||
$scope.selectedWords = [];
|
||||
$scope.mapping = mapping;
|
||||
|
||||
$scope.cancel = function () {
|
||||
$modalInstance.dismiss('cancel');
|
||||
};
|
||||
|
||||
$scope.addWord = function() {
|
||||
if ($scope.newWord) {
|
||||
$scope.words.push($scope.newWord);
|
||||
$scope.newWord = "";
|
||||
}
|
||||
};
|
||||
|
||||
$scope.removeWord = function() {
|
||||
// sort the selected word indexes into descending order
|
||||
// so we can delete items without having to adjust indexes
|
||||
$scope.selectedWords.sort(function(a,b){ return b - a; });
|
||||
for (var index in $scope.selectedWords) {
|
||||
$scope.words.splice($scope.selectedWords[index], 1);
|
||||
}
|
||||
$scope.selectedWords = [];
|
||||
};
|
||||
|
||||
$scope.build = function() {
|
||||
// must have a name
|
||||
if (!$scope.name) {
|
||||
$scope.errorMessage = "Name is required";
|
||||
return;
|
||||
}
|
||||
|
||||
// name must not already be used
|
||||
if ($scope.name != $scope.origName && $scope.mapping.analysis.token_maps[$scope.name]) {
|
||||
$scope.errorMessage = "Word list named '" + $scope.name + "' already exists";
|
||||
return;
|
||||
}
|
||||
|
||||
result = {};
|
||||
result[$scope.name] = {
|
||||
"type": "custom",
|
||||
"tokens": $scope.words
|
||||
};
|
||||
$modalInstance.close(result);
|
||||
};
|
||||
};
|
|
@ -1,379 +0,0 @@
|
|||
// controller responsible for building a custom analysis components
|
||||
|
||||
function AnalysisCtrl($scope, $http, $routeParams, $log, $sce, $location, $modal) {
|
||||
|
||||
// analyzers
|
||||
|
||||
$scope.newAnalyzer = function () {
|
||||
return $scope.editAnalyzer("", {
|
||||
"type": "custom",
|
||||
"char_filters": [],
|
||||
"tokenizer": "unicode",
|
||||
"token_filters": []
|
||||
});
|
||||
};
|
||||
|
||||
$scope.deleteAnalyzer = function (name) {
|
||||
used = $scope.isAnalyzerUsed(name);
|
||||
if (used) {
|
||||
alert("This analyzer cannot be deleted because it is being used by the " + used + ".");
|
||||
return;
|
||||
}
|
||||
if (confirm("Are you sure you want to delete '" + name + "'?")) {
|
||||
delete $scope.$parent.mapping.analysis.analyzers[name];
|
||||
}
|
||||
};
|
||||
|
||||
$scope.isAnalyzerUsed = function(name) {
|
||||
// analyzers are used in mappings (in various places)
|
||||
|
||||
// first check index level default analyzer
|
||||
if ($scope.$parent.mapping.default_analyzer == name) {
|
||||
return "index mapping default analyzer";
|
||||
}
|
||||
|
||||
// then check the default documnt mapping
|
||||
used = $scope.isAnalyzerUsedInDocMapping(name, $scope.$parent.mapping.default_mapping, "");
|
||||
if (used) {
|
||||
return "default document mapping " + used;
|
||||
}
|
||||
|
||||
// then check the document mapping for each type
|
||||
for (var docType in $scope.$parent.mapping.types) {
|
||||
docMapping = $scope.$parent.mapping.types[docType];
|
||||
used = $scope.isAnalyzerUsedInDocMapping(name, docMapping, "");
|
||||
if (used) {
|
||||
return "document mapping type '" + docType + "' ";
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
};
|
||||
|
||||
// a recursive helper
|
||||
$scope.isAnalyzerUsedInDocMapping = function(name, docMapping, path) {
|
||||
// first check the document level default analyzer
|
||||
if (docMapping.default_analyzer == name) {
|
||||
if (path) {
|
||||
return "default analyzer at " + path;
|
||||
} else {
|
||||
return "default analyzer";
|
||||
}
|
||||
}
|
||||
// now check fields at this level
|
||||
for (var fieldIndex in docMapping.fields) {
|
||||
field = docMapping.fields[fieldIndex];
|
||||
if (field.analyzer == name) {
|
||||
if (field.name) {
|
||||
return "in the field named " + field.name;
|
||||
}
|
||||
return "in the field at path " + path;
|
||||
}
|
||||
}
|
||||
|
||||
// now check each nested property
|
||||
for (var propertyName in docMapping.properties) {
|
||||
subDoc = docMapping.properties[propertyName];
|
||||
if (path) {
|
||||
return $scope.isAnalyzerUsedInDocMapping(name, subDoc, path+"."+propertyName);
|
||||
} else {
|
||||
return $scope.isAnalyzerUsedInDocMapping(name, subDoc, propertyName);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
};
|
||||
|
||||
$scope.editAnalyzer = function (name, value) {
|
||||
var modalInstance = $modal.open({
|
||||
animation: $scope.animationsEnabled,
|
||||
templateUrl: '/static/partials/analysis/analyzer.html',
|
||||
controller: 'AnalyzerModalCtrl',
|
||||
resolve: {
|
||||
name: function () {
|
||||
return name;
|
||||
},
|
||||
value: function () {
|
||||
return value;
|
||||
},
|
||||
mapping: function() {
|
||||
return $scope.$parent.mapping;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
modalInstance.result.then(function (result) {
|
||||
// add this result to the mapping
|
||||
for (var resultKey in result) {
|
||||
if (name !== "" && resultKey != name) {
|
||||
// remove the old name
|
||||
delete $scope.$parent.mapping.analysis.analyzers[name];
|
||||
}
|
||||
$scope.$parent.mapping.analysis.analyzers[resultKey] = result[resultKey];
|
||||
// reload parent available analyzers
|
||||
$scope.$parent.loadAnalyzerNames();
|
||||
}
|
||||
}, function () {
|
||||
$log.info('Modal dismissed at: ' + new Date());
|
||||
});
|
||||
};
|
||||
|
||||
// word lists
|
||||
|
||||
$scope.newWordList = function () {
|
||||
return $scope.editWordList("", {tokens:[]});
|
||||
};
|
||||
|
||||
$scope.deleteWordList = function (name) {
|
||||
used = $scope.isWordListUsed(name);
|
||||
if (used) {
|
||||
alert("This word list cannot be deleted because it is being used by the " + used + ".");
|
||||
return;
|
||||
}
|
||||
if (confirm("Are you sure you want to delete '" + name + "'?")) {
|
||||
delete $scope.$parent.mapping.analysis.token_maps[name];
|
||||
}
|
||||
};
|
||||
|
||||
$scope.isWordListUsed = function(name) {
|
||||
// word lists are only used by token filters
|
||||
for (var tokenFilterName in $scope.$parent.mapping.analysis.token_filters) {
|
||||
tokenFilter = $scope.$parent.mapping.analysis.token_filters[tokenFilterName];
|
||||
// word lists are embeded in a variety of different field names
|
||||
if (tokenFilter.dict_token_map == name ||
|
||||
tokenFilter.articles_token_map == name ||
|
||||
tokenFilter.keywords_token_map == name ||
|
||||
tokenFilter.stop_token_map == name) {
|
||||
return "token filter named '" + tokenFilterName + "'";
|
||||
}
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
$scope.editWordList = function (name, value) {
|
||||
var modalInstance = $modal.open({
|
||||
animation: $scope.animationsEnabled,
|
||||
templateUrl: '/static/partials/analysis/wordlist.html',
|
||||
controller: 'WordListModalCtrl',
|
||||
resolve: {
|
||||
name: function () {
|
||||
return name;
|
||||
},
|
||||
words: function () {
|
||||
return value.tokens;
|
||||
},
|
||||
mapping: function() {
|
||||
return $scope.$parent.mapping;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
modalInstance.result.then(function (result) {
|
||||
// add this result to the mapping
|
||||
for (var resultKey in result) {
|
||||
if (name !== "" && resultKey != name) {
|
||||
// remove the old name
|
||||
delete $scope.$parent.mapping.analysis.token_maps[name];
|
||||
}
|
||||
$scope.$parent.mapping.analysis.token_maps[resultKey] = result[resultKey];
|
||||
}
|
||||
}, function () {
|
||||
$log.info('Modal dismissed at: ' + new Date());
|
||||
});
|
||||
};
|
||||
|
||||
// character filters
|
||||
|
||||
$scope.newCharFilter = function() {
|
||||
return $scope.editCharFilter("", {});
|
||||
};
|
||||
|
||||
$scope.deleteCharFilter = function(name) {
|
||||
used = $scope.isCharFilterUsed(name);
|
||||
if (used) {
|
||||
alert("This character filter cannot be deleted because it is being used by the " + used + ".");
|
||||
return;
|
||||
}
|
||||
if (confirm("Are you sure you want to delete '" + name + "'?")) {
|
||||
delete $scope.$parent.mapping.analysis.char_filters[name];
|
||||
}
|
||||
};
|
||||
|
||||
$scope.isCharFilterUsed = function(name) {
|
||||
// character filters can only be used by analyzers
|
||||
for (var analyzerName in $scope.$parent.mapping.analysis.analyzers) {
|
||||
analyzer = $scope.$parent.mapping.analysis.analyzers[analyzerName];
|
||||
for (var charFilterIndex in analyzer.char_filters) {
|
||||
charFilterName = analyzer.char_filters[charFilterIndex];
|
||||
if (charFilterName == name) {
|
||||
return "analyzer named '" + analyzerName + "'";
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
$scope.editCharFilter = function (name, value) {
|
||||
var modalInstance = $modal.open({
|
||||
animation: $scope.animationsEnabled,
|
||||
templateUrl: '/static/partials/analysis/charfilter.html',
|
||||
controller: 'CharFilterModalCtrl',
|
||||
resolve: {
|
||||
name: function () {
|
||||
return name;
|
||||
},
|
||||
value: function () {
|
||||
return value;
|
||||
},
|
||||
mapping: function() {
|
||||
return $scope.$parent.mapping;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
modalInstance.result.then(function (result) {
|
||||
// add this result to the mapping
|
||||
for (var resultKey in result) {
|
||||
if (name !== "" && resultKey != name) {
|
||||
// remove the old name
|
||||
delete $scope.$parent.mapping.analysis.char_filters[name];
|
||||
}
|
||||
$scope.$parent.mapping.analysis.char_filters[resultKey] = result[resultKey];
|
||||
}
|
||||
}, function () {
|
||||
$log.info('Modal dismissed at: ' + new Date());
|
||||
});
|
||||
};
|
||||
|
||||
// tokenizers
|
||||
|
||||
$scope.newTokenizer = function () {
|
||||
return $scope.editTokenizer("", {});
|
||||
};
|
||||
|
||||
$scope.deleteTokenizer = function (name) {
|
||||
used = $scope.isTokenizerUsed(name);
|
||||
if (used) {
|
||||
alert("This tokenizer cannot be deleted because it is being used by the " + used + ".");
|
||||
return;
|
||||
}
|
||||
if (confirm("Are you sure you want to delete '" + name + "'?")) {
|
||||
delete $scope.$parent.mapping.analysis.tokenizers[name];
|
||||
}
|
||||
};
|
||||
|
||||
$scope.isTokenizerUsed = function(name) {
|
||||
// tokenizers can be used by *other* tokenizers
|
||||
for (var tokenizerName in $scope.$parent.mapping.analysis.tokenizers) {
|
||||
tokenizer = $scope.$parent.mapping.analysis.tokenizers[tokenizerName];
|
||||
if (tokenizer.tokenizer == name) {
|
||||
return "tokenizer named '" + tokenizerName + "'";
|
||||
}
|
||||
}
|
||||
|
||||
// tokenizers can be used by analyzers
|
||||
for (var analyzerName in $scope.$parent.mapping.analysis.analyzers) {
|
||||
analyzer = $scope.$parent.mapping.analysis.analyzers[analyzerName];
|
||||
if (analyzer.tokenizer == name) {
|
||||
return "analyzer named '" + analyzerName + "'";
|
||||
}
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
$scope.editTokenizer = function (name, value) {
|
||||
var modalInstance = $modal.open({
|
||||
animation: $scope.animationsEnabled,
|
||||
templateUrl: '/static/partials/analysis/tokenizer.html',
|
||||
controller: 'TokenizerModalCtrl',
|
||||
resolve: {
|
||||
name: function () {
|
||||
return name;
|
||||
},
|
||||
value: function () {
|
||||
return value;
|
||||
},
|
||||
mapping: function() {
|
||||
return $scope.$parent.mapping;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
modalInstance.result.then(function (result) {
|
||||
// add this result to the mapping
|
||||
for (var resultKey in result) {
|
||||
if (name !== "" && resultKey != name) {
|
||||
// remove the old name
|
||||
delete $scope.$parent.mapping.analysis.tokenizers[name];
|
||||
}
|
||||
$scope.$parent.mapping.analysis.tokenizers[resultKey] = result[resultKey];
|
||||
}
|
||||
}, function () {
|
||||
$log.info('Modal dismissed at: ' + new Date());
|
||||
});
|
||||
};
|
||||
|
||||
// token filters
|
||||
|
||||
$scope.newTokenFilter = function () {
|
||||
return $scope.editTokenFilter("", {});
|
||||
};
|
||||
|
||||
$scope.deleteTokenFilter = function (name) {
|
||||
used = $scope.isTokenFilterUsed(name);
|
||||
if (used) {
|
||||
alert("This token filter cannot be deleted because it is being used by the " + used + ".");
|
||||
return;
|
||||
}
|
||||
if (confirm("Are you sure you want to delete '" + name + "'?")) {
|
||||
delete $scope.$parent.mapping.analysis.token_filters[name];
|
||||
}
|
||||
};
|
||||
|
||||
$scope.isTokenFilterUsed = function(name) {
|
||||
// token filters can only be used by analyzers
|
||||
for (var analyzerName in $scope.$parent.mapping.analysis.analyzers) {
|
||||
analyzer = $scope.$parent.mapping.analysis.analyzers[analyzerName];
|
||||
for (var tokenFilterIndex in analyzer.token_filters) {
|
||||
tokenFilterName = analyzer.token_filters[tokenFilterIndex];
|
||||
if (tokenFilterName == name) {
|
||||
return "analyzer named '" + analyzerName + "'";
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
$scope.editTokenFilter = function (name, value) {
|
||||
var modalInstance = $modal.open({
|
||||
animation: $scope.animationsEnabled,
|
||||
templateUrl: '/static/partials/analysis/tokenfilter.html',
|
||||
controller: 'TokenFilterModalCtrl',
|
||||
resolve: {
|
||||
name: function () {
|
||||
return name;
|
||||
},
|
||||
value: function () {
|
||||
return value;
|
||||
},
|
||||
mapping: function() {
|
||||
return $scope.$parent.mapping;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
modalInstance.result.then(function (result) {
|
||||
// add this result to the mapping
|
||||
for (var resultKey in result) {
|
||||
if (name !== "" && resultKey != name) {
|
||||
// remove the old name
|
||||
delete $scope.$parent.mapping.analysis.token_filters[name];
|
||||
}
|
||||
$scope.$parent.mapping.analysis.token_filters[resultKey] = result[resultKey];
|
||||
}
|
||||
}, function () {
|
||||
$log.info('Modal dismissed at: ' + new Date());
|
||||
});
|
||||
};
|
||||
|
||||
}
|
|
@ -1,110 +0,0 @@
|
|||
// controller responsible for building a mapping
|
||||
|
||||
function MappingCtrl($scope, $http, $routeParams, $log, $sce, $location) {
|
||||
|
||||
newFieldSection = function() {
|
||||
return {
|
||||
"enabled": true,
|
||||
"dynamic": true,
|
||||
"default_analyzer": "",
|
||||
"properties": {},
|
||||
"fields": [
|
||||
{
|
||||
"type": "",
|
||||
"index": true,
|
||||
"store": true,
|
||||
"include_in_all": true,
|
||||
"include_term_vectors": true
|
||||
}
|
||||
]
|
||||
};
|
||||
};
|
||||
|
||||
$scope.$parent.mapping = {
|
||||
"default_mapping": newFieldSection(),
|
||||
"type_field": "_type",
|
||||
"default_type": "_default",
|
||||
"default_analyzer": "standard",
|
||||
"default_datetime_parser": "dateTimeOptional",
|
||||
"default_field": "_all",
|
||||
"byte_array_converter": "json",
|
||||
"analysis": {
|
||||
"analyzers": {},
|
||||
"token_maps": {},
|
||||
"char_filters": {},
|
||||
"tokenizers": {},
|
||||
"token_filters": {}
|
||||
}
|
||||
};
|
||||
|
||||
$scope.analyzerNames = [];
|
||||
|
||||
$scope.loadAnalyzerNames = function() {
|
||||
$http.post('/api/_analyzerNames',$scope.$parent.mapping).success(function(data) {
|
||||
$scope.analyzerNames = data.analyzers;
|
||||
}).
|
||||
error(function(data, code) {
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
};
|
||||
|
||||
$scope.loadAnalyzerNames();
|
||||
|
||||
$scope.datetimeParserNames = [];
|
||||
|
||||
$scope.loadDatetimeParserNames = function() {
|
||||
$http.post('/api/_datetimeParserNames',$scope.$parent.mapping).success(function(data) {
|
||||
$scope.datetimeParserNames = data.datetime_parsers;
|
||||
}).
|
||||
error(function(data, code) {
|
||||
$scope.errorMessage = data;
|
||||
});
|
||||
};
|
||||
|
||||
$scope.loadDatetimeParserNames();
|
||||
|
||||
$scope.mappingType = "default";
|
||||
$scope.selectedItem = null;
|
||||
$scope.selectedLabel = "";
|
||||
|
||||
$scope.fieldTypes = [
|
||||
{
|
||||
"name": "text",
|
||||
"label": "Text",
|
||||
"description": "a text field"
|
||||
},
|
||||
{
|
||||
"name": "number",
|
||||
"label": "Number",
|
||||
"description": "a numerical value, indexed to facilitate range queries"
|
||||
},
|
||||
{
|
||||
"name": "datetime",
|
||||
"label": "Date/Time",
|
||||
"description": "a date/time value, indexed to facilitate range queries"
|
||||
},
|
||||
{
|
||||
"name": "disabled",
|
||||
"label": "Disabled",
|
||||
"description": "a section of JSON to be completely ignored"
|
||||
}
|
||||
];
|
||||
|
||||
$scope.clickItem = function(x, y) {
|
||||
$scope.selectedItem = x;
|
||||
$scope.selectedLabel = y;
|
||||
};
|
||||
|
||||
$scope.clickItem($scope.$parent.mapping.default_mapping);
|
||||
|
||||
$scope.addField = function(scope) {
|
||||
if (scope.newFieldName) {
|
||||
$scope.selectedItem.properties[scope.newFieldName] = newFieldSection();
|
||||
scope.newFieldName = "";
|
||||
console.log($scope.selectedItem);
|
||||
}
|
||||
};
|
||||
|
||||
$scope.changeType = function(scope) {
|
||||
};
|
||||
}
|
|
@ -1,72 +0,0 @@
|
|||
<div class="modal-header">
|
||||
<h3 class="modal-title">Custom Analyzer</h3>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
|
||||
<div ng-show="errorMessage" class="alert alert-danger ng-cloak" role="alert"> {{errorMessage}}
|
||||
</div>
|
||||
|
||||
<form class="form" role="form">
|
||||
<div class="form-group">
|
||||
<label for="aname">Name</label>
|
||||
<input ng-model="name" type="text" class="form-control" id="tname" placeholder="Name">
|
||||
</div>
|
||||
|
||||
|
||||
<div class="form-group">
|
||||
<label for="exampleInputPassword1">Character Filters</label>
|
||||
<ul class="list-group" ng-show="analyzer.char_filters.length < 1">
|
||||
<li class="list-group-item">None</li>
|
||||
</ul>
|
||||
<ul class="list-group" ng-show="analyzer.char_filters.length > 0" ui-sortable ng-model="analyzer.char_filters">
|
||||
<li class="list-group-item" ng-repeat="analyzerCharFilter in analyzer.char_filters track by $index"><span class="glyphicon glyphicon-minus"></span> {{ analyzerCharFilter }}<span ng-click="removeCharFilter($index)" class="glyphicon glyphicon-remove pull-right"></span></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="exampleInputPassword2"></label>
|
||||
<div class="col-sm-10">
|
||||
<select ng-change="addCharFilterChanged()" ng-model="addCharacterFilterName" class="form-control" id="addCharacterFilters">
|
||||
<option ng-repeat="charFilter in charFilterNames">{{charFilter}}</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="col-sm-2">
|
||||
<button ng-click="addCharFilter(this)" type="button" class="btn btn-default pull-right">Add</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="analyzerTokenizer">Tokenizer</label>
|
||||
<select ng-change="tokenizerChanged()" ng-model="analyzer.tokenizer" class="form-control" id="analyzerTokenizer">
|
||||
<option ng-repeat="tokenizer in tokenizerNames">{{tokenizer}}</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="exampleInputPassword2">Token Filters</label>
|
||||
<ul class="list-group" ng-show="analyzer.token_filters.length < 1">
|
||||
<li class="list-group-item">None</li>
|
||||
</ul>
|
||||
<ul class="list-group" ng-show="analyzer.token_filters.length > 0" ui-sortable ng-model="analyzer.token_filters">
|
||||
<li class="list-group-item" ng-repeat="analyzerTokenFilter in analyzer.token_filters"><span class="glyphicon glyphicon-minus"></span> {{ analyzerTokenFilter }}<span ng-click="removeTokenFilter($index)" class="glyphicon glyphicon-remove pull-right"></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="exampleInputPassword2"></label>
|
||||
<div class="col-sm-10">
|
||||
<select ng-change="addTokenFilterChanged()" ng-model="addTokenFilterName" class="form-control" id="addTokenFilters">
|
||||
<option ng-repeat="tokenFilter in tokenFilterNames">{{tokenFilter}}</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="col-sm-2">
|
||||
<button ng-click="addTokenFilter(this)" type="button" class="btn btn-default pull-right">Add</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button class="btn btn-default" ng-click="cancel()">Cancel</button>
|
||||
<button ng-click="build()" type="button" class="btn btn-primary pull-right">Save</button>
|
||||
</div>
|
|
@ -1,34 +0,0 @@
|
|||
<table class="table table-striped">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr ng-repeat="(aname,aval) in mapping.analysis.analyzers">
|
||||
<td>{{aname}}</td>
|
||||
<td>
|
||||
<div class="btn-group btn-group-xs" role="group">
|
||||
<button ng-click="editAnalyzer(aname, aval)" type="button" class="btn btn-default btn-xs">
|
||||
<span class="glyphicon glyphicon-edit" aria-hidden="true"></span> Edit
|
||||
</button>
|
||||
<button ng-click="deleteAnalyzer(aname)" type="button" class="btn btn-default btn-xs">
|
||||
<span class="glyphicon glyphicon-trash" aria-hidden="true"></span> Delete
|
||||
</button>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr ng-show="Utils.keys(mapping.analysis.analyzers).length < 1">
|
||||
<td colspan="2">None</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tfoot>
|
||||
<tr>
|
||||
<td colspan="2">
|
||||
<button ng-click="newAnalyzer()" type="button" class="btn btn-sm btn-default pull-right">New Analyzer</button>
|
||||
</td>
|
||||
</tr>
|
||||
</tfoot>
|
||||
</table>
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
<div class="modal-header">
|
||||
<h3 class="modal-title">Custom Char Filter</h3>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
|
||||
<div ng-show="errorMessage" class="alert alert-danger ng-cloak" role="alert"> {{errorMessage}}
|
||||
</div>
|
||||
|
||||
<form class="form" role="form">
|
||||
<div class="form-group">
|
||||
<label for="tname">Name</label>
|
||||
<input ng-model="name" type="text" class="form-control" id="tname" placeholder="Name">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="charfiltertype">Type</label>
|
||||
<div class="col-sm-12 input-group">
|
||||
<select ng-change="charFilterTypeChange()" ng-model="charfilter.type" class="form-control" id="charfiltertype">
|
||||
<option ng-repeat="charFilterTyp in charFilterTypes">{{charFilterTyp}}</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div ng-show="charfilter.type" ng-include src="formpath"/>
|
||||
|
||||
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button class="btn btn-default" ng-click="cancel()">Cancel</button>
|
||||
<button ng-click="build()" type="button" class="btn btn-primary pull-right">Save</button>
|
||||
</div>
|
|
@ -1,36 +0,0 @@
|
|||
<table class="table table-striped">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Type</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr ng-repeat="(cfname,cfval) in mapping.analysis.char_filters">
|
||||
<td>{{cfname}}</td>
|
||||
<td>{{cfval.type}}</td>
|
||||
<td>
|
||||
<div class="btn-group btn-group-xs" role="group">
|
||||
<button ng-click="editCharFilter(cfname, cfval)" type="button" class="btn btn-default btn-xs">
|
||||
<span class="glyphicon glyphicon-edit" aria-hidden="true"></span> Edit
|
||||
</button>
|
||||
<button ng-click="deleteCharFilter(cfname)" type="button" class="btn btn-default btn-xs">
|
||||
<span class="glyphicon glyphicon-trash" aria-hidden="true"></span> Delete
|
||||
</button>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr ng-show="Utils.keys(mapping.analysis.char_filters).length < 1">
|
||||
<td colspan="3">None</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tfoot>
|
||||
<tr>
|
||||
<td colspan="3">
|
||||
<button ng-click="newCharFilter()" type="button" class="btn btn-sm btn-default pull-right">New Character Filter</button>
|
||||
</td>
|
||||
</tr>
|
||||
</tfoot>
|
||||
</table>
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
<div class="form-group">
|
||||
<label for="charfilterRegexp">Regular Expression</label>
|
||||
<input ng-model="charfilter.regexp" type="text" class="form-control" id="charfilterRegexp" placeholder="">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="charfilterReplace">Replacement</label>
|
||||
<input ng-model="charfilter.replace" type="text" class="form-control" id="charfilterReplace" placeholder="">
|
||||
</div>
|
|
@ -1,34 +0,0 @@
|
|||
<div class="modal-header">
|
||||
<h3 class="modal-title">Custom Token Filter</h3>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
|
||||
<div ng-show="errorMessage" class="alert alert-danger ng-cloak" role="alert"> {{errorMessage}}
|
||||
</div>
|
||||
|
||||
<form class="form" role="form">
|
||||
<div class="form-group">
|
||||
<label for="tname">Name</label>
|
||||
<input ng-model="name" type="text" class="form-control" id="tname" placeholder="Name">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="tokenfiltertype">Type</label>
|
||||
<div class="col-sm-12 input-group">
|
||||
<select ng-change="tokenFilterTypeChange()" ng-model="tokenfilter.type" class="form-control" id="tokenfiltertype">
|
||||
<option ng-repeat="tokenFilterTyp in tokenFilterTypes">{{tokenFilterTyp}}</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div ng-show="tokenfilter.type" ng-include src="formpath"/>
|
||||
|
||||
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button class="btn btn-default" ng-click="cancel()">Cancel</button>
|
||||
<button ng-click="build()" type="button" class="btn btn-primary pull-right">Save</button>
|
||||
</div>
|
|
@ -1,36 +0,0 @@
|
|||
<table class="table table-striped">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Type</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr ng-repeat="(tfname,tfval) in mapping.analysis.token_filters">
|
||||
<td>{{tfname}}</td>
|
||||
<td>{{tfval.type}}</td>
|
||||
<td>
|
||||
<div class="btn-group btn-group-xs" role="group">
|
||||
<button ng-click="editTokenFilter(tfname, tfval)" type="button" class="btn btn-default btn-xs">
|
||||
<span class="glyphicon glyphicon-edit" aria-hidden="true"></span> Edit
|
||||
</button>
|
||||
<button ng-click="deleteTokenFilter(tfname)" type="button" class="btn btn-default btn-xs">
|
||||
<span class="glyphicon glyphicon-trash" aria-hidden="true"></span> Delete
|
||||
</button>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr ng-show="Utils.keys(mapping.analysis.token_filters).length < 1">
|
||||
<td colspan="3">None</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tfoot>
|
||||
<tr>
|
||||
<td colspan="3">
|
||||
<button ng-click="newTokenFilter()" type="button" class="btn btn-sm btn-default pull-right">New Token Filter</button>
|
||||
</td>
|
||||
</tr>
|
||||
</tfoot>
|
||||
</table>
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
<div class="form-group">
|
||||
<label for="tokenfilterTokenMaps">Sub Words</label>
|
||||
<select ng-model="tokenfilter.dict_token_map" class="form-control" id="tokenfilterTokenMaps">
|
||||
<option ng-repeat="tokenMap in tokenMapNames">{{tokenMap}}</option>
|
||||
</select>
|
||||
</div>
|
|
@ -1,17 +0,0 @@
|
|||
<div class="form-group">
|
||||
<label for="tokenfilterEdge">Edge</label>
|
||||
<select class="form-control" id="tokenfilterEdge" ng-model="tokenfilter.edge">
|
||||
<option>front</option>
|
||||
<option>back</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="tokenfilterMin">Min</label>
|
||||
<input ng-model="tokenfilter.min" type="number" class="form-control" id="tokenfilterMin" placeholder="">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="tokenfilterMax">Max</label>
|
||||
<input ng-model="tokenfilter.max" type="number" class="form-control" id="tokenfilterMax" placeholder="">
|
||||
</div>
|
|
@ -1,6 +0,0 @@
|
|||
<div class="form-group">
|
||||
<label for="tokenfilterTokenMaps">Articles</label>
|
||||
<select ng-model="tokenfilter.articles_token_map" class="form-control" id="tokenfilterTokenMaps">
|
||||
<option ng-repeat="tokenMap in tokenMapNames">{{tokenMap}}</option>
|
||||
</select>
|
||||
</div>
|
|
@ -1,6 +0,0 @@
|
|||
<div class="form-group">
|
||||
<label for="tokenfilterTokenMaps">Keywords</label>
|
||||
<select ng-model="tokenfilter.keywords_token_map" class="form-control" id="tokenfilterTokenMaps">
|
||||
<option ng-repeat="tokenMap in tokenMapNames">{{tokenMap}}</option>
|
||||
</select>
|
||||
</div>
|
|
@ -1,9 +0,0 @@
|
|||
<div class="form-group">
|
||||
<label for="tokenfilterMin">Min</label>
|
||||
<input ng-model="tokenfilter.min" type="number" class="form-control" id="tokenfilterMin" placeholder="">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="tokenfilterMax">Max</label>
|
||||
<input ng-model="tokenfilter.max" type="number" class="form-control" id="tokenfilterMax" placeholder="">
|
||||
</div>
|
|
@ -1,9 +0,0 @@
|
|||
<div class="form-group">
|
||||
<label for="tokenfilterMin">Min</label>
|
||||
<input ng-model="tokenfilter.min" type="number" class="form-control" id="tokenfilterMin" placeholder="">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="tokenfilterMax">Max</label>
|
||||
<input ng-model="tokenfilter.max" type="number" class="form-control" id="tokenfilterMax" placeholder="">
|
||||
</div>
|
|
@ -1,9 +0,0 @@
|
|||
<div class="form-group">
|
||||
<label for="tokenfilterNormalizeUnicode">Form</label>
|
||||
<select class="form-control" id="tokenfilterNormalizeUnicode" ng-model="tokenfilter.form">
|
||||
<option>nfc</option>
|
||||
<option>nfd</option>
|
||||
<option>nfkc</option>
|
||||
<option>nfkd</option>
|
||||
</select>
|
||||
</div>
|
|
@ -1,24 +0,0 @@
|
|||
<div class="form-group">
|
||||
<label for="tokenfilterMin">Min</label>
|
||||
<input ng-model="tokenfilter.min" type="number" class="form-control" id="tokenfilterMin" placeholder="">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="tokenfilterMax">Max</label>
|
||||
<input ng-model="tokenfilter.max" type="number" class="form-control" id="tokenfilterMax" placeholder="">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="tokenfilterInclude">Include Original Token</label>
|
||||
<input ng-model="tokenfilter.output_original" type="checkbox" class="form-control" id="tokenfilterInclude">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="tokenfilterSep">Separator</label>
|
||||
<input ng-model="tokenfilter.separator" type="text" class="form-control" id="tokenfilterSep" placeholder="">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="tokenfilterFiller">Filler</label>
|
||||
<input ng-model="tokenfilter.filler" type="text" class="form-control" id="tokenfilterFiller" placeholder="">
|
||||
</div>
|
|
@ -1,6 +0,0 @@
|
|||
<div class="form-group">
|
||||
<label for="tokenfilterTokenMaps">Stop Words</label>
|
||||
<select ng-model="tokenfilter.stop_token_map" class="form-control" id="tokenfilterTokenMaps">
|
||||
<option ng-repeat="tokenMap in tokenMapNames">{{tokenMap}}</option>
|
||||
</select>
|
||||
</div>
|
|
@ -1,4 +0,0 @@
|
|||
<div class="form-group">
|
||||
<label for="tokenfilterLen">Length</label>
|
||||
<input ng-model="tokenfilter.length" type="number" class="form-control" id="tokenfilterLen" placeholder="">
|
||||
</div>
|
|
@ -1,3 +0,0 @@
|
|||
<select ng-model="tokenfilter.word_map" class="form-control" id="tokenfilterTokenMaps">
|
||||
<option ng-repeat="tokenMap in tokenMapNames">{{tokenMap}}</option>
|
||||
</select>
|
|
@ -1,34 +0,0 @@
|
|||
<div class="modal-header">
|
||||
<h3 class="modal-title">Custom Tokenizer</h3>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
|
||||
<div ng-show="errorMessage" class="alert alert-danger ng-cloak" role="alert"> {{errorMessage}}
|
||||
</div>
|
||||
|
||||
<form class="form" role="form">
|
||||
<div class="form-group">
|
||||
<label for="tname">Name</label>
|
||||
<input ng-model="name" type="text" class="form-control" id="tname" placeholder="Name">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="tokenizertype">Type</label>
|
||||
<div class="col-sm-12 input-group">
|
||||
<select ng-change="tokenizerTypeChange()" ng-model="tokenizer.type" class="form-control" id="tokenizertype">
|
||||
<option ng-repeat="tokenizerTyp in tokenizerTypes">{{tokenizerTyp}}</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div ng-show="tokenizer.type" ng-include src="formpath"/>
|
||||
|
||||
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button class="btn btn-default" ng-click="cancel()">Cancel</button>
|
||||
<button ng-click="build()" type="button" class="btn btn-primary pull-right">Save</button>
|
||||
</div>
|
|
@ -1,36 +0,0 @@
|
|||
<table class="table table-striped">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Type</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr ng-repeat="(tname,tval) in mapping.analysis.tokenizers">
|
||||
<td>{{tname}}</td>
|
||||
<td>{{tval.type}}</td>
|
||||
<td>
|
||||
<div class="btn-group btn-group-xs" role="group">
|
||||
<button ng-click="editTokenizer(tname, tval)" type="button" class="btn btn-default btn-xs">
|
||||
<span class="glyphicon glyphicon-edit" aria-hidden="true"></span> Edit
|
||||
</button>
|
||||
<button ng-click="deleteTokenizer(tname)" type="button" class="btn btn-default btn-xs">
|
||||
<span class="glyphicon glyphicon-trash" aria-hidden="true"></span> Delete
|
||||
</button>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr ng-show="Utils.keys(mapping.analysis.tokenizers).length < 1">
|
||||
<td colspan="3">None</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tfoot>
|
||||
<tr>
|
||||
<td colspan="3">
|
||||
<button ng-click="newTokenizer()" type="button" class="btn btn-sm btn-default pull-right">New Tokenizer</button>
|
||||
</td>
|
||||
</tr>
|
||||
</tfoot>
|
||||
</table>
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
<div class="form-group">
|
||||
<label for="exampleInputPassword1">Exception Patterns</label>
|
||||
<ul class="list-group" ng-show="tokenizer.exceptions.length < 1">
|
||||
<li class="list-group-item">None</li>
|
||||
</ul>
|
||||
<ul class="list-group" ng-show="tokenizer.exceptions.length > 0" ui-sortable ng-model="tokenizer.exceptions">
|
||||
<li class="list-group-item" ng-repeat="e in tokenizer.exceptions track by $index"><span class="glyphicon glyphicon-minus"></span> {{ e }}<span ng-click="removeException($index)" class="glyphicon glyphicon-remove pull-right"></span></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="what"></label>
|
||||
<div class="col-sm-10">
|
||||
<input ng-model="newregexp" type="text" class="form-control" id="exceptionRegexp" placeholder="">
|
||||
</div>
|
||||
<div class="col-sm-2">
|
||||
<button ng-click="addException(this)" type="button" class="btn btn-default pull-right">Add</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="analyzerTokenizer">Tokenizer for Remaining Input</label>
|
||||
<select ng-change="tokenizerChanged()" ng-model="tokenizer.tokenizer" class="form-control" id="tokenizer">
|
||||
<option ng-repeat="tokenizer in tokenizerNames">{{tokenizer}}</option>
|
||||
</select>
|
||||
</div>
|
|
@ -1,4 +0,0 @@
|
|||
<div class="form-group">
|
||||
<label for="tokenizerRegexp">Regular Expression</label>
|
||||
<input ng-model="tokenizer.regexp" type="text" class="form-control" id="tokenizerRegexp" placeholder="">
|
||||
</div>
|
|
@ -1,39 +0,0 @@
|
|||
<div class="modal-header">
|
||||
<h3 class="modal-title">Custom Word List</h3>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
|
||||
<div ng-show="errorMessage" class="alert alert-danger ng-cloak" role="alert"> {{errorMessage}}
|
||||
</div>
|
||||
|
||||
<form class="form" role="form">
|
||||
<div class="form-group">
|
||||
<label for="tname">Name</label>
|
||||
<input ng-model="name" type="text" class="form-control" id="tname" placeholder="Name">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="words">Words</label>
|
||||
<select ng-model="selectedWords" multiple ng-multiple="true" id="words" size="5" class="form-control" ng-options="idx as word for (idx, word) in words">
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="what"></label>
|
||||
<div class="col-sm-8">
|
||||
<input ng-model="newWord" type="text" class="form-control" id="newWord" placeholder="word">
|
||||
</div>
|
||||
<div class="col-sm-4">
|
||||
<button ng-click="addWord()" type="button" class="btn btn-sm btn-default">Add</button>
|
||||
<button ng-click="removeWord()" ng-disabled="selectedWords.length < 1" type="button" class="btn btn-sm btn-default pull-right">Remove</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="modal-footer">
|
||||
<button class="btn btn-default" ng-click="cancel()">Cancel</button>
|
||||
<button ng-click="build()" type="button" class="btn btn-primary pull-right">Save</button>
|
||||
</div>
|
|
@ -1,34 +0,0 @@
|
|||
<table class="table table-striped">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr ng-repeat="(tmname,tmval) in mapping.analysis.token_maps">
|
||||
<td>{{tmname}}</td>
|
||||
<td>
|
||||
<div class="btn-group btn-group-xs" role="group">
|
||||
<button ng-click="editWordList(tmname, tmval)" type="button" class="btn btn-default btn-xs">
|
||||
<span class="glyphicon glyphicon-edit" aria-hidden="true"></span> Edit
|
||||
</button>
|
||||
<button ng-click="deleteWordList(tmname)" type="button" class="btn btn-default btn-xs">
|
||||
<span class="glyphicon glyphicon-trash" aria-hidden="true"></span> Delete
|
||||
</button>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr ng-show="Utils.keys(mapping.analysis.token_maps).length < 1">
|
||||
<td colspan="2">None</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tfoot>
|
||||
<tr>
|
||||
<td colspan="2">
|
||||
<button ng-click="newWordList()" type="button" class="btn btn-sm btn-default pull-right">New Word List</button>
|
||||
</td>
|
||||
</tr>
|
||||
</tfoot>
|
||||
</table>
|
||||
|
|
@ -1,119 +0,0 @@
|
|||
|
||||
<div class="row">
|
||||
<div class="col-md-6">
|
||||
|
||||
<div class="panel panel-default">
|
||||
<div class="panel-heading">
|
||||
<h3 class="panel-title"><span class="glyphicon glyphicon-file" aria-hidden="true"></span> Document Structure</h3>
|
||||
</div>
|
||||
<div class="panel-body">
|
||||
|
||||
<ul class="list-custom">
|
||||
<span class="list-item" ng-click="clickItem(mapping.default_mapping, '<document root>')" ng-class="{selected: mapping.default_mapping==selectedItem}">
|
||||
• <document root>
|
||||
</span>
|
||||
<ul class="list-custom">
|
||||
<li ng-repeat="(pname,pval) in mapping.default_mapping.properties" ng-include="'/static/partials/mapping/mapping-node.html'" ng-init="parent = pname"></li>
|
||||
</ul>
|
||||
</ul>
|
||||
|
||||
<div class="form-group form-group-sm">
|
||||
<div class="col-sm-10">
|
||||
<input ng-model="newFieldName" type="text" class="form-control" id="fieldName" placeholder="field name">
|
||||
</div>
|
||||
<button ng-click="addField(this)" type="button" class="btn btn-sm btn-default">Add</button>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<div class="panel panel-default">
|
||||
<div class="panel-heading">
|
||||
<h3 class="panel-title"><span class="glyphicon glyphicon-list" aria-hidden="true"></span> Indexing Behavior <small>{{selectedLabel}}</small></h3>
|
||||
</div>
|
||||
<div class="panel-body">
|
||||
|
||||
<div ng-show="selectedItem == null">Select an item in the document structure.</div>
|
||||
<div ng-hide="selectedItem == null">
|
||||
|
||||
<div class="form-group form-group-sm">
|
||||
<div class="col-sm-10">
|
||||
<label>Type
|
||||
<select ng-change="changeType(this)" ng-model="selectedItem.fields[0].type" ng-options="t.name as t.label for t in fieldTypes">
|
||||
<option value="">Object</option>
|
||||
</select>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div ng-switch="selectedItem.fields[0].type">
|
||||
<div ng-switch-when="text">
|
||||
|
||||
<div class="form-group form-group-sm">
|
||||
<div class="col-sm-10">
|
||||
<label>Analyzer
|
||||
<select ng-change="changeType(this)" ng-model="selectedItem.fields[0].analyzer" ng-options="t as t for t in analyzerNames">
|
||||
<option value="">Inherit</option>
|
||||
</select>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div ng-switch-when="datetime">
|
||||
|
||||
<div class="form-group form-group-sm">
|
||||
<div class="col-sm-10">
|
||||
<label>Date/TimeParser
|
||||
<select ng-change="changeType(this)" ng-model="selectedItem.fields[0].date_format" ng-options="t as t for t in datetimeParserNames">
|
||||
<option value="">Inherit</option>
|
||||
</select>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div ng-if="selectedItem.fields[0].type != '' && selectedItem.fields[0].type != 'disabled'">
|
||||
|
||||
<div class="checkbox">
|
||||
<label>
|
||||
<input ng-model="selectedItem.fields[0].index" type="checkbox"> Index
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<div class="checkbox">
|
||||
<label>
|
||||
<input ng-model="selectedItem.fields[0].store" type="checkbox"> Store
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<div class="checkbox">
|
||||
<label>
|
||||
<input ng-model="selectedItem.fields[0].include_in_all" type="checkbox"> Include in 'All' Field
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<div ng-if="selectedItem.fields[0].type == 'text'">
|
||||
|
||||
<div class="checkbox">
|
||||
<label>
|
||||
<input ng-model="selectedItem.fields[0].include_term_vectors" type="checkbox"> Include Term Vectors
|
||||
</label>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
|
@ -1,6 +0,0 @@
|
|||
<span class="list-item"ng-click="clickItem(pval,pname)" ng-class="{selected: pval==selectedItem}">• {{pname}}</span>
|
||||
<ul class="list-custom">
|
||||
<li ng-repeat="(pname,pval) in pval.properties" ng-init="parent = parent + '.' + pname">
|
||||
<span class="list-item" ng-click="clickItem(pval,parent)" ng-class="{selected: pval==selectedItem}">• {{pname}}</span>
|
||||
</li>
|
||||
</ul>
|
|
@ -1,59 +0,0 @@
|
|||
|
||||
|
||||
<div ng-show="errorMessage" class="alert alert-danger ng-cloak" role="alert"> {{errorMessage}}
|
||||
</div>
|
||||
|
||||
<form class="form-horizontal" role="form">
|
||||
|
||||
<div class="form-group">
|
||||
<label for="inputDoc" class="col-sm-2 control-label">Index Mapping</label>
|
||||
|
||||
|
||||
<div class="col-sm-10">
|
||||
<div class="radio">
|
||||
<label>
|
||||
<input ng-model="mappingType" type="radio" name="mappingType" value="default" checked>
|
||||
Default
|
||||
</label>
|
||||
</div>
|
||||
<div class="radio">
|
||||
<label>
|
||||
<input ng-model="mappingType" type="radio" name="mappingType" value="custom">
|
||||
Custom
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="form-group" ng-show="mappingType == 'custom'">
|
||||
<label for="inputDoc" class="col-sm-2 control-label"> </label>
|
||||
<div class="col-sm-10">
|
||||
<div ng-include src="'/static/partials/mapping/mapping-custom.html'"/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="form-group" ng-controller="AnalysisCtrl" ng-show="mappingType == 'custom'">
|
||||
<label for="inputDoc" class="col-sm-2 control-label">Custom Analysis</label>
|
||||
<div class="col-sm-10">
|
||||
<tabset>
|
||||
<tab heading="Analyzers">
|
||||
<div ng-include src="'/static/partials/analysis/analyzers.html'"/>
|
||||
</tab>
|
||||
<tab heading="Character Filters">
|
||||
<div ng-include src="'/static/partials/analysis/charfilters.html'"/>
|
||||
</tab>
|
||||
<tab heading="Tokenizers">
|
||||
<div ng-include src="'/static/partials/analysis/tokenizers.html'"/>
|
||||
</tab>
|
||||
<tab heading="Token Filters">
|
||||
<div ng-include src="'/static/partials/analysis/tokenfilters.html'"/>
|
||||
</tab>
|
||||
<tab heading="Word Lists">
|
||||
<div ng-include src="'/static/partials/analysis/wordlists.html'"/>
|
||||
</tab>
|
||||
</tabset>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</form>
|
|
@ -18,7 +18,6 @@ import (
|
|||
|
||||
var indexNameMapping map[string]bleve.Index
|
||||
var indexNameMappingLock sync.RWMutex
|
||||
var indexStats = bleve.IndexStats{}
|
||||
|
||||
func RegisterIndexName(name string, idx bleve.Index) {
|
||||
indexNameMappingLock.Lock()
|
||||
|
@ -28,7 +27,6 @@ func RegisterIndexName(name string, idx bleve.Index) {
|
|||
indexNameMapping = make(map[string]bleve.Index)
|
||||
}
|
||||
indexNameMapping[name] = idx
|
||||
indexStats[name] = idx.Stats()
|
||||
}
|
||||
|
||||
func UnregisterIndexByName(name string) bleve.Index {
|
||||
|
@ -42,7 +40,6 @@ func UnregisterIndexByName(name string) bleve.Index {
|
|||
if rv != nil {
|
||||
delete(indexNameMapping, name)
|
||||
}
|
||||
delete(indexStats, name)
|
||||
return rv
|
||||
}
|
||||
|
||||
|
@ -66,10 +63,6 @@ func IndexNames() []string {
|
|||
return rv
|
||||
}
|
||||
|
||||
func IndexStats() bleve.IndexStats {
|
||||
return indexStats
|
||||
}
|
||||
|
||||
func UpdateAlias(alias string, add, remove []string) error {
|
||||
indexNameMappingLock.Lock()
|
||||
defer indexNameMappingLock.Unlock()
|
||||
|
|
23
index.go
23
index.go
|
@ -71,7 +71,7 @@ func (b *Batch) Size() int {
|
|||
return len(b.internal.IndexOps) + len(b.internal.InternalOps)
|
||||
}
|
||||
|
||||
// String prints a user friendly string represenation of what
|
||||
// String prints a user friendly string representation of what
|
||||
// is inside this batch.
|
||||
func (b *Batch) String() string {
|
||||
return b.internal.String()
|
||||
|
@ -174,8 +174,22 @@ type Index interface {
|
|||
FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error)
|
||||
FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error)
|
||||
|
||||
// DumpAll returns a channel receiving all index rows as
|
||||
// UpsideDownCouchRow, in lexicographic byte order. If the enumeration
|
||||
// fails, an error is sent. The channel is closed once the enumeration
|
||||
// completes or an error is encountered. The caller must consume all
|
||||
// channel entries until the channel is closed to ensure the transaction
|
||||
// and other resources associated with the enumeration are released.
|
||||
//
|
||||
// DumpAll exists for debugging and tooling purpose and may change in the
|
||||
// future.
|
||||
DumpAll() chan interface{}
|
||||
|
||||
// DumpDoc works like DumpAll but returns only StoredRows and
|
||||
// TermFrequencyRows related to a document.
|
||||
DumpDoc(id string) chan interface{}
|
||||
|
||||
// DumpFields works like DumpAll but returns only FieldRows.
|
||||
DumpFields() chan interface{}
|
||||
|
||||
Close() error
|
||||
|
@ -188,6 +202,11 @@ type Index interface {
|
|||
SetInternal(key, val []byte) error
|
||||
DeleteInternal(key []byte) error
|
||||
|
||||
// Name returns the name of the index (by default this is the path)
|
||||
Name() string
|
||||
// SetName lets you assign your own logical name to this index
|
||||
SetName(string)
|
||||
|
||||
// Advanced returns the indexer and data store, exposing lower level
|
||||
// methods to enumerate records and access data.
|
||||
Advanced() (index.Index, store.KVStore, error)
|
||||
|
@ -211,7 +230,7 @@ func New(path string, mapping *IndexMapping) (Index, error) {
|
|||
// The provided mapping will be used for all
|
||||
// Index/Search operations.
|
||||
// The specified index type will be used
|
||||
// The specified kvstore implemenation will be used
|
||||
// The specified kvstore implementation will be used
|
||||
// and the provided kvconfig will be passed to its
|
||||
// constructor.
|
||||
func NewUsing(path string, mapping *IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (Index, error) {
|
||||
|
|
|
@ -50,6 +50,11 @@ func (f *FieldCache) FieldNamed(field string, createIfMissing bool) (uint16, boo
|
|||
// trade read lock for write lock
|
||||
f.mutex.RUnlock()
|
||||
f.mutex.Lock()
|
||||
// need to check again with write lock
|
||||
if index, ok := f.fieldIndexes[field]; ok {
|
||||
f.mutex.Unlock()
|
||||
return index, true
|
||||
}
|
||||
// assign next field id
|
||||
index := uint16(f.lastFieldIndex + 1)
|
||||
f.fieldIndexes[field] = index
|
||||
|
|
|
@ -0,0 +1,169 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
)
|
||||
|
||||
func (f *Firestorm) Analyze(d *document.Document) *index.AnalysisResult {
|
||||
|
||||
rv := &index.AnalysisResult{
|
||||
DocID: d.ID,
|
||||
Rows: make([]index.IndexRow, 0, 100),
|
||||
}
|
||||
|
||||
docIDBytes := []byte(d.ID)
|
||||
|
||||
// add the _id row
|
||||
rv.Rows = append(rv.Rows, NewTermFreqRow(0, nil, docIDBytes, d.Number, 0, 0, nil))
|
||||
|
||||
// information we collate as we merge fields with same name
|
||||
fieldTermFreqs := make(map[uint16]analysis.TokenFrequencies)
|
||||
fieldLengths := make(map[uint16]int)
|
||||
fieldIncludeTermVectors := make(map[uint16]bool)
|
||||
fieldNames := make(map[uint16]string)
|
||||
|
||||
analyzeField := func(field document.Field, storable bool) {
|
||||
fieldIndex, newFieldRow := f.fieldIndexOrNewRow(field.Name())
|
||||
if newFieldRow != nil {
|
||||
rv.Rows = append(rv.Rows, newFieldRow)
|
||||
}
|
||||
fieldNames[fieldIndex] = field.Name()
|
||||
|
||||
if field.Options().IsIndexed() {
|
||||
fieldLength, tokenFreqs := field.Analyze()
|
||||
existingFreqs := fieldTermFreqs[fieldIndex]
|
||||
if existingFreqs == nil {
|
||||
fieldTermFreqs[fieldIndex] = tokenFreqs
|
||||
} else {
|
||||
existingFreqs.MergeAll(field.Name(), tokenFreqs)
|
||||
fieldTermFreqs[fieldIndex] = existingFreqs
|
||||
}
|
||||
fieldLengths[fieldIndex] += fieldLength
|
||||
fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
|
||||
}
|
||||
|
||||
if storable && field.Options().IsStored() {
|
||||
storeRow := f.storeField(docIDBytes, d.Number, field, fieldIndex)
|
||||
rv.Rows = append(rv.Rows, storeRow)
|
||||
}
|
||||
}
|
||||
|
||||
for _, field := range d.Fields {
|
||||
analyzeField(field, true)
|
||||
}
|
||||
|
||||
if len(d.CompositeFields) > 0 {
|
||||
for fieldIndex, tokenFreqs := range fieldTermFreqs {
|
||||
// see if any of the composite fields need this
|
||||
for _, compositeField := range d.CompositeFields {
|
||||
compositeField.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs)
|
||||
}
|
||||
}
|
||||
|
||||
for _, compositeField := range d.CompositeFields {
|
||||
analyzeField(compositeField, false)
|
||||
}
|
||||
}
|
||||
|
||||
rowsCapNeeded := len(rv.Rows)
|
||||
for _, tokenFreqs := range fieldTermFreqs {
|
||||
rowsCapNeeded += len(tokenFreqs)
|
||||
}
|
||||
|
||||
rows := make([]index.IndexRow, 0, rowsCapNeeded)
|
||||
rv.Rows = append(rows, rv.Rows...)
|
||||
|
||||
// walk through the collated information and proccess
|
||||
// once for each indexed field (unique name)
|
||||
for fieldIndex, tokenFreqs := range fieldTermFreqs {
|
||||
fieldLength := fieldLengths[fieldIndex]
|
||||
includeTermVectors := fieldIncludeTermVectors[fieldIndex]
|
||||
|
||||
rv.Rows = f.indexField(docIDBytes, d.Number, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows)
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func (f *Firestorm) indexField(docID []byte, docNum uint64, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow) []index.IndexRow {
|
||||
|
||||
tfrs := make([]TermFreqRow, len(tokenFreqs))
|
||||
|
||||
fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
|
||||
|
||||
if !includeTermVectors {
|
||||
i := 0
|
||||
for _, tf := range tokenFreqs {
|
||||
rows = append(rows, InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, nil))
|
||||
i++
|
||||
}
|
||||
return rows
|
||||
}
|
||||
|
||||
i := 0
|
||||
for _, tf := range tokenFreqs {
|
||||
var tv []*TermVector
|
||||
tv, rows = f.termVectorsFromTokenFreq(fieldIndex, tf, rows)
|
||||
rows = append(rows, InitTermFreqRow(&tfrs[i], fieldIndex, tf.Term, docID, docNum, uint64(tf.Frequency()), fieldNorm, tv))
|
||||
i++
|
||||
}
|
||||
return rows
|
||||
}
|
||||
|
||||
func (f *Firestorm) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq, rows []index.IndexRow) ([]*TermVector, []index.IndexRow) {
|
||||
rv := make([]*TermVector, len(tf.Locations))
|
||||
|
||||
for i, l := range tf.Locations {
|
||||
var newFieldRow *FieldRow
|
||||
fieldIndex := field
|
||||
if l.Field != "" {
|
||||
// lookup correct field
|
||||
fieldIndex, newFieldRow = f.fieldIndexOrNewRow(l.Field)
|
||||
if newFieldRow != nil {
|
||||
rows = append(rows, newFieldRow)
|
||||
}
|
||||
}
|
||||
tv := NewTermVector(fieldIndex, uint64(l.Position), uint64(l.Start), uint64(l.End), l.ArrayPositions)
|
||||
rv[i] = tv
|
||||
}
|
||||
|
||||
return rv, rows
|
||||
}
|
||||
|
||||
func (f *Firestorm) storeField(docID []byte, docNum uint64, field document.Field, fieldIndex uint16) index.IndexRow {
|
||||
fieldValue := make([]byte, 1+len(field.Value()))
|
||||
fieldValue[0] = encodeFieldType(field)
|
||||
copy(fieldValue[1:], field.Value())
|
||||
storedRow := NewStoredRow(docID, docNum, fieldIndex, field.ArrayPositions(), fieldValue)
|
||||
return storedRow
|
||||
}
|
||||
|
||||
func encodeFieldType(f document.Field) byte {
|
||||
fieldType := byte('x')
|
||||
switch f.(type) {
|
||||
case *document.TextField:
|
||||
fieldType = 't'
|
||||
case *document.NumericField:
|
||||
fieldType = 'n'
|
||||
case *document.DateTimeField:
|
||||
fieldType = 'd'
|
||||
case *document.BooleanField:
|
||||
fieldType = 'b'
|
||||
case *document.CompositeField:
|
||||
fieldType = 'c'
|
||||
}
|
||||
return fieldType
|
||||
}
|
|
@ -0,0 +1,192 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis/analyzers/standard_analyzer"
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store/gtreap"
|
||||
"github.com/blevesearch/bleve/index/store/null"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
)
|
||||
|
||||
func TestAnalysis(t *testing.T) {
|
||||
|
||||
aq := index.NewAnalysisQueue(1)
|
||||
f, err := NewFirestorm(gtreap.Name, nil, aq)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = f.Open()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
rows := []index.IndexRow{
|
||||
NewFieldRow(0, IDFieldName),
|
||||
}
|
||||
|
||||
kvwriter, err := f.(*Firestorm).store.Writer()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for _, row := range rows {
|
||||
wb := kvwriter.NewBatch()
|
||||
wb.Set(row.Key(), row.Value())
|
||||
err := kvwriter.ExecuteBatch(wb)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
err = kvwriter.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
kvreader, err := f.(*Firestorm).store.Reader()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
// warmup to load field cache and set maxRead correctly
|
||||
err = f.(*Firestorm).warmup(kvreader)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
d *document.Document
|
||||
r *index.AnalysisResult
|
||||
}{
|
||||
{
|
||||
d: document.NewDocument("a").
|
||||
AddField(
|
||||
document.NewTextFieldWithIndexingOptions("name", nil, []byte("test"), document.IndexField|document.StoreField|document.IncludeTermVectors)),
|
||||
r: &index.AnalysisResult{
|
||||
DocID: "a",
|
||||
Rows: []index.IndexRow{
|
||||
NewTermFreqRow(0, nil, []byte("a"), 1, 0, 0.0, nil),
|
||||
NewFieldRow(1, "name"),
|
||||
NewStoredRow([]byte("a"), 1, 1, nil, []byte("ttest")),
|
||||
NewTermFreqRow(1, []byte("test"), []byte("a"), 1, 1, 1.0, []*TermVector{NewTermVector(1, 1, 0, 4, nil)}),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
test.d.Number = 1
|
||||
actual := f.Analyze(test.d)
|
||||
if !reflect.DeepEqual(actual, test.r) {
|
||||
t.Errorf("expected: %v got %v", test.r, actual)
|
||||
}
|
||||
}
|
||||
|
||||
err = kvreader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAnalysisBug328(t *testing.T) {
|
||||
cache := registry.NewCache()
|
||||
analyzer, err := cache.AnalyzerNamed(standard_analyzer.Name)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
idx, err := NewFirestorm(gtreap.Name, nil, analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
d := document.NewDocument("1")
|
||||
f := document.NewTextFieldCustom("title", nil, []byte("bleve"), document.IndexField|document.IncludeTermVectors, analyzer)
|
||||
d.AddField(f)
|
||||
f = document.NewTextFieldCustom("body", nil, []byte("bleve"), document.IndexField|document.IncludeTermVectors, analyzer)
|
||||
d.AddField(f)
|
||||
cf := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, []string{}, document.IndexField|document.IncludeTermVectors)
|
||||
d.AddField(cf)
|
||||
|
||||
rv := idx.Analyze(d)
|
||||
fieldIndexes := make(map[uint16]string)
|
||||
for _, row := range rv.Rows {
|
||||
if row, ok := row.(*FieldRow); ok {
|
||||
fieldIndexes[row.index] = row.Name()
|
||||
}
|
||||
if row, ok := row.(*TermFreqRow); ok && string(row.term) == "bleve" {
|
||||
for _, vec := range row.Vectors() {
|
||||
if vec.GetField() != uint32(row.field) {
|
||||
if fieldIndexes[row.field] != "_all" {
|
||||
t.Errorf("row named %s field %d - vector field %d", fieldIndexes[row.field], row.field, vec.GetField())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAnalyze(b *testing.B) {
|
||||
|
||||
cache := registry.NewCache()
|
||||
analyzer, err := cache.AnalyzerNamed(standard_analyzer.Name)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
idx, err := NewFirestorm(null.Name, nil, analysisQueue)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
d := document.NewDocument("1")
|
||||
f := document.NewTextFieldWithAnalyzer("desc", nil, bleveWikiArticle1K, analyzer)
|
||||
d.AddField(f)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
rv := idx.Analyze(d)
|
||||
if len(rv.Rows) < 92 || len(rv.Rows) > 93 {
|
||||
b.Fatalf("expected 512-13 rows, got %d", len(rv.Rows))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var bleveWikiArticle1K = []byte(`Boiling liquid expanding vapor explosion
|
||||
From Wikipedia, the free encyclopedia
|
||||
See also: Boiler explosion and Steam explosion
|
||||
|
||||
Flames subsequent to a flammable liquid BLEVE from a tanker. BLEVEs do not necessarily involve fire.
|
||||
|
||||
This article's tone or style may not reflect the encyclopedic tone used on Wikipedia. See Wikipedia's guide to writing better articles for suggestions. (July 2013)
|
||||
A boiling liquid expanding vapor explosion (BLEVE, /ˈblɛviː/ blev-ee) is an explosion caused by the rupture of a vessel containing a pressurized liquid above its boiling point.[1]
|
||||
Contents [hide]
|
||||
1 Mechanism
|
||||
1.1 Water example
|
||||
1.2 BLEVEs without chemical reactions
|
||||
2 Fires
|
||||
3 Incidents
|
||||
4 Safety measures
|
||||
5 See also
|
||||
6 References
|
||||
7 External links
|
||||
Mechanism[edit]
|
||||
|
||||
This section needs additional citations for verification. Please help improve this article by adding citations to reliable sources. Unsourced material may be challenged and removed. (July 2013)
|
||||
There are three characteristics of liquids which are relevant to the discussion of a BLEVE:`)
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/index/store/boltdb"
|
||||
)
|
||||
|
||||
var boltTestConfig = map[string]interface{}{
|
||||
"path": "test",
|
||||
}
|
||||
|
||||
func BenchmarkBoltDBIndexing1Workers(b *testing.B) {
|
||||
CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 1)
|
||||
}
|
||||
|
||||
func BenchmarkBoltDBIndexing2Workers(b *testing.B) {
|
||||
CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 2)
|
||||
}
|
||||
|
||||
func BenchmarkBoltDBIndexing4Workers(b *testing.B) {
|
||||
CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 4)
|
||||
}
|
||||
|
||||
// batches
|
||||
|
||||
func BenchmarkBoltDBIndexing1Workers10Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 10)
|
||||
}
|
||||
|
||||
func BenchmarkBoltDBIndexing2Workers10Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 10)
|
||||
}
|
||||
|
||||
func BenchmarkBoltDBIndexing4Workers10Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 10)
|
||||
}
|
||||
|
||||
func BenchmarkBoltDBIndexing1Workers100Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 100)
|
||||
}
|
||||
|
||||
func BenchmarkBoltDBIndexing2Workers100Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 100)
|
||||
}
|
||||
|
||||
func BenchmarkBoltDBIndexing4Workers100Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 100)
|
||||
}
|
||||
|
||||
func BenchmarkBoltBIndexing1Workers1000Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 1000)
|
||||
}
|
||||
|
||||
func BenchmarkBoltBIndexing2Workers1000Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 1000)
|
||||
}
|
||||
|
||||
func BenchmarkBoltBIndexing4Workers1000Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 1000)
|
||||
}
|
|
@ -0,0 +1,144 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
_ "github.com/blevesearch/bleve/analysis/analyzers/standard_analyzer"
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
)
|
||||
|
||||
var benchmarkDocBodies = []string{
|
||||
"A boiling liquid expanding vapor explosion (BLEVE, /ˈblɛviː/ blev-ee) is an explosion caused by the rupture of a vessel containing a pressurized liquid above its boiling point.",
|
||||
"A boiler explosion is a catastrophic failure of a boiler. As seen today, boiler explosions are of two kinds. One kind is a failure of the pressure parts of the steam and water sides. There can be many different causes, such as failure of the safety valve, corrosion of critical parts of the boiler, or low water level. Corrosion along the edges of lap joints was a common cause of early boiler explosions.",
|
||||
"A boiler is a closed vessel in which water or other fluid is heated. The fluid does not necessarily boil. (In North America the term \"furnace\" is normally used if the purpose is not actually to boil the fluid.) The heated or vaporized fluid exits the boiler for use in various processes or heating applications,[1][2] including central heating, boiler-based power generation, cooking, and sanitation.",
|
||||
"A pressure vessel is a closed container designed to hold gases or liquids at a pressure substantially different from the ambient pressure.",
|
||||
"Pressure (symbol: p or P) is the ratio of force to the area over which that force is distributed.",
|
||||
"Liquid is one of the four fundamental states of matter (the others being solid, gas, and plasma), and is the only state with a definite volume but no fixed shape.",
|
||||
"The boiling point of a substance is the temperature at which the vapor pressure of the liquid equals the pressure surrounding the liquid[1][2] and the liquid changes into a vapor.",
|
||||
"Vapor pressure or equilibrium vapor pressure is defined as the pressure exerted by a vapor in thermodynamic equilibrium with its condensed phases (solid or liquid) at a given temperature in a closed system.",
|
||||
"Industrial gases are a group of gases that are specifically manufactured for use in a wide range of industries, which include oil and gas, petrochemicals, chemicals, power, mining, steelmaking, metals, environmental protection, medicine, pharmaceuticals, biotechnology, food, water, fertilizers, nuclear power, electronics and aerospace.",
|
||||
"The expansion ratio of a liquefied and cryogenic substance is the volume of a given amount of that substance in liquid form compared to the volume of the same amount of substance in gaseous form, at room temperature and normal atmospheric pressure.",
|
||||
}
|
||||
|
||||
type KVStoreDestroy func() error
|
||||
|
||||
func DestroyTest() error {
|
||||
return os.RemoveAll("test")
|
||||
}
|
||||
|
||||
func CommonBenchmarkIndex(b *testing.B, storeName string, storeConfig map[string]interface{}, destroy KVStoreDestroy, analysisWorkers int) {
|
||||
|
||||
cache := registry.NewCache()
|
||||
analyzer, err := cache.AnalyzerNamed("standard")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
indexDocument := document.NewDocument("").
|
||||
AddField(document.NewTextFieldWithAnalyzer("body", []uint64{}, []byte(benchmarkDocBodies[0]), analyzer))
|
||||
|
||||
b.ResetTimer()
|
||||
b.StopTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
analysisQueue := index.NewAnalysisQueue(analysisWorkers)
|
||||
idx, err := NewFirestorm(storeName, storeConfig, analysisQueue)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
indexDocument.ID = strconv.Itoa(i)
|
||||
// just time the indexing portion
|
||||
b.StartTimer()
|
||||
err = idx.Update(indexDocument)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
b.StopTimer()
|
||||
err = idx.Close()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
err = destroy()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
analysisQueue.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func CommonBenchmarkIndexBatch(b *testing.B, storeName string, storeConfig map[string]interface{}, destroy KVStoreDestroy, analysisWorkers, batchSize int) {
|
||||
|
||||
cache := registry.NewCache()
|
||||
analyzer, err := cache.AnalyzerNamed("standard")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
b.StopTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
|
||||
analysisQueue := index.NewAnalysisQueue(analysisWorkers)
|
||||
idx, err := NewFirestorm(storeName, storeConfig, analysisQueue)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
b.StartTimer()
|
||||
batch := index.NewBatch()
|
||||
for j := 0; j < 1000; j++ {
|
||||
if j%batchSize == 0 {
|
||||
if len(batch.IndexOps) > 0 {
|
||||
err := idx.Batch(batch)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
batch = index.NewBatch()
|
||||
}
|
||||
indexDocument := document.NewDocument("").
|
||||
AddField(document.NewTextFieldWithAnalyzer("body", []uint64{}, []byte(benchmarkDocBodies[j%10]), analyzer))
|
||||
indexDocument.ID = strconv.Itoa(i) + "-" + strconv.Itoa(j)
|
||||
batch.Update(indexDocument)
|
||||
}
|
||||
// close last batch
|
||||
if len(batch.IndexOps) > 0 {
|
||||
err := idx.Batch(batch)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
b.StopTimer()
|
||||
err = idx.Close()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
err = destroy()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
analysisQueue.Close()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/blevex/cznicb"
|
||||
)
|
||||
|
||||
func DestroyCznicB() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func BenchmarkCznicBIndexing1Workers(b *testing.B) {
|
||||
CommonBenchmarkIndex(b, cznicb.Name, nil, DestroyCznicB, 1)
|
||||
}
|
||||
|
||||
func BenchmarkCznicBIndexing2Workers(b *testing.B) {
|
||||
CommonBenchmarkIndex(b, cznicb.Name, nil, DestroyCznicB, 2)
|
||||
}
|
||||
|
||||
func BenchmarkCznicBIndexing4Workers(b *testing.B) {
|
||||
CommonBenchmarkIndex(b, cznicb.Name, nil, DestroyCznicB, 4)
|
||||
}
|
||||
|
||||
// batches
|
||||
|
||||
func BenchmarkCznicBIndexing1Workers10Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, cznicb.Name, nil, DestroyCznicB, 1, 10)
|
||||
}
|
||||
|
||||
func BenchmarkCznicBIndexing2Workers10Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, cznicb.Name, nil, DestroyCznicB, 2, 10)
|
||||
}
|
||||
|
||||
func BenchmarkCznicBIndexing4Workers10Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, cznicb.Name, nil, DestroyCznicB, 4, 10)
|
||||
}
|
||||
|
||||
func BenchmarkCznicBIndexing1Workers100Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, cznicb.Name, nil, DestroyCznicB, 1, 100)
|
||||
}
|
||||
|
||||
func BenchmarkCznicBIndexing2Workers100Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, cznicb.Name, nil, DestroyCznicB, 2, 100)
|
||||
}
|
||||
|
||||
func BenchmarkCznicBIndexing4Workers100Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, cznicb.Name, nil, DestroyCznicB, 4, 100)
|
||||
}
|
||||
|
||||
func BenchmarkCznicBIndexing1Workers1000Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, cznicb.Name, nil, DestroyCznicB, 1, 1000)
|
||||
}
|
||||
|
||||
func BenchmarkCznicBIndexing2Workers1000Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, cznicb.Name, nil, DestroyCznicB, 2, 1000)
|
||||
}
|
||||
|
||||
func BenchmarkCznicBIndexing4Workers1000Batch(b *testing.B) {
|
||||
CommonBenchmarkIndexBatch(b, cznicb.Name, nil, DestroyCznicB, 4, 1000)
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build forestdb

package firestorm

import (
	"os"
	"testing"

	"github.com/blevesearch/bleve/index/store"
	"github.com/blevesearch/bleve/index/store/forestdb"
)

// CreateForestDB creates a forestdb KVStore under ./testdir.
// The directory is created first because the store file lives inside it.
func CreateForestDB() (store.KVStore, error) {
	err := os.MkdirAll("testdir", 0700)
	if err != nil {
		return nil, err
	}
	s, err := forestdb.New("testdir/test", true, nil)
	if err != nil {
		return nil, err
	}
	return s, nil
}

// DestroyForestDB removes the benchmark working directory and
// everything in it.
func DestroyForestDB() error {
	return os.RemoveAll("testdir")
}

// NOTE(review): these benchmarks pass a create function to
// CommonBenchmarkIndex/CommonBenchmarkIndexBatch, while the sibling
// goleveldb/gtreap/null benchmark files pass a registered store name
// plus a config map. Both call forms cannot compile in the same
// package — confirm which signature CommonBenchmarkIndex expects.
func BenchmarkForestDBIndexing1Workers(b *testing.B) {
	CommonBenchmarkIndex(b, CreateForestDB, DestroyForestDB, 1)
}

func BenchmarkForestDBIndexing2Workers(b *testing.B) {
	CommonBenchmarkIndex(b, CreateForestDB, DestroyForestDB, 2)
}

func BenchmarkForestDBIndexing4Workers(b *testing.B) {
	CommonBenchmarkIndex(b, CreateForestDB, DestroyForestDB, 4)
}

// batches

func BenchmarkForestDBIndexing1Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 1, 10)
}

func BenchmarkForestDBIndexing2Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 2, 10)
}

func BenchmarkForestDBIndexing4Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 4, 10)
}

func BenchmarkForestDBIndexing1Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 1, 100)
}

func BenchmarkForestDBIndexing2Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 2, 100)
}

func BenchmarkForestDBIndexing4Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 4, 100)
}

func BenchmarkForestDBIndexing1Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 1, 1000)
}

func BenchmarkForestDBIndexing2Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 2, 1000)
}

func BenchmarkForestDBIndexing4Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 4, 1000)
}
|
|
@ -0,0 +1,71 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package firestorm

import (
	"testing"

	"github.com/blevesearch/bleve/index/store/goleveldb"
)

// goLevelDBTestOptions configures the goleveldb store used by these
// benchmarks; the store is created at ./test. DestroyTest (defined
// elsewhere in this package) is passed as the cleanup callback.
var goLevelDBTestOptions = map[string]interface{}{
	"create_if_missing": true,
	"path":              "test",
}

// Indexing benchmarks against the goleveldb store; names encode the
// worker count and (for the batch variants) the batch size.
func BenchmarkGoLevelDBIndexing1Workers(b *testing.B) {
	CommonBenchmarkIndex(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 1)
}

func BenchmarkGoLevelDBIndexing2Workers(b *testing.B) {
	CommonBenchmarkIndex(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 2)
}

func BenchmarkGoLevelDBIndexing4Workers(b *testing.B) {
	CommonBenchmarkIndex(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 4)
}

// batches

func BenchmarkGoLevelDBIndexing1Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 1, 10)
}

func BenchmarkGoLevelDBIndexing2Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 2, 10)
}

func BenchmarkGoLevelDBIndexing4Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 4, 10)
}

func BenchmarkGoLevelDBIndexing1Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 1, 100)
}

func BenchmarkGoLevelDBIndexing2Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 2, 100)
}

func BenchmarkGoLevelDBIndexing4Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 4, 100)
}

func BenchmarkGoLevelDBIndexing1Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 1, 1000)
}

func BenchmarkGoLevelDBIndexing2Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 2, 1000)
}

func BenchmarkGoLevelDBIndexing4Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 4, 1000)
}
|
|
@ -0,0 +1,81 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build rocksdb

package firestorm

import (
	"os"
	"testing"

	"github.com/blevesearch/bleve/index/store"
	// Fix: rocksdb.New was referenced below but the package was never
	// imported, so this file could not compile under the rocksdb build
	// tag. Import path assumed to mirror the sibling stores
	// (index/store/<name>) — verify against the repository layout.
	"github.com/blevesearch/bleve/index/store/rocksdb"
)

// rocksdbTestOptions is the config passed to rocksdb.New for every
// benchmark store at ./test.
var rocksdbTestOptions = map[string]interface{}{
	"create_if_missing": true,
}

// CreateGoRocksDB opens a rocksdb KVStore at ./test for benchmarking.
func CreateGoRocksDB() (store.KVStore, error) {
	return rocksdb.New("test", rocksdbTestOptions)
}

// DestroyGoRocksDB removes the benchmark store directory.
func DestroyGoRocksDB() error {
	return os.RemoveAll("test")
}

// NOTE(review): these benchmarks pass a create function to
// CommonBenchmarkIndex/CommonBenchmarkIndexBatch, while the sibling
// goleveldb/gtreap/null benchmark files pass a registered store name
// plus a config map — confirm which signature CommonBenchmarkIndex
// expects.
func BenchmarkRocksDBIndexing1Workers(b *testing.B) {
	CommonBenchmarkIndex(b, CreateGoRocksDB, DestroyGoRocksDB, 1)
}

func BenchmarkRocksDBIndexing2Workers(b *testing.B) {
	CommonBenchmarkIndex(b, CreateGoRocksDB, DestroyGoRocksDB, 2)
}

func BenchmarkRocksDBIndexing4Workers(b *testing.B) {
	CommonBenchmarkIndex(b, CreateGoRocksDB, DestroyGoRocksDB, 4)
}

// batches

func BenchmarkRocksDBIndexing1Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 1, 10)
}

func BenchmarkRocksDBIndexing2Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 2, 10)
}

func BenchmarkRocksDBIndexing4Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 4, 10)
}

func BenchmarkRocksDBIndexing1Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 1, 100)
}

func BenchmarkRocksDBIndexing2Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 2, 100)
}

func BenchmarkRocksDBIndexing4Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 4, 100)
}

func BenchmarkRocksDBIndexing1Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 1, 1000)
}

func BenchmarkRocksDBIndexing2Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 2, 1000)
}

func BenchmarkRocksDBIndexing4Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 4, 1000)
}
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package firestorm

import (
	"testing"

	"github.com/blevesearch/bleve/index/store/gtreap"
)

// DestroyGTreap is a no-op cleanup callback: the gtreap store is
// purely in-memory, so there is nothing on disk to remove.
func DestroyGTreap() error {
	return nil
}

// Indexing benchmarks against the in-memory gtreap store; names
// encode the worker count and (for the batch variants) the batch size.
func BenchmarkGTreapIndexing1Workers(b *testing.B) {
	CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyGTreap, 1)
}

func BenchmarkGTreapIndexing2Workers(b *testing.B) {
	CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyGTreap, 2)
}

func BenchmarkGTreapIndexing4Workers(b *testing.B) {
	CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyGTreap, 4)
}

// batches

func BenchmarkGTreapIndexing1Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyGTreap, 1, 10)
}

func BenchmarkGTreapIndexing2Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyGTreap, 2, 10)
}

func BenchmarkGTreapIndexing4Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyGTreap, 4, 10)
}

func BenchmarkGTreapIndexing1Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyGTreap, 1, 100)
}

func BenchmarkGTreapIndexing2Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyGTreap, 2, 100)
}

func BenchmarkGTreapIndexing4Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyGTreap, 4, 100)
}

func BenchmarkGTreapIndexing1Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyGTreap, 1, 1000)
}

func BenchmarkGTreapIndexing2Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyGTreap, 2, 1000)
}

func BenchmarkGTreapIndexing4Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyGTreap, 4, 1000)
}
|
|
@ -0,0 +1,82 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build leveldb full

package firestorm

import (
	"os"
	"testing"

	"github.com/blevesearch/bleve/index/store"
	"github.com/blevesearch/bleve/index/store/leveldb"
)

// leveldbTestOptions is the config passed to leveldb.New for every
// benchmark store at ./test.
var leveldbTestOptions = map[string]interface{}{
	"create_if_missing": true,
}

// CreateLevelDB opens a leveldb KVStore at ./test for benchmarking.
func CreateLevelDB() (store.KVStore, error) {
	return leveldb.New("test", leveldbTestOptions)
}

// DestroyLevelDB removes the benchmark store directory.
func DestroyLevelDB() error {
	return os.RemoveAll("test")
}

// NOTE(review): these benchmarks pass a create function to
// CommonBenchmarkIndex/CommonBenchmarkIndexBatch, while the sibling
// goleveldb/gtreap/null benchmark files pass a registered store name
// plus a config map — the two call signatures cannot both compile in
// one package; confirm which form CommonBenchmarkIndex expects.
func BenchmarkLevelDBIndexing1Workers(b *testing.B) {
	CommonBenchmarkIndex(b, CreateLevelDB, DestroyLevelDB, 1)
}

func BenchmarkLevelDBIndexing2Workers(b *testing.B) {
	CommonBenchmarkIndex(b, CreateLevelDB, DestroyLevelDB, 2)
}

func BenchmarkLevelDBIndexing4Workers(b *testing.B) {
	CommonBenchmarkIndex(b, CreateLevelDB, DestroyLevelDB, 4)
}

// batches

func BenchmarkLevelDBIndexing1Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 1, 10)
}

func BenchmarkLevelDBIndexing2Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 2, 10)
}

func BenchmarkLevelDBIndexing4Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 4, 10)
}

func BenchmarkLevelDBIndexing1Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 1, 100)
}

func BenchmarkLevelDBIndexing2Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 2, 100)
}

func BenchmarkLevelDBIndexing4Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 4, 100)
}

func BenchmarkLevelDBIndexing1Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 1, 1000)
}

func BenchmarkLevelDBIndexing2Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 2, 1000)
}

func BenchmarkLevelDBIndexing4Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 4, 1000)
}
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package firestorm

import (
	"testing"

	"github.com/blevesearch/bleve/index/store/null"
)

// DestroyNull is a no-op cleanup callback: the null store discards
// everything, so there is nothing to remove.
func DestroyNull() error {
	return nil
}

// Indexing benchmarks against the null store, which isolates the cost
// of the indexing pipeline itself from any KV-store cost.
func BenchmarkNullIndexing1Workers(b *testing.B) {
	CommonBenchmarkIndex(b, null.Name, nil, DestroyNull, 1)
}

func BenchmarkNullIndexing2Workers(b *testing.B) {
	CommonBenchmarkIndex(b, null.Name, nil, DestroyNull, 2)
}

func BenchmarkNullIndexing4Workers(b *testing.B) {
	CommonBenchmarkIndex(b, null.Name, nil, DestroyNull, 4)
}

// batches

func BenchmarkNullIndexing1Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyNull, 1, 10)
}

func BenchmarkNullIndexing2Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyNull, 2, 10)
}

func BenchmarkNullIndexing4Workers10Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyNull, 4, 10)
}

func BenchmarkNullIndexing1Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyNull, 1, 100)
}

func BenchmarkNullIndexing2Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyNull, 2, 100)
}

func BenchmarkNullIndexing4Workers100Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyNull, 4, 100)
}

func BenchmarkNullIndexing1Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyNull, 1, 1000)
}

func BenchmarkNullIndexing2Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyNull, 2, 1000)
}

func BenchmarkNullIndexing4Workers1000Batch(b *testing.B) {
	CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyNull, 4, 1000)
}
|
|
@ -0,0 +1,156 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package firestorm

import (
	"bytes"
	"math/rand"
	"sort"
	"sync"

	"github.com/steveyen/gtreap"
	"github.com/willf/bitset"
)

// Compensator tracks which document numbers are visible to readers.
// It maintains three pieces of state:
//   - maxRead: the highest doc number readers are allowed to observe
//   - inFlight: an immutable (persistent) treap mapping docID -> doc
//     number for mutations that have started but not yet completed
//   - deletedDocNumbers: a bitset of doc numbers that are deleted but
//     not yet garbage collected
// inFlightMutex guards maxRead/inFlight; deletedMutex guards
// deletedDocNumbers.
type Compensator struct {
	inFlightMutex     sync.RWMutex
	maxRead           uint64
	inFlight          *gtreap.Treap
	deletedMutex      sync.RWMutex
	deletedDocNumbers *bitset.BitSet
}

// NewCompensator returns a Compensator with an empty in-flight treap
// and a deleted bitset pre-sized to 1,000,000 bits.
func NewCompensator() *Compensator {
	rv := Compensator{
		inFlight:          gtreap.NewTreap(inFlightItemCompare),
		deletedDocNumbers: bitset.New(1000000),
	}
	return &rv
}

// Snapshot is a point-in-time view of the Compensator's state. The
// treap and bitset are shared by pointer: the treap is persistent and
// the bitset is replaced copy-on-write in Migrate, so a Snapshot can
// be read without further locking. Note GarbageCollect mutates the
// shared bitset in place — see the NOTE there.
type Snapshot struct {
	maxRead           uint64
	inFlight          *gtreap.Treap
	deletedDocNumbers *bitset.BitSet
}

// returns which doc number is valid
// if none, then 0
// A doc number is accepted when it is non-zero, no greater than
// maxRead, consistent with any in-flight mutation for the same docID,
// and not marked deleted. The list is sorted first so the highest
// qualifying doc number wins.
func (s *Snapshot) Which(docID []byte, docNumList DocNumberList) uint64 {
	inFlightVal := s.inFlight.Get(&InFlightItem{docID: docID})

	sort.Sort(docNumList) // Descending ordering.

	for _, docNum := range docNumList {
		if docNum > 0 && docNum <= s.maxRead &&
			(inFlightVal == nil || inFlightVal.(*InFlightItem).docNum == docNum) &&
			!s.deletedDocNumbers.Test(uint(docNum)) {
			return docNum
		}
	}
	return 0
}

// Valid reports whether the given (docID, docNum) pair is visible in
// this snapshot: persisted (<= maxRead), not superseded by an
// in-flight mutation for the same docID, and not deleted. The
// logger.Printf calls trace each check for debugging.
func (s *Snapshot) Valid(docID []byte, docNum uint64) bool {
	logger.Printf("checking validity of: '%s' - % x - %d", docID, docID, docNum)
	if docNum > s.maxRead {
		return false
	}
	logger.Printf("<= maxRead")
	inFlightVal := s.inFlight.Get(&InFlightItem{docID: docID})
	if inFlightVal != nil && inFlightVal.(*InFlightItem).docNum != docNum {
		return false
	}
	logger.Printf("not in flight")
	if s.deletedDocNumbers.Test(uint(docNum)) {
		return false
	}
	logger.Printf("not deleted")
	return true
}

// Mutate records that a mutation of docID to docNum is in flight and
// advances maxRead. A zero docNum is recorded in the treap but does
// not advance maxRead (presumably the delete case — TODO confirm).
// The random priority is required by the treap's Upsert API.
func (c *Compensator) Mutate(docID []byte, docNum uint64) {
	c.inFlightMutex.Lock()
	defer c.inFlightMutex.Unlock()
	c.inFlight = c.inFlight.Upsert(&InFlightItem{docID: docID, docNum: docNum}, rand.Int())
	if docNum != 0 {
		c.maxRead = docNum
	}
}

// MutateBatch records a whole batch of in-flight items at once and
// sets maxRead to the batch's last doc number.
func (c *Compensator) MutateBatch(inflightItems []*InFlightItem, lastDocNum uint64) {
	c.inFlightMutex.Lock()
	defer c.inFlightMutex.Unlock()
	for _, item := range inflightItems {
		c.inFlight = c.inFlight.Upsert(item, rand.Int())
	}
	c.maxRead = lastDocNum
}

// Migrate completes a mutation: it marks the superseded doc numbers
// deleted and retires the in-flight entry for docID (only if that
// entry still refers to the same docNum — a newer mutation may have
// replaced it).
func (c *Compensator) Migrate(docID []byte, docNum uint64, oldDocNums []uint64) {
	c.inFlightMutex.Lock()
	defer c.inFlightMutex.Unlock()
	c.deletedMutex.Lock()
	defer c.deletedMutex.Unlock()

	// clone deleted doc numbers and mutate
	// (copy-on-write, so existing Snapshots keep their old bitset)
	if len(oldDocNums) > 0 {
		newDeletedDocNumbers := c.deletedDocNumbers.Clone()
		for _, oldDocNum := range oldDocNums {
			newDeletedDocNumbers.Set(uint(oldDocNum))
		}
		// update pointer
		c.deletedDocNumbers = newDeletedDocNumbers
	}

	// remove entry from in-flight if it still has same doc num
	val := c.inFlight.Get(&InFlightItem{docID: docID})
	if val != nil && val.(*InFlightItem).docNum == docNum {
		c.inFlight = c.inFlight.Delete(&InFlightItem{docID: docID})
	}
}

// GarbageCollect clears the deleted bits for doc numbers whose rows
// have been physically removed.
// NOTE(review): unlike Migrate, this mutates the bitset in place
// rather than copy-on-write, so live Snapshots observe the change —
// confirm that is intended.
func (c *Compensator) GarbageCollect(docNums []uint64) {
	c.deletedMutex.Lock()
	defer c.deletedMutex.Unlock()

	for _, docNum := range docNums {
		c.deletedDocNumbers.Clear(uint(docNum))
	}
}

// Snapshot captures the current visibility state under both read
// locks. The treap pointer is safe to share because the treap is
// persistent; the bitset pointer relies on Migrate's copy-on-write.
func (c *Compensator) Snapshot() *Snapshot {
	c.inFlightMutex.RLock()
	defer c.inFlightMutex.RUnlock()
	c.deletedMutex.RLock()
	defer c.deletedMutex.RUnlock()

	rv := Snapshot{
		maxRead:           c.maxRead,
		inFlight:          c.inFlight,
		deletedDocNumbers: c.deletedDocNumbers,
	}
	return &rv
}

// GarbageCount returns the number of doc numbers currently marked
// deleted (i.e. awaiting garbage collection).
func (c *Compensator) GarbageCount() uint64 {
	return uint64(c.deletedDocNumbers.Count())
}

//**************

// InFlightItem is the treap payload: a docID and the doc number of
// its pending mutation.
type InFlightItem struct {
	docID  []byte
	docNum uint64
}

// inFlightItemCompare orders InFlightItems by docID bytes; docNum is
// deliberately ignored so one entry per docID exists in the treap.
func inFlightItemCompare(a, b interface{}) int {
	return bytes.Compare(a.(*InFlightItem).docID, b.(*InFlightItem).docID)
}
|
|
@ -0,0 +1,160 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package firestorm

import (
	"encoding/binary"
	"fmt"
	"sync"
	"sync/atomic"
	"time"
)

// DefaultDictUpdateThreshold is exported for tuning.
// NOTE(review): not referenced anywhere in this file — confirm it is
// consumed elsewhere before relying on it.
const DefaultDictUpdateThreshold = 10

// DefaultDictUpdateSleep is the interval between background flushes
// of accumulated dictionary deltas.
var DefaultDictUpdateSleep = 1 * time.Second

// DictUpdater accumulates term-usage deltas (term -> signed count)
// and periodically flushes them to the index's KV store as merge
// operations. Deltas arrive either directly via Notify or through the
// buffered incoming channel via NotifyBatch.
type DictUpdater struct {
	f               *Firestorm
	dictUpdateSleep time.Duration
	quit            chan struct{}
	incoming        chan map[string]int64

	mutex      sync.RWMutex  // guards workingSet
	workingSet map[string]int64
	closeWait  sync.WaitGroup

	// batchesStarted/batchesFlushed are read/written with sync/atomic;
	// waitTasksDone compares them to detect a completed flush.
	batchesStarted uint64
	batchesFlushed uint64
}

// NewDictUpdater returns a stopped DictUpdater bound to f; call Start
// to launch its background goroutines.
func NewDictUpdater(f *Firestorm) *DictUpdater {
	rv := DictUpdater{
		f:               f,
		dictUpdateSleep: DefaultDictUpdateSleep,
		workingSet:      make(map[string]int64),
		batchesStarted:  1,
		quit:            make(chan struct{}),
		incoming:        make(chan map[string]int64, 8),
	}
	return &rv
}

// Notify adds a single term-usage delta to the working set.
func (d *DictUpdater) Notify(term string, usage int64) {
	d.mutex.Lock()
	defer d.mutex.Unlock()
	d.workingSet[term] += usage
}

// NotifyBatch queues a map of term-usage deltas; blocks if the
// incoming channel (capacity 8) is full.
func (d *DictUpdater) NotifyBatch(termUsages map[string]int64) {
	d.incoming <- termUsages
}
|
||||
|
||||
// Start launches the two background goroutines: runIncoming drains
// NotifyBatch deliveries into the working set, and run periodically
// flushes the working set to the store.
// NOTE(review): closeWait.Add(1) accounts only for run (the goroutine
// that calls Done); runIncoming exits on quit but is not waited on —
// confirm that is intended.
func (d *DictUpdater) Start() {
	d.closeWait.Add(1)
	go d.runIncoming()
	go d.run()
}

// Stop signals both goroutines to exit and waits for run to finish.
func (d *DictUpdater) Stop() {
	close(d.quit)
	d.closeWait.Wait()
}

// runIncoming moves batches from the incoming channel into the
// mutex-guarded working set until quit is closed (or the channel is
// closed by someone else).
func (d *DictUpdater) runIncoming() {
	for {
		select {
		case <-d.quit:
			return
		case termUsages, ok := <-d.incoming:
			if !ok {
				return
			}
			d.mutex.Lock()
			for term, usage := range termUsages {
				d.workingSet[term] += usage
			}
			d.mutex.Unlock()
		}
	}
}

// run flushes the working set on every tick of dictUpdateSleep until
// quit is closed.
func (d *DictUpdater) run() {
	tick := time.Tick(d.dictUpdateSleep)
	for {
		select {
		case <-d.quit:
			logger.Printf("dictionary updater asked to quit")
			d.closeWait.Done()
			return
		case <-tick:
			logger.Printf("dictionary updater ticked")
			d.update()
		}
	}
}
|
||||
|
||||
func (d *DictUpdater) update() {
|
||||
d.mutex.Lock()
|
||||
oldWorkingSet := d.workingSet
|
||||
d.workingSet = make(map[string]int64)
|
||||
atomic.AddUint64(&d.batchesStarted, 1)
|
||||
d.mutex.Unlock()
|
||||
|
||||
// open a writer
|
||||
writer, err := d.f.store.Writer()
|
||||
if err != nil {
|
||||
_ = writer.Close()
|
||||
logger.Printf("dict updater fatal: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// prepare batch
|
||||
wb := writer.NewBatch()
|
||||
|
||||
dictionaryTermDelta := make([]byte, 8)
|
||||
for term, delta := range oldWorkingSet {
|
||||
binary.LittleEndian.PutUint64(dictionaryTermDelta, uint64(delta))
|
||||
wb.Merge([]byte(term), dictionaryTermDelta)
|
||||
}
|
||||
|
||||
err = writer.ExecuteBatch(wb)
|
||||
if err != nil {
|
||||
_ = writer.Close()
|
||||
logger.Printf("dict updater fatal: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
atomic.AddUint64(&d.batchesFlushed, 1)
|
||||
|
||||
err = writer.Close()
|
||||
}
|
||||
|
||||
// this is not intended to be used publicly, only for unit tests
// which depend on consistency we no longer provide
//
// waitTasksDone blocks until at least one flush completes after the
// moment of the call (batchesFlushed exceeds the batchesStarted value
// sampled on entry), polling every 100ms, or returns an error after
// dur elapses.
func (d *DictUpdater) waitTasksDone(dur time.Duration) error {
	initial := atomic.LoadUint64(&d.batchesStarted)
	timeout := time.After(dur)
	tick := time.Tick(100 * time.Millisecond)
	for {
		select {
		// Got a timeout! fail with a timeout error
		case <-timeout:
			flushed := atomic.LoadUint64(&d.batchesFlushed)
			return fmt.Errorf("timeout, %d/%d", initial, flushed)
		// Got a tick, we should check on doSomething()
		case <-tick:
			flushed := atomic.LoadUint64(&d.batchesFlushed)
			if flushed > initial {
				return nil
			}
		}
	}
}
|
|
@ -0,0 +1,163 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package firestorm

import (
	"runtime"
	"testing"
	"time"

	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/index/store/gtreap"
)

// verifyDictRows asserts that, for every key in batch, the stored
// dictionary row's count equals the value in expect. It opens and
// closes its own reader. (Extracted from three near-identical copies
// of this loop; also adds the previously-missing nil-value check to
// the second and third verification passes.)
func verifyDictRows(t *testing.T, f *Firestorm, batch, expect map[string]int64) {
	reader, err := f.store.Reader()
	if err != nil {
		t.Fatal(err)
	}

	// Fix: `for key, _ := range` replaced with the idiomatic
	// `for key := range` (flagged by go vet / gofmt -s).
	for key := range batch {
		v, err := reader.Get([]byte(key))
		if err != nil {
			t.Fatal(err)
		}
		if v == nil {
			t.Fatal("unexpected dictionary value missing")
		}
		dr, err := NewDictionaryRowKV([]byte(key), v)
		if err != nil {
			t.Fatal(err)
		}
		if int64(dr.Count()) != expect[key] {
			t.Errorf("expected %d, got %d", expect[key], dr.Count())
		}
	}

	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}
}

// TestDictUpdater exercises the DictUpdater end to end: it applies an
// initial delta, an increment, and a decrement to the same dictionary
// key and verifies the stored count after each flush.
func TestDictUpdater(t *testing.T) {
	aq := index.NewAnalysisQueue(1)
	f, err := NewFirestorm(gtreap.Name, nil, aq)
	if err != nil {
		t.Fatal(err)
	}

	err = f.Open()
	if err != nil {
		t.Fatal(err)
	}

	fs := f.(*Firestorm)
	dictKey := string([]byte{'d', 1, 0, 'c', 'a', 't'})

	// initial delta: +3 -> 3
	dictBatch := map[string]int64{dictKey: 3}
	dictExpect := map[string]int64{dictKey: 3}

	fs.dictUpdater.NotifyBatch(dictBatch)

	// wait for the incoming channel to drain, then for a background
	// flush to complete
	for len(fs.dictUpdater.incoming) > 0 {
		runtime.Gosched()
	}
	err = fs.dictUpdater.waitTasksDone(5 * time.Second)
	if err != nil {
		t.Fatal(err)
	}

	verifyDictRows(t, fs, dictBatch, dictExpect)

	// update it again: +1 -> 4
	dictBatch = map[string]int64{dictKey: 1}
	dictExpect = map[string]int64{dictKey: 4}

	fs.dictUpdater.NotifyBatch(dictBatch)

	// invoke updater manually
	for len(fs.dictUpdater.incoming) > 0 {
		runtime.Gosched()
	}
	fs.dictUpdater.update()

	verifyDictRows(t, fs, dictBatch, dictExpect)

	// update it again (decrement this time): -2 -> 2
	dictBatch = map[string]int64{dictKey: -2}
	dictExpect = map[string]int64{dictKey: 2}

	fs.dictUpdater.NotifyBatch(dictBatch)

	// invoke updater manually
	for len(fs.dictUpdater.incoming) > 0 {
		runtime.Gosched()
	}
	fs.dictUpdater.update()

	verifyDictRows(t, fs, dictBatch, dictExpect)
}
|
|
@ -0,0 +1,128 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"

	"github.com/golang/protobuf/proto"
)
|
||||
|
||||
// ByteSeparator terminates variable-length components in row keys.
// A well-formed dictionary row key does not contain it; NewDictionaryRowK
// relies on that when reading the term.
const ByteSeparator byte = 0xff

// DictionaryKeyPrefix is the single-byte type prefix for dictionary rows.
var DictionaryKeyPrefix = []byte{'d'}

// DictionaryRow records, per (field, term), an aggregate count stored in a
// protobuf-encoded value (DictionaryValue).
type DictionaryRow struct {
	field uint16          // field index the term belongs to
	term  []byte          // raw term bytes
	value DictionaryValue // protobuf value holding the count
}
|
||||
|
||||
func NewDictionaryRow(field uint16, term []byte, count uint64) *DictionaryRow {
|
||||
rv := DictionaryRow{
|
||||
field: field,
|
||||
term: term,
|
||||
}
|
||||
rv.value.Count = proto.Uint64(count)
|
||||
return &rv
|
||||
}
|
||||
|
||||
func NewDictionaryRowK(key []byte) (*DictionaryRow, error) {
|
||||
rv := DictionaryRow{}
|
||||
buf := bytes.NewBuffer(key)
|
||||
_, err := buf.ReadByte() // type
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = binary.Read(buf, binary.LittleEndian, &rv.field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rv.term, err = buf.ReadBytes(ByteSeparator)
|
||||
// there is no separator expected here, should get EOF
|
||||
if err != io.EOF {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
|
||||
err := dr.value.Unmarshal(value)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) {
|
||||
rv, err := NewDictionaryRowK(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = rv.parseDictionaryV(value)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return rv, nil
|
||||
|
||||
}
|
||||
|
||||
// Count returns the aggregate count stored in this dictionary row.
func (dr *DictionaryRow) Count() uint64 {
	return dr.value.GetCount()
}
|
||||
|
||||
// SetCount overwrites the row's count.
func (dr *DictionaryRow) SetCount(count uint64) {
	dr.value.Count = proto.Uint64(count)
}
|
||||
|
||||
// KeySize returns the encoded key length: 1 prefix + 2 field + term bytes.
func (dr *DictionaryRow) KeySize() int {
	return 3 + len(dr.term)
}
|
||||
|
||||
// KeyTo encodes this row's key into buf, which must be at least KeySize()
// bytes, and returns the number of bytes written.
// Layout: 'd' prefix, little-endian uint16 field, raw term bytes.
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
	copy(buf[0:], DictionaryKeyPrefix)
	binary.LittleEndian.PutUint16(buf[1:3], dr.field)
	copy(buf[3:], dr.term)
	return 3 + len(dr.term), nil
}
|
||||
|
||||
// Key allocates and returns this row's encoded key.
func (dr *DictionaryRow) Key() []byte {
	buf := make([]byte, dr.KeySize())
	n, _ := dr.KeyTo(buf) // KeyTo cannot fail with a correctly sized buf
	return buf[:n]
}
|
||||
|
||||
// ValueSize returns the marshaled size of the protobuf value.
func (dr *DictionaryRow) ValueSize() int {
	return dr.value.Size()
}
|
||||
|
||||
// ValueTo marshals the protobuf value into buf and returns bytes written.
func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) {
	return dr.value.MarshalTo(buf)
}
|
||||
|
||||
// Value allocates and returns this row's encoded value.
func (dr *DictionaryRow) Value() []byte {
	buf := make([]byte, dr.ValueSize())
	n, _ := dr.ValueTo(buf) // MarshalTo cannot fail with a correctly sized buf
	return buf[:n]
}
|
||||
|
||||
func DictionaryRowKey(field uint16, term []byte) []byte {
|
||||
buf := make([]byte, 3+len(term))
|
||||
copy(buf[0:], DictionaryKeyPrefix)
|
||||
binary.LittleEndian.PutUint16(buf[1:3], field)
|
||||
copy(buf[3:], term)
|
||||
return buf
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
)
|
||||
|
||||
func TestDictionaryRows(t *testing.T) {
|
||||
tests := []struct {
|
||||
input index.IndexRow
|
||||
outKey []byte
|
||||
outVal []byte
|
||||
}{
|
||||
{
|
||||
NewDictionaryRow(0, []byte("test"), 3),
|
||||
[]byte{DictionaryKeyPrefix[0], 0, 0, 't', 'e', 's', 't'},
|
||||
[]byte{8, 3},
|
||||
},
|
||||
{
|
||||
NewDictionaryRow(3, []byte("dictionary"), 734),
|
||||
[]byte{DictionaryKeyPrefix[0], 3, 0, 'd', 'i', 'c', 't', 'i', 'o', 'n', 'a', 'r', 'y'},
|
||||
[]byte{8, 222, 5},
|
||||
},
|
||||
}
|
||||
|
||||
// test going from struct to k/v bytes
|
||||
for i, test := range tests {
|
||||
rk := test.input.Key()
|
||||
if !reflect.DeepEqual(rk, test.outKey) {
|
||||
t.Errorf("Expected key to be %v got: %v", test.outKey, rk)
|
||||
}
|
||||
rv := test.input.Value()
|
||||
if !reflect.DeepEqual(rv, test.outVal) {
|
||||
t.Errorf("Expected value to be %v got: %v for %d", test.outVal, rv, i)
|
||||
}
|
||||
}
|
||||
|
||||
// now test going back from k/v bytes to struct
|
||||
for i, test := range tests {
|
||||
row, err := NewDictionaryRowKV(test.outKey, test.outVal)
|
||||
if err != nil {
|
||||
t.Errorf("error parsking key/value: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(row, test.input) {
|
||||
t.Errorf("Expected: %#v got: %#v for %d", test.input, row, i)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
)
|
||||
|
||||
// the functions in this file are only intended to be used by
|
||||
// the bleve_dump utility and the debug http handlers
|
||||
// if your application relies on them, you're doing something wrong
|
||||
// they may change or be removed at any time
|
||||
|
||||
// dumpPrefix streams every row whose key starts with prefix onto rv,
// decoded via parseFromKeyValue. A nil prefix visits the entire store.
// A decode error is sent on rv and also stops and returns from the visit.
func (f *Firestorm) dumpPrefix(kvreader store.KVReader, rv chan interface{}, prefix []byte) error {
	return visitPrefix(kvreader, prefix, func(key, val []byte) (bool, error) {
		row, err := parseFromKeyValue(key, val)
		if err != nil {
			rv <- err
			return false, err
		}
		rv <- row
		return true, nil
	})
}
|
||||
|
||||
func (f *Firestorm) dumpDoc(kvreader store.KVReader, rv chan interface{}, docID []byte) error {
|
||||
// without a back index we have no choice but to walk the term freq and stored rows
|
||||
|
||||
// walk the term freqs
|
||||
err := visitPrefix(kvreader, TermFreqKeyPrefix, func(key, val []byte) (bool, error) {
|
||||
tfr, err := NewTermFreqRowKV(key, val)
|
||||
if err != nil {
|
||||
rv <- err
|
||||
return false, err
|
||||
}
|
||||
if bytes.Compare(tfr.DocID(), docID) == 0 {
|
||||
rv <- tfr
|
||||
}
|
||||
return true, nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// now walk the stored
|
||||
err = visitPrefix(kvreader, StoredKeyPrefix, func(key, val []byte) (bool, error) {
|
||||
sr, err := NewStoredRowKV(key, val)
|
||||
if err != nil {
|
||||
rv <- err
|
||||
return false, err
|
||||
}
|
||||
if bytes.Compare(sr.DocID(), docID) == 0 {
|
||||
rv <- sr
|
||||
}
|
||||
return true, nil
|
||||
})
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func parseFromKeyValue(key, value []byte) (index.IndexRow, error) {
|
||||
if len(key) > 0 {
|
||||
switch key[0] {
|
||||
case VersionKey[0]:
|
||||
return NewVersionRowV(value)
|
||||
case FieldKeyPrefix[0]:
|
||||
return NewFieldRowKV(key, value)
|
||||
case DictionaryKeyPrefix[0]:
|
||||
return NewDictionaryRowKV(key, value)
|
||||
case TermFreqKeyPrefix[0]:
|
||||
return NewTermFreqRowKV(key, value)
|
||||
case StoredKeyPrefix[0]:
|
||||
return NewStoredRowKV(key, value)
|
||||
case InternalKeyPrefix[0]:
|
||||
return NewInternalRowKV(key, value)
|
||||
}
|
||||
return nil, fmt.Errorf("Unknown field type '%s'", string(key[0]))
|
||||
}
|
||||
return nil, fmt.Errorf("Invalid empty key")
|
||||
}
|
|
@ -0,0 +1,129 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store/gtreap"
|
||||
)
|
||||
|
||||
// dictWaitDuration bounds how long tests wait for pending dictionary
// updater tasks to drain.
var dictWaitDuration = 5 * time.Second
|
||||
|
||||
func TestDump(t *testing.T) {
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
idx, err := NewFirestorm(gtreap.Name, nil, analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Fatalf("error opening index: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := idx.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
var expectedCount uint64
|
||||
docCount, err := idx.DocCount()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if docCount != expectedCount {
|
||||
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
|
||||
}
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.IndexField|document.StoreField))
|
||||
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, document.IndexField|document.StoreField))
|
||||
dateField, err := document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), document.IndexField|document.StoreField)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
doc.AddField(dateField)
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test2"), document.IndexField|document.StoreField))
|
||||
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, document.IndexField|document.StoreField))
|
||||
dateField, err = document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), document.IndexField|document.StoreField)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
doc.AddField(dateField)
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
fieldsCount := 0
|
||||
fieldsRows := idx.DumpFields()
|
||||
for _ = range fieldsRows {
|
||||
fieldsCount++
|
||||
}
|
||||
if fieldsCount != 4 { // _id field is automatic
|
||||
t.Errorf("expected 4 fields, got %d", fieldsCount)
|
||||
}
|
||||
|
||||
// 1 id term
|
||||
// 1 text term
|
||||
// 16 numeric terms
|
||||
// 16 date terms
|
||||
// 3 stored fields
|
||||
expectedDocRowCount := int(1 + 1 + (2 * (64 / document.DefaultPrecisionStep)) + 3)
|
||||
docRowCount := 0
|
||||
docRows := idx.DumpDoc("1")
|
||||
for _ = range docRows {
|
||||
docRowCount++
|
||||
}
|
||||
if docRowCount != expectedDocRowCount {
|
||||
t.Errorf("expected %d rows for document, got %d", expectedDocRowCount, docRowCount)
|
||||
}
|
||||
|
||||
docRowCount = 0
|
||||
docRows = idx.DumpDoc("2")
|
||||
for _ = range docRows {
|
||||
docRowCount++
|
||||
}
|
||||
if docRowCount != expectedDocRowCount {
|
||||
t.Errorf("expected %d rows for document, got %d", expectedDocRowCount, docRowCount)
|
||||
}
|
||||
|
||||
err = idx.(*Firestorm).dictUpdater.waitTasksDone(dictWaitDuration)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// 1 version
|
||||
// fieldsCount field rows
|
||||
// 2 docs * expectedDocRowCount
|
||||
// 2 text term row count (2 different text terms)
|
||||
// 16 numeric term row counts (shared for both docs, same numeric value)
|
||||
// 16 date term row counts (shared for both docs, same date value)
|
||||
//
|
||||
expectedAllRowCount := int(1 + fieldsCount + (2 * expectedDocRowCount) + 2 + int((2 * (64 / document.DefaultPrecisionStep))))
|
||||
allRowCount := 0
|
||||
allRows := idx.DumpAll()
|
||||
for _ = range allRows {
|
||||
allRowCount++
|
||||
}
|
||||
if allRowCount != expectedAllRowCount {
|
||||
t.Errorf("expected %d rows for all, got %d", expectedAllRowCount, allRowCount)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,119 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/golang/protobuf/proto"
|
||||
)
|
||||
|
||||
// FieldKeyPrefix is the single-byte type prefix for field rows.
var FieldKeyPrefix = []byte{'f'}
|
||||
|
||||
func (f *Firestorm) fieldIndexOrNewRow(name string) (uint16, *FieldRow) {
|
||||
index, existed := f.fieldCache.FieldNamed(name, true)
|
||||
if !existed {
|
||||
return index, NewFieldRow(uint16(index), name)
|
||||
}
|
||||
return index, nil
|
||||
}
|
||||
|
||||
// loadFields scans all persisted field rows and primes the in-memory
// field cache with the existing name -> index assignments.
func (f *Firestorm) loadFields(reader store.KVReader) (err error) {

	err = visitPrefix(reader, FieldKeyPrefix, func(key, val []byte) (bool, error) {
		fieldRow, err := NewFieldRowKV(key, val)
		if err != nil {
			return false, err
		}
		f.fieldCache.AddExisting(fieldRow.Name(), fieldRow.Index())
		return true, nil
	})

	return
}
|
||||
|
||||
// FieldRow maps a field's numeric index to its name (protobuf-encoded).
type FieldRow struct {
	index uint16     // field index (also encoded in the key)
	value FieldValue // protobuf value holding the field name
}
|
||||
|
||||
func NewFieldRow(i uint16, name string) *FieldRow {
|
||||
rv := FieldRow{
|
||||
index: i,
|
||||
}
|
||||
rv.value.Name = proto.String(name)
|
||||
return &rv
|
||||
}
|
||||
|
||||
// NewFieldRowKV decodes a FieldRow from its key ('f' prefix followed by a
// little-endian uint16 index) and protobuf-encoded value.
func NewFieldRowKV(key, value []byte) (*FieldRow, error) {
	rv := FieldRow{}

	buf := bytes.NewBuffer(key)
	_, err := buf.ReadByte() // type prefix
	if err != nil {
		return nil, err
	}
	err = binary.Read(buf, binary.LittleEndian, &rv.index)
	if err != nil {
		return nil, err
	}

	err = rv.value.Unmarshal(value)
	if err != nil {
		return nil, err
	}

	return &rv, nil
}
|
||||
|
||||
// KeySize returns the fixed encoded key length: 1 prefix + 2 index bytes.
func (fr *FieldRow) KeySize() int {
	return 3
}
|
||||
|
||||
func (fr *FieldRow) KeyTo(buf []byte) (int, error) {
|
||||
buf[0] = 'f'
|
||||
binary.LittleEndian.PutUint16(buf[1:3], fr.index)
|
||||
return 3, nil
|
||||
}
|
||||
|
||||
// Key allocates and returns this row's encoded key.
func (fr *FieldRow) Key() []byte {
	buf := make([]byte, fr.KeySize())
	n, _ := fr.KeyTo(buf) // KeyTo cannot fail with a correctly sized buf
	return buf[:n]
}
|
||||
|
||||
// ValueSize returns the marshaled size of the protobuf value.
func (fr *FieldRow) ValueSize() int {
	return fr.value.Size()
}
|
||||
|
||||
// ValueTo marshals the protobuf value into buf and returns bytes written.
func (fr *FieldRow) ValueTo(buf []byte) (int, error) {
	return fr.value.MarshalTo(buf)
}
|
||||
|
||||
// Value allocates and returns this row's encoded value.
func (fr *FieldRow) Value() []byte {
	buf := make([]byte, fr.ValueSize())
	n, _ := fr.ValueTo(buf) // MarshalTo cannot fail with a correctly sized buf
	return buf[:n]
}
|
||||
|
||||
// Index returns the field's numeric index.
func (fr *FieldRow) Index() uint16 {
	return fr.index
}
|
||||
|
||||
// Name returns the field's name from the protobuf value.
func (fr *FieldRow) Name() string {
	return fr.value.GetName()
}
|
||||
|
||||
// String renders the row for debugging / dump output.
func (fr *FieldRow) String() string {
	return fmt.Sprintf("FieldRow - Field: %d - Name: %s\n", fr.index, fr.Name())
}
|
|
@ -0,0 +1,59 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
)
|
||||
|
||||
func TestFieldRows(t *testing.T) {
|
||||
tests := []struct {
|
||||
input index.IndexRow
|
||||
outKey []byte
|
||||
outVal []byte
|
||||
}{
|
||||
{
|
||||
NewFieldRow(0, "_id"),
|
||||
[]byte{FieldKeyPrefix[0], 0, 0},
|
||||
[]byte{10, 3, '_', 'i', 'd'},
|
||||
},
|
||||
{
|
||||
NewFieldRow(1, "name"),
|
||||
[]byte{FieldKeyPrefix[0], 1, 0},
|
||||
[]byte{10, 4, 'n', 'a', 'm', 'e'},
|
||||
},
|
||||
}
|
||||
|
||||
// test going from struct to k/v bytes
|
||||
for i, test := range tests {
|
||||
rk := test.input.Key()
|
||||
if !reflect.DeepEqual(rk, test.outKey) {
|
||||
t.Errorf("Expected key to be %v got: %v", test.outKey, rk)
|
||||
}
|
||||
rv := test.input.Value()
|
||||
if !reflect.DeepEqual(rv, test.outVal) {
|
||||
t.Errorf("Expected value to be %v got: %v for %d", test.outVal, rv, i)
|
||||
}
|
||||
}
|
||||
|
||||
// now test going back from k/v bytes to struct
|
||||
for i, test := range tests {
|
||||
row, err := NewFieldRowKV(test.outKey, test.outVal)
|
||||
if err != nil {
|
||||
t.Errorf("error parsking key/value: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(row, test.input) {
|
||||
t.Errorf("Expected: %#v got: %#v for %d", test.input, row, i)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,551 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
)
|
||||
|
||||
// Name is the registered name of this index implementation.
const Name = "firestorm"

// UnsafeBatchUseDetected is returned by Batch when the batch is observed
// being mutated while it is executing.
var UnsafeBatchUseDetected = fmt.Errorf("bleve.Batch is NOT thread-safe, modification after execution detected")
|
||||
|
||||
// Firestorm is an index.Index implementation over a KV store. Writes are
// append-only; stale-row compensation and cleanup are delegated to the
// background lookuper, garbage collector and dictionary updater.
type Firestorm struct {
	storeName        string                 // registered KV store name
	storeConfig      map[string]interface{} // opaque store configuration
	store            store.KVStore          // opened in Open
	compensator      *Compensator
	analysisQueue    *index.AnalysisQueue
	fieldCache       *index.FieldCache
	highDocNumber    uint64  // last doc number assigned (accessed atomically)
	docCount         *uint64 // live document count (accessed atomically)
	garbageCollector *GarbageCollector
	lookuper         *Lookuper
	dictUpdater      *DictUpdater
	stats            *indexStat
}
|
||||
|
||||
// NewFirestorm constructs a firestorm index over the named KV store.
// The store itself is not opened until Open is called.
func NewFirestorm(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) {
	initialCount := uint64(0)
	rv := Firestorm{
		storeName:     storeName,
		storeConfig:   storeConfig,
		compensator:   NewCompensator(),
		analysisQueue: analysisQueue,
		fieldCache:    index.NewFieldCache(),
		docCount:      &initialCount,
		highDocNumber: 0,
		stats:         &indexStat{},
	}
	// wire up the back-references the helpers need
	rv.stats.f = &rv
	rv.garbageCollector = NewGarbageCollector(&rv)
	rv.lookuper = NewLookuper(&rv)
	rv.dictUpdater = NewDictUpdater(&rv)
	return &rv, nil
}
|
||||
|
||||
func (f *Firestorm) Open() (err error) {
|
||||
|
||||
// open the kv store
|
||||
storeConstructor := registry.KVStoreConstructorByName(f.storeName)
|
||||
if storeConstructor == nil {
|
||||
err = index.ErrorUnknownStorageType
|
||||
return
|
||||
}
|
||||
|
||||
// now open the store
|
||||
f.store, err = storeConstructor(&mergeOperator, f.storeConfig)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// start a reader
|
||||
var kvreader store.KVReader
|
||||
kvreader, err = f.store.Reader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// assert correct version, and find out if this is new index
|
||||
var newIndex bool
|
||||
newIndex, err = f.checkVersion(kvreader)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if !newIndex {
|
||||
// process existing index before opening
|
||||
err = f.warmup(kvreader)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
err = kvreader.Close()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if newIndex {
|
||||
// prepare a new index
|
||||
err = f.bootstrap()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// start the garbage collector
|
||||
f.garbageCollector.Start()
|
||||
|
||||
// start the lookuper
|
||||
f.lookuper.Start()
|
||||
|
||||
// start the dict updater
|
||||
f.dictUpdater.Start()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Close stops the background workers and then closes the KV store.
func (f *Firestorm) Close() error {
	f.garbageCollector.Stop()
	f.lookuper.Stop()
	f.dictUpdater.Stop()
	return f.store.Close()
}
|
||||
|
||||
func (f *Firestorm) DocCount() (uint64, error) {
|
||||
count := atomic.LoadUint64(f.docCount)
|
||||
return count, nil
|
||||
|
||||
}
|
||||
|
||||
func (f *Firestorm) Update(doc *document.Document) (err error) {
|
||||
|
||||
// assign this document a number
|
||||
doc.Number = atomic.AddUint64(&f.highDocNumber, 1)
|
||||
|
||||
// do analysis before acquiring write lock
|
||||
analysisStart := time.Now()
|
||||
resultChan := make(chan *index.AnalysisResult)
|
||||
aw := index.NewAnalysisWork(f, doc, resultChan)
|
||||
|
||||
// put the work on the queue
|
||||
f.analysisQueue.Queue(aw)
|
||||
|
||||
// wait for the result
|
||||
result := <-resultChan
|
||||
close(resultChan)
|
||||
atomic.AddUint64(&f.stats.analysisTime, uint64(time.Since(analysisStart)))
|
||||
|
||||
// start a writer for this update
|
||||
indexStart := time.Now()
|
||||
var kvwriter store.KVWriter
|
||||
kvwriter, err = f.store.Writer()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if cerr := kvwriter.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
var dictionaryDeltas map[string]int64
|
||||
dictionaryDeltas, err = f.batchRows(kvwriter, [][]index.IndexRow{result.Rows}, nil)
|
||||
if err != nil {
|
||||
_ = kvwriter.Close()
|
||||
atomic.AddUint64(&f.stats.errors, 1)
|
||||
return
|
||||
}
|
||||
|
||||
f.compensator.Mutate([]byte(doc.ID), doc.Number)
|
||||
f.lookuper.NotifyBatch([]*InFlightItem{&InFlightItem{[]byte(doc.ID), doc.Number}})
|
||||
f.dictUpdater.NotifyBatch(dictionaryDeltas)
|
||||
|
||||
atomic.AddUint64(&f.stats.indexTime, uint64(time.Since(indexStart)))
|
||||
return
|
||||
}
|
||||
|
||||
// Delete marks the document deleted by compensating it to doc number 0;
// physical row cleanup happens asynchronously via the lookuper/GC.
func (f *Firestorm) Delete(id string) error {
	indexStart := time.Now()
	f.compensator.Mutate([]byte(id), 0)
	f.lookuper.NotifyBatch([]*InFlightItem{&InFlightItem{[]byte(id), 0}})
	atomic.AddUint64(&f.stats.indexTime, uint64(time.Since(indexStart)))
	return nil
}
|
||||
|
||||
// batchRows writes all rows in rowsOfRows (plus deletions of deleteKeys)
// as one KV batch. Sizes are counted up-front so NewBatchEx can allocate a
// single encode buffer. Returns the dictionary count deltas implied by the
// term freq rows, keyed by encoded dictionary row key.
func (f *Firestorm) batchRows(writer store.KVWriter, rowsOfRows [][]index.IndexRow, deleteKeys [][]byte) (map[string]int64, error) {

	dictionaryDeltas := make(map[string]int64)

	// count up bytes needed for buffering.
	addNum := 0
	addKeyBytes := 0
	addValBytes := 0

	deleteNum := 0
	deleteKeyBytes := 0

	// scratch buffer for dictionary row keys, reused across rows
	var kbuf []byte

	// prepareBuf grows (with slack) or reslices the scratch buffer
	prepareBuf := func(buf []byte, sizeNeeded int) []byte {
		if cap(buf) < sizeNeeded {
			return make([]byte, sizeNeeded, sizeNeeded+128)
		}
		return buf[0:sizeNeeded]
	}

	// first pass: accumulate sizes and dictionary deltas
	for _, rows := range rowsOfRows {
		for _, row := range rows {
			tfr, ok := row.(*TermFreqRow)
			if ok {
				// field 0 (the _id field) carries no dictionary stats
				if tfr.Field() != 0 {
					kbuf = prepareBuf(kbuf, tfr.DictionaryRowKeySize())
					klen, err := tfr.DictionaryRowKeyTo(kbuf)
					if err != nil {
						return nil, err
					}

					dictionaryDeltas[string(kbuf[0:klen])] += 1
				}
			}

			addKeyBytes += row.KeySize()
			addValBytes += row.ValueSize()
		}
		addNum += len(rows)
	}

	for _, dk := range deleteKeys {
		deleteKeyBytes += len(dk)
	}
	deleteNum += len(deleteKeys)

	// prepare batch
	totBytes := addKeyBytes + addValBytes + deleteKeyBytes

	// buf is a single pre-sized arena shared by all sets/deletes below
	buf, wb, err := writer.NewBatchEx(store.KVBatchOptions{
		TotalBytes: totBytes,
		NumSets:    addNum,
		NumDeletes: deleteNum,
		NumMerges:  0,
	})
	if err != nil {
		return nil, err
	}
	defer func() {
		_ = wb.Close()
	}()

	// second pass: encode each row into the arena and advance buf past the
	// bytes consumed — the batch retains the subslices
	for _, rows := range rowsOfRows {
		for _, row := range rows {
			klen, err := row.KeyTo(buf)
			if err != nil {
				return nil, err
			}

			vlen, err := row.ValueTo(buf[klen:])
			if err != nil {
				return nil, err
			}

			wb.Set(buf[0:klen], buf[klen:klen+vlen])

			buf = buf[klen+vlen:]
		}
	}

	for _, dk := range deleteKeys {
		dklen := copy(buf, dk)
		wb.Delete(buf[0:dklen])
		buf = buf[dklen:]
	}

	// write out the batch
	err = writer.ExecuteBatch(wb)
	if err != nil {
		return nil, err
	}
	return dictionaryDeltas, nil
}
|
||||
|
||||
// Batch applies a set of document updates/deletes plus internal ops as a
// single KV batch. Analysis is fanned out through the analysis queue; a
// watchdog goroutine detects concurrent mutation of the (non-thread-safe)
// batch and causes UnsafeBatchUseDetected to be returned.
func (f *Firestorm) Batch(batch *index.Batch) (err error) {

	// acquire enough doc numbers for all updates in the batch
	// FIXME we actually waste doc numbers because deletes are in the
	// same map and we don't need numbers for them
	lastDocNumber := atomic.AddUint64(&f.highDocNumber, uint64(len(batch.IndexOps)))
	firstDocNumber := lastDocNumber - uint64(len(batch.IndexOps)) + 1

	analysisStart := time.Now()
	resultChan := make(chan *index.AnalysisResult)

	var docsUpdated uint64
	var docsDeleted uint64
	for _, doc := range batch.IndexOps {
		if doc != nil {
			doc.Number = firstDocNumber // actually assign doc numbers here
			firstDocNumber++
			docsUpdated++
		} else {
			docsDeleted++
		}
	}

	// detectedUnsafe is set by the queuing goroutine if it sees more
	// non-nil ops than were counted above (batch mutated mid-flight)
	var detectedUnsafeMutex sync.RWMutex
	detectedUnsafe := false

	// queue analysis work asynchronously so we can drain resultChan below
	go func() {
		sofar := uint64(0)
		for _, doc := range batch.IndexOps {
			if doc != nil {
				sofar++
				if sofar > docsUpdated {
					detectedUnsafeMutex.Lock()
					detectedUnsafe = true
					detectedUnsafeMutex.Unlock()
					return
				}
				aw := index.NewAnalysisWork(f, doc, resultChan)
				// put the work on the queue
				f.analysisQueue.Queue(aw)
			}
		}
	}()

	// extra 1 capacity for internal updates.
	collectRows := make([][]index.IndexRow, 0, docsUpdated+1)

	// wait for the result
	var itemsDeQueued uint64
	for itemsDeQueued < docsUpdated {
		result := <-resultChan
		collectRows = append(collectRows, result.Rows)
		itemsDeQueued++
	}
	close(resultChan)

	detectedUnsafeMutex.RLock()
	defer detectedUnsafeMutex.RUnlock()
	if detectedUnsafe {
		return UnsafeBatchUseDetected
	}

	atomic.AddUint64(&f.stats.analysisTime, uint64(time.Since(analysisStart)))

	// split internal ops into row updates and key deletions
	var deleteKeys [][]byte
	if len(batch.InternalOps) > 0 {
		// add the internal ops
		updateInternalRows := make([]index.IndexRow, 0, len(batch.InternalOps))
		for internalKey, internalValue := range batch.InternalOps {
			if internalValue == nil {
				// delete
				deleteInternalRow := NewInternalRow([]byte(internalKey), nil)
				deleteKeys = append(deleteKeys, deleteInternalRow.Key())
			} else {
				updateInternalRow := NewInternalRow([]byte(internalKey), internalValue)
				updateInternalRows = append(updateInternalRows, updateInternalRow)
			}
		}
		collectRows = append(collectRows, updateInternalRows)
	}

	// doc number 0 marks a delete for the compensator/lookuper
	inflightItems := make([]*InFlightItem, 0, len(batch.IndexOps))
	for docID, doc := range batch.IndexOps {
		if doc != nil {
			inflightItems = append(inflightItems,
				&InFlightItem{[]byte(docID), doc.Number})
		} else {
			inflightItems = append(inflightItems,
				&InFlightItem{[]byte(docID), 0})
		}
	}

	indexStart := time.Now()

	// start a writer for this batch
	var kvwriter store.KVWriter
	kvwriter, err = f.store.Writer()
	if err != nil {
		return
	}

	var dictionaryDeltas map[string]int64
	dictionaryDeltas, err = f.batchRows(kvwriter, collectRows, deleteKeys)
	if err != nil {
		_ = kvwriter.Close()
		atomic.AddUint64(&f.stats.errors, 1)
		return
	}

	f.compensator.MutateBatch(inflightItems, lastDocNumber)

	// Close error is folded into the stats accounting below
	err = kvwriter.Close()

	f.lookuper.NotifyBatch(inflightItems)
	f.dictUpdater.NotifyBatch(dictionaryDeltas)

	atomic.AddUint64(&f.stats.indexTime, uint64(time.Since(indexStart)))

	if err == nil {
		atomic.AddUint64(&f.stats.updates, docsUpdated)
		atomic.AddUint64(&f.stats.deletes, docsDeleted)
		atomic.AddUint64(&f.stats.batches, 1)
	} else {
		atomic.AddUint64(&f.stats.errors, 1)
	}

	return
}
|
||||
|
||||
func (f *Firestorm) SetInternal(key, val []byte) (err error) {
|
||||
internalRow := NewInternalRow(key, val)
|
||||
var writer store.KVWriter
|
||||
writer, err = f.store.Writer()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if cerr := writer.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
wb := writer.NewBatch()
|
||||
wb.Set(internalRow.Key(), internalRow.Value())
|
||||
|
||||
return writer.ExecuteBatch(wb)
|
||||
}
|
||||
|
||||
func (f *Firestorm) DeleteInternal(key []byte) (err error) {
|
||||
internalRow := NewInternalRow(key, nil)
|
||||
var writer store.KVWriter
|
||||
writer, err = f.store.Writer()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if cerr := writer.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
wb := writer.NewBatch()
|
||||
wb.Delete(internalRow.Key())
|
||||
|
||||
return writer.ExecuteBatch(wb)
|
||||
}
|
||||
|
||||
// DumpAll streams every row of the index (no key-prefix filter) over the
// returned channel. Rows are produced by f.dumpPrefix; any error
// encountered (opening the reader, closing it, or during the dump) is
// sent on the same channel. The channel is closed when the dump ends.
// NOTE: defers run LIFO, so close(rv) fires after the reader-close defer
// below — an error from Close can still be delivered before the close.
func (f *Firestorm) DumpAll() chan interface{} {
	rv := make(chan interface{})
	go func() {
		defer close(rv)

		// start an isolated reader for use during the dump
		kvreader, err := f.store.Reader()
		if err != nil {
			rv <- err
			return
		}
		defer func() {
			cerr := kvreader.Close()
			if cerr != nil {
				rv <- cerr
			}
		}()

		err = f.dumpPrefix(kvreader, rv, nil)
		if err != nil {
			rv <- err
			return
		}
	}()
	return rv
}
|
||||
|
||||
// DumpDoc streams the rows associated with the given document id over
// the returned channel, via f.dumpDoc. Errors (reader open/close or
// dump failures) are delivered on the same channel, which is closed when
// the dump ends. Defer ordering mirrors DumpAll: close(rv) runs last.
func (f *Firestorm) DumpDoc(docID string) chan interface{} {
	rv := make(chan interface{})
	go func() {
		defer close(rv)

		// start an isolated reader for use during the dump
		kvreader, err := f.store.Reader()
		if err != nil {
			rv <- err
			return
		}
		defer func() {
			cerr := kvreader.Close()
			if cerr != nil {
				rv <- cerr
			}
		}()

		err = f.dumpDoc(kvreader, rv, []byte(docID))
		if err != nil {
			rv <- err
			return
		}
	}()
	return rv
}
|
||||
|
||||
// DumpFields streams all field rows (keys under FieldKeyPrefix) over the
// returned channel, via f.dumpPrefix. Errors (reader open/close or dump
// failures) are delivered on the same channel, which is closed when the
// dump ends. Defer ordering mirrors DumpAll: close(rv) runs last.
func (f *Firestorm) DumpFields() chan interface{} {
	rv := make(chan interface{})
	go func() {
		defer close(rv)

		// start an isolated reader for use during the dump
		kvreader, err := f.store.Reader()
		if err != nil {
			rv <- err
			return
		}
		defer func() {
			cerr := kvreader.Close()
			if cerr != nil {
				rv <- cerr
			}
		}()

		err = f.dumpPrefix(kvreader, rv, FieldKeyPrefix)
		if err != nil {
			rv <- err
			return
		}
	}()
	return rv
}
|
||||
|
||||
// Reader returns an index.IndexReader over the index; see
// newFirestormReader for the state it captures.
func (f *Firestorm) Reader() (index.IndexReader, error) {
	return newFirestormReader(f)
}
|
||||
|
||||
// Stats exposes the index's runtime counters as a json.Marshaler.
func (f *Firestorm) Stats() json.Marshaler {
	return f.stats

}
|
||||
|
||||
// Wait blocks until the dictionary updater has drained its pending
// tasks, or until timeout elapses; the error comes from waitTasksDone.
func (f *Firestorm) Wait(timeout time.Duration) error {
	return f.dictUpdater.waitTasksDone(timeout)
}
|
||||
|
||||
// init registers the firestorm index type with the bleve registry so it
// can be selected by name when creating an index.
func init() {
	registry.RegisterIndexType(Name, NewFirestorm)
}
|
|
@ -0,0 +1,382 @@
|
|||
# Firestorm
|
||||
|
||||
A new indexing scheme for Bleve.
|
||||
|
||||
## Background
|
||||
|
||||
### Goals
|
||||
|
||||
- Avoid a single writer that must pause writing to perform computation
|
||||
- either by allowing multiple writers, if computation cannot be avoided
|
||||
- or by having a single writer which can insert rows uninterrupted
|
||||
- Avoid the need for a back index
|
||||
- the back index is expensive from a space perspective
|
||||
- by not writing it out, we should be able to obtain a higher indexing throughput
|
||||
- consulting the backindex is one of the read/think/update cycles mentioned above
|
||||
|
||||
### Considerations
|
||||
- The cost for not maintaining a back index is paid in two places
|
||||
- Searches may need to read more rows, because old/deleted rows may still exist
|
||||
- These rows can be excluded, so correctness is not affected, but they will be slower
|
||||
- Old/Deleted rows need to be cleaned up at some point
|
||||
- This could either be through an explicit cleanup thread, the job of which is to constantly walk the kvstore looking for rows to delete
|
||||
- Or, it could be integrated with a KV stores natural merge/compaction process (aka RocksDB)
|
||||
|
||||
### Semantics
|
||||
|
||||
It is helpful to review the desired semantics between the Index/Delete operations and Term Searches.
|
||||
|
||||
#### Index(doc_id, doc)
|
||||
|
||||
- Empty Index
|
||||
- Term Search for "cat" = empty result set
|
||||
|
||||
The Index operation should update the index such that after the operation returns, a matching search would return the document.
|
||||
|
||||
- Index("a", "small cat")
|
||||
- Term Search for "cat" = {"a"}
|
||||
|
||||
Calling the Index operation again for the same doc_id should update the index such that after the operation returns, only searches matching the newest version return the document.
|
||||
|
||||
- Index("a", "big dog")
|
||||
- Term Search for "cat" = empty result set
|
||||
- Term Search for "dog" = {"a"}
|
||||
|
||||
NOTE:
|
||||
|
||||
- At no point during the second index operation would concurrent searches for "cat" and "dog" both return 0 results.
|
||||
- At no point during the second index operation would concurrent searches for "cat" and "dog" both return 1 result.
|
||||
|
||||
#### Delete(doc_id)
|
||||
|
||||
- Index("a", "small cat")
|
||||
- Term Search for "cat" = {"a"}
|
||||
- Delete("a")
|
||||
- Term Search for "cat" = empty result set
|
||||
|
||||
Once the Delete operation returns, the document should no longer be returned by any search.
|
||||
|
||||
## Details
|
||||
|
||||
### Terminology
|
||||
|
||||
Document ID (`doc_id`)
|
||||
:The user specified identifier (utf8 string). This never changes for a document.
|
||||
|
||||
Document Number (`doc_number`)
|
||||
:The Bleve internal identifier (uint64). These numbers are generated from an atomic counter.
|
||||
|
||||
DocIdNumber
|
||||
: Concatenation of `<doc_id> 0xff <doc_number>`
|
||||
|
||||
### Theory of Operation
|
||||
|
||||
By including a new unique identifier as a part of every row generated, the index operation no longer concerns itself with updating existing values or deleting previous values.
|
||||
|
||||
Removal of old rows is handled independently by separate threads.
|
||||
|
||||
Ensuring of correct semantics with respect to added/updated/deleted documents is maintained through synchronized in-memory data structures, to compensate for the decoupling of these other operations.
|
||||
|
||||
The Dictionary becomes a best effort data element. In kill-9 scenarios it could become incorrect, but it is believed that this will generally only affect scoring not correctness, and we can pursue read-repair operations.
|
||||
|
||||
### Index State
|
||||
|
||||
The following pseudo-structure will be used to explain changes to the internal state. Keep in mind the datatypes shown represent the logical structure required for correct behavior. The actual implementation may be different to achieve performance goals.
|
||||
|
||||
indexState {
|
||||
docCount uint64
|
||||
fieldCache map[string]uint16
|
||||
nextDocNumber uint64
|
||||
docIdNumberMutex sync.RWMutex // for protecting fields below
|
||||
maxReadDocNumber uint64
|
||||
inFlightDocIds map[string]uint64
|
||||
deletedDocIdNumbers [][]byte
|
||||
}
|
||||
|
||||
### Operation
|
||||
|
||||
#### Creating New Index
|
||||
|
||||
- New KV Batch
|
||||
- SET VersionRow{version=X}
|
||||
- SET FieldRow{field_id=0 field_name="_id"}
|
||||
- Execute Batch
|
||||
- Index State initialized to:
|
||||
|
||||
{
|
||||
docCount = 0
|
||||
fieldCache = {
|
||||
"_id": 0
|
||||
}
|
||||
nextDocNumber = 1
|
||||
maxReadDocNumber = 0
|
||||
inFlightDocIds = {}
|
||||
deletedDocIdNumbers = {}
|
||||
}
|
||||
|
||||
- Garbage Collector Thread is started
|
||||
- Old Doc Number Lookup Thread is started
|
||||
- Index marked open
|
||||
|
||||
#### Opening an Existing Index
|
||||
|
||||
- GET VersionRow, assert current version or exit
|
||||
- ITERATE all FieldRows{}
|
||||
- ITERATE all TermFrequencyRow{ where field_id = 0 }
|
||||
- Identify consecutive rows with same doc_id but different doc_number
|
||||
- Lower document numbers are added to the deletedDocIdNumbers list
|
||||
- Count all non-duplicate rows, seed the docCount
|
||||
- Observe highest document number seen, seed nextDocNumber
|
||||
|
||||
- Index State initialized to:
|
||||
|
||||
{
|
||||
docCount = <as counted above>
|
||||
fieldCache = {
|
||||
"_id": 0
|
||||
<as scanned above>
|
||||
}
|
||||
nextDocNumber = <as scanned above> + 1
|
||||
maxReadDocNumber = <same as nextDocNumber>
|
||||
inFlightDocIds = {}
|
||||
deletedDocIdNumbers = {<as scanned above>}
|
||||
}
|
||||
|
||||
- Garbage Collector Thread is started
|
||||
- Old Doc Number Lookup Thread is started
|
||||
- Index marked open
|
||||
|
||||
#### Garbage Collector Thread
|
||||
|
||||
The role of the Garbage Collector thread is to clean up rows referring to document numbers that are no longer relevant (document was deleted or updated).
|
||||
|
||||
Currently, only two types of rows include document numbers:
|
||||
- Term Frequency Rows
|
||||
- Stored Rows
|
||||
|
||||
The current thought is that the garbage collector thread will use a single iterator to iterate the following key spaces:
|
||||
|
||||
- TermFrequencyRow { where field_id > 0}
|
||||
- StoredRow {all}
|
||||
|
||||
For any row referring to a document number on the deletedDocNumbers list, that key will be DELETED.
|
||||
|
||||
The garbage collector will track loop iterations or start key for each deletedDocNumber so that it knows when it has walked a full circle for a given doc number. At that point the following happen in order:
|
||||
|
||||
- docNumber is removed from the deletedDocNumbers list
|
||||
- DELETE is issued on TermFreqRow{ field_id=0, term=doc_id, doc_id=doc_id_number }
|
||||
|
||||
The last thing we do is delete the TermFreqRow for field 0. If anything crashes at any point prior to this, we will again read this record on our next warmup and that doc_id_number will again go through the garbage collection process.
|
||||
|
||||
#### Old Doc Number Lookup Thread
|
||||
|
||||
The role of the Old Doc Number Lookup thread is to asynchronously look up old document numbers in use for a given document id.
|
||||
|
||||
Waits in a select loop reading from a channel. Through this channel it is notified of a doc_id where work is to be done. When a doc_id comes in, the following is performed:
|
||||
|
||||
- Acquire indexState.docIdNumberMutex for reading:
|
||||
- Read maxReadDocNumber
|
||||
- Find doc_id/doc_number k/v pair in the inFlightDocIds map
|
||||
- Release indexState.docIdNumberMutex
|
||||
- Start Iterator at TermFrequency{ field_id=0 term=doc_id}
|
||||
- Iterator until term != doc_id
|
||||
|
||||
All doc_numbers found that are less than maxReadDocNumber and != doc_number in the inFlightDocIds map are now scheduled for deletion.
|
||||
|
||||
- Acquire indexState.docIdNumberMutex for writing:
|
||||
- add doc numbers to deletedDocIdNumbers
|
||||
- check if doc_number in inFlightDocIds is still the same
|
||||
- if so delete it
|
||||
- if not, it was updated again, so we must leave it
|
||||
- Release indexState.docIdNumberMutex
|
||||
|
||||
Notify Garbage Collector Thread directly of new doc_numbers.
|
||||
|
||||
#### Term Dictionary Updater Thread
|
||||
|
||||
The role of the Term Dictionary Updater thread is to asynchronously perform best-effort updates to the Term Dictionary. Note the contents of the Term Dictionary only affect scoring, and not correctness of query results.
|
||||
|
||||
NOTE: one case where correctness could be affected is if the dictionary is completely missing a term which has non-zero usage. Since the garbage collector thread is continually looking at these rows, its help could be enlisted to detect/repair this situation.
|
||||
|
||||
It is notified via a channel of increased term usage (by index ops) and of decreased term usage (by the garbage collector cleaning up old usage).
|
||||
|
||||
#### Indexing a Document
|
||||
|
||||
- Perform all analysis on the document.
|
||||
- new_doc_number = indexState.nextDocNumber++
|
||||
- Create New Batch
|
||||
- Batch will contain SET operations for:
|
||||
- any new Fields
|
||||
- Term Frequency Rows for indexed fields terms
|
||||
- Stored Rows for stored fields
|
||||
- Execute Batch
|
||||
- Acquire indexState.docIdNumberMutex for writing:
|
||||
- set maxReadDocNumber new_doc_number
|
||||
- set inFlightDocIds{ docId = new_doc_number }
|
||||
- Release indexState.docIdNumberMutex
|
||||
- Notify Term Frequency Updater thread of increased term usage.
|
||||
- Notify Old Doc Number Lookup Thread of doc_id.
|
||||
|
||||
The key property is that a search matching the updated document *SHOULD* return the document once this method returns. If the document was an update, it should return the previous document until this method returns. There should be no period of time where neither document matches.
|
||||
|
||||
#### Deleting a Document
|
||||
|
||||
- Acquire indexState.docIdNumberMutex for writing:
|
||||
- set inFlightDocIds{ docId = 0 } // 0 is a doc number we never use, indicates pending deletion of docId
|
||||
- Release indexState.docIdNumberMutex
|
||||
- Notify Old Doc Number Lookup Thread of doc_id.
|
||||
|
||||
#### Batch Operations
|
||||
|
||||
Batch operations look largely just like the indexing/deleting operations. Two other optimizations come into play.
|
||||
|
||||
- More SET operations in the underlying batch
|
||||
- Larger aggregated updates can be passed to the Term Frequency Updater Thread
|
||||
|
||||
#### Term Field Iteration
|
||||
|
||||
- Acquire indexState.docIdNumberMutex for reading:
|
||||
- Get copy of: (it is assumed some COW data structure is used, or MVCC is accommodated in some way by the impl)
|
||||
- maxReadDocNumber
|
||||
- inFlightDocIds
|
||||
- deletedDocIdNumbers
|
||||
- Release indexState.docIdNumberMutex
|
||||
|
||||
Term Field Iteration is used by the basic term search. It produces the set of documents (and related info like term vectors) which used the specified term in the specified field.
|
||||
|
||||
Iterator starts at key:
|
||||
|
||||
```'t' <field id uint16> <term utf8> 0xff```
|
||||
|
||||
Iterator ends when the term does not match.
|
||||
|
||||
- Any row with doc_number > maxReadDocNumber MUST be ignored.
|
||||
- Any row with doc_id_number on the deletedDocIdNumber list MUST be ignored.
|
||||
- Any row with the same doc_id as an entry in the inFlightDocIds map, MUST have the same number.
|
||||
|
||||
Any row satisfying the above conditions is a candidate document.
|
||||
|
||||
### Row Encoding
|
||||
|
||||
All keys are manually encoded to ensure a precise row ordering.
|
||||
|
||||
Internal Row values are opaque byte arrays.
|
||||
|
||||
All other values are encoded using protobuf for a balance of efficiency and flexibility. Dictionary and TermFrequency rows are the most likely to take advantage of this flexibility, but other rows are read/written infrequently enough that the flexibility outweighs any overhead.
|
||||
|
||||
#### Version
|
||||
|
||||
There is a single version row which records which version of the firestorm indexing scheme is in use.
|
||||
|
||||
| Key | Value |
|
||||
|---------|------------|
|
||||
|```'v'```|```<VersionValue protobuf>```|
|
||||
|
||||
message VersionValue {
|
||||
required uint64 version = 1;
|
||||
}
|
||||
|
||||
#### Field
|
||||
|
||||
Field rows map field names to numeric values
|
||||
|
||||
| Key | Value |
|
||||
|---------|------------|
|
||||
|```'f' <field id uint16>```|```<FieldValue protobuf>```|
|
||||
|
||||
message FieldValue {
|
||||
required string name = 1;
|
||||
}
|
||||
|
||||
#### Dictionary
|
||||
|
||||
Dictionary rows record which terms are used in a particular field. The value can be used to store additional information about the term usage. The value will be encoded using protobuf so that future versions can add data to this structure.
|
||||
|
||||
| Key | Value |
|
||||
|---------|------------|
|
||||
|```'d' <field id uint16> <term utf8>```|```<DictionaryValue protobuf>```|
|
||||
|
||||
message DictionaryValue {
|
||||
optional uint64 count = 1; // number of documents using this term in this field
|
||||
}
|
||||
|
||||
#### Term Frequency
|
||||
|
||||
Term Frequency rows record which documents use a term in a particular field. The value must record how often the term occurs. It may optionally include other details such as a normalization value (precomputed scoring adjustment for the length of the field) and term vectors (where the term occurred within the field). The value will be encoded using protobuf so that future versions can add data to this structure.
|
||||
|
||||
| Key | Value |
|
||||
|---------|------------|
|
||||
|```'t' <field id uint16> <term utf8> 0xff <doc_id utf8 > 0xff <doc number uint64>```|```<TermFreqValue protobuf>```|
|
||||
|
||||
|
||||
message TermVectorEntry {
|
||||
optional uint32 field = 1; // field optional if redundant, required for composite fields
|
||||
optional uint64 pos = 2; // positional offset within the field
|
||||
optional uint64 start = 3; // start byte offset
|
||||
optional uint64 end = 4; // end byte offset
|
||||
repeated uint64 arrayPositions = 5; // array positions
|
||||
}
|
||||
|
||||
message TermFrequencyValue {
|
||||
required uint64 freq = 1; // frequency of the term occurrence within this field
|
||||
optional float norm = 2; // normalization factor
|
||||
repeated TermVectorEntry vectors = 3; // term vectors
|
||||
}
|
||||
|
||||
#### Stored
|
||||
|
||||
Stored rows record the original values used to produce the index. At the row encoding level this is an opaque sequence of bytes.
|
||||
|
||||
| Key | Value |
|
||||
|---------------------------|-------------------------|
|
||||
|```'s' <doc id utf8> 0xff <doc number uint64> <field id uint16>```|```<StoredValue protobuf>```|
|
||||
|
||||
message StoredValue {
|
||||
optional bytes raw = 1; // raw bytes
|
||||
}
|
||||
|
||||
NOTE: we currently encode stored values as raw bytes, however we have other proposals in flight to do something better than this. By using protobuf here as well, we can support existing functionality through the raw field, but allow for more strongly typed information in the future.
|
||||
|
||||
#### Internal
|
||||
|
||||
Internal rows are a reserved keyspace which the layer above can use for anything it wants.
|
||||
|
||||
| Key | Value |
|
||||
|---------------------------|-------------------------|
|
||||
|```'i' <application key []byte>```|```<application value []byte>```|
|
||||
|
||||
### FAQ
|
||||
|
||||
1. How do you ensure correct semantics while updating a document in the index?
|
||||
|
||||
Let us consider 5 possible states:
|
||||
|
||||
a. Document X#1 is in the index, maxReadDocNumber=1, inFlightDocIds{}, deletedDocIdNumbers{}
|
||||
|
||||
b. Document X#1 and X#2 are in the index, maxReadDocNumber=1, inFlightDocIds{}, deletedDocIdNumbers{}
|
||||
|
||||
c. Document X#1 and X#2 are in the index, maxReadDocNumber=2, inFlightDocIds{X:2}, deletedDocIdNumbers{}
|
||||
|
||||
d. Document X#1 and X#2 are in the index, maxReadDocNumber=2, inFlightDocIds{}, deletedDocIdNumbers{X#1}
|
||||
|
||||
e. Document X#2 is in the index, maxReadDocNumber=2, inFlightDocIds{}, deletedDocIdNumbers{}
|
||||
|
||||
In state a, we have a steady state where one document has been indexed with id X.
|
||||
|
||||
In state b, we have executed the batch that writes the new rows corresponding to the new version of X, but we have not yet updated our in memory compensation data structures. This is OK, because maxReadDocNumber is still 1, all readers will ignore the new rows we just wrote. This is also OK because we are still inside the Index() method, so there is not yet any expectation to see the updated document.
|
||||
|
||||
In state c, we have updated both the maxReadDocNumber to 2 and added X:2 to the inFlightDocIds map. This means that searchers could find rows corresponding to X#1 and X#2. However, they are forced to disregard any row for X where the document number is not 2.
|
||||
|
||||
In state d, we have completed the lookup for the old document numbers of X, and found 1. Now deletedDocIdNumbers contains X#1. Now readers that encounter this doc_id_number will ignore it.
|
||||
|
||||
In state e, the garbage collector has removed all record of X#1.
|
||||
|
||||
The Index method returns after it has transitioned to state c, which maintains the semantics we desire.
|
||||
|
||||
2\. Wait, what happens if I kill -9 the process, won't you forget about the deleted documents?
|
||||
|
||||
No, our proposal is for a warmup process to walk a subset of the keyspace (TermFreq{ where field_id=0 }). This warmup process will identify all not-yet cleaned up document numbers, and seed the deletedDocIdNumbers state as well as the Garbage Collector Thread.
|
||||
|
||||
3\. Wait, but what will happen to the inFlightDocIds in a kill -9 scenario?
|
||||
|
||||
It turns out they actually don't matter. That list was just an optimization to get us through the window of time while we hadn't yet looked up the old document numbers for a given document id. But, during the warmup phase we still identify all those keys and they go directly onto deletedDocIdNumbers list.
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,31 @@
|
|||
// Value encodings for firestorm index rows. Keys are manually encoded
// elsewhere; these messages describe only the row values.
package firestorm;

// Value of the single version row ('v' key).
message VersionValue {
	required uint64 version = 1;
}

// Value of a field row ('f' key): maps the numeric field id (in the key)
// to its name.
message FieldValue {
	required string name = 1;
}

// Value of a dictionary row ('d' key): best-effort term usage stats.
message DictionaryValue {
	optional uint64 count = 1; // number of documents using this term in this field
}

// One term-vector entry carried inside a TermFreqValue.
message TermVector {
	optional uint32 field = 1; // field optional if redundant, required for composite fields
	optional uint64 pos = 2; // positional offset within the field
	optional uint64 start = 3; // start byte offset
	optional uint64 end = 4; // end byte offset
	repeated uint64 arrayPositions = 5; // array positions
}

// Value of a term frequency row ('t' key).
message TermFreqValue {
	required uint64 freq = 1; // frequency of the term occurrence within this field
	optional float norm = 2; // normalization factor
	repeated TermVector vectors = 3; // term vectors
}

// Value of a stored row ('s' key).
message StoredValue {
	optional bytes raw = 1; // raw bytes
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,235 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// DefaultGarbageThreshold is the value compared against the computed
// garbage/doc-count ratio; a cleanup pass runs when the ratio exceeds it
// (see GarbageCollector.run).
const DefaultGarbageThreshold = 10

// DefaultMaxDocsPerPass caps how many deleted doc numbers a single
// cleanup pass will process.
const DefaultMaxDocsPerPass = 1000

// DefaultGarbageSleep is the interval between garbage-ratio checks.
var DefaultGarbageSleep = 15 * time.Second

// GarbageCollector periodically removes rows belonging to deleted or
// superseded document numbers from the underlying KV store.
type GarbageCollector struct {
	f                *Firestorm
	garbageThreshold int           // ratio value that triggers a cleanup pass
	garbageSleep     time.Duration // time between ratio checks
	maxDocsPerPass   int           // max doc numbers handled per cleanup pass
	quit             chan struct{} // closed by Stop to end the run loop

	mutex      sync.RWMutex      // guards workingSet
	workingSet map[uint64][]byte // doc number -> doc id awaiting collection
	closeWait  sync.WaitGroup    // tracks the run goroutine for Stop
}
|
||||
|
||||
func NewGarbageCollector(f *Firestorm) *GarbageCollector {
|
||||
rv := GarbageCollector{
|
||||
f: f,
|
||||
garbageThreshold: DefaultGarbageThreshold,
|
||||
garbageSleep: DefaultGarbageSleep,
|
||||
maxDocsPerPass: DefaultMaxDocsPerPass,
|
||||
quit: make(chan struct{}),
|
||||
workingSet: make(map[uint64][]byte),
|
||||
}
|
||||
return &rv
|
||||
}
|
||||
|
||||
func (gc *GarbageCollector) Notify(docNum uint64, docId []byte) {
|
||||
gc.mutex.Lock()
|
||||
defer gc.mutex.Unlock()
|
||||
gc.workingSet[docNum] = docId
|
||||
}
|
||||
|
||||
// Start launches the background collection loop; pair with Stop.
func (gc *GarbageCollector) Start() {
	gc.closeWait.Add(1)
	go gc.run()
}
|
||||
|
||||
// Stop signals the run loop to exit and blocks until it has done so.
// Must not be called more than once (quit would be closed twice).
func (gc *GarbageCollector) Stop() {
	close(gc.quit)
	gc.closeWait.Wait()
}
|
||||
|
||||
func (gc *GarbageCollector) run() {
|
||||
tick := time.Tick(gc.garbageSleep)
|
||||
for {
|
||||
select {
|
||||
case <-gc.quit:
|
||||
logger.Printf("garbage collector asked to quit")
|
||||
gc.closeWait.Done()
|
||||
return
|
||||
case <-tick:
|
||||
logger.Printf("garbage collector ticked")
|
||||
garbageSize := gc.f.compensator.GarbageCount()
|
||||
docSize, err := gc.f.DocCount()
|
||||
if err != nil {
|
||||
logger.Printf("garbage collector error getting doc count: %v", err)
|
||||
continue
|
||||
}
|
||||
if docSize == 0 {
|
||||
continue
|
||||
}
|
||||
garbageRatio := int(uint64(garbageSize) / docSize)
|
||||
if garbageRatio > gc.garbageThreshold {
|
||||
gc.cleanup()
|
||||
} else {
|
||||
logger.Printf("garbage ratio only %d, waiting", garbageRatio)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (gc *GarbageCollector) NextBatch(n int) []uint64 {
|
||||
gc.mutex.RLock()
|
||||
defer gc.mutex.RUnlock()
|
||||
|
||||
rv := make([]uint64, 0, n)
|
||||
i := 0
|
||||
for k := range gc.workingSet {
|
||||
rv = append(rv, k)
|
||||
i++
|
||||
if i > n {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func (gc *GarbageCollector) cleanup() {
|
||||
logger.Printf("garbage collector starting")
|
||||
// get list of deleted doc numbers to work on this pass
|
||||
deletedDocNumsList := gc.NextBatch(gc.maxDocsPerPass) //gc.f.deletedDocNumbers.Keys(gc.maxDocsPerPass)
|
||||
logger.Printf("found %d doc numbers to cleanup", len(deletedDocNumsList))
|
||||
|
||||
// put these documents numbers in a map, for faster checking
|
||||
// and for organized keys to be deleted
|
||||
deletedDocNums := make(map[uint64][][]byte)
|
||||
for _, deletedDocNum := range deletedDocNumsList {
|
||||
deletedDocNums[deletedDocNum] = make([][]byte, 0)
|
||||
}
|
||||
|
||||
reader, err := gc.f.store.Reader()
|
||||
if err != nil {
|
||||
logger.Printf("garbage collector fatal: %v", err)
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if cerr := reader.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
// walk all the term freq rows (where field > 0)
|
||||
termFreqStart := TermFreqIteratorStart(0, []byte{ByteSeparator})
|
||||
termFreqEnd := TermFreqIteratorStart(math.MaxUint16, []byte{ByteSeparator})
|
||||
|
||||
var tfr TermFreqRow
|
||||
dictionaryDeltas := make(map[string]int64)
|
||||
err = visitRange(reader, termFreqStart, termFreqEnd, func(key, val []byte) (bool, error) {
|
||||
err := tfr.ParseKey(key)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
docNum := tfr.DocNum()
|
||||
if docNumKeys, deleted := deletedDocNums[docNum]; deleted {
|
||||
// this doc number has been deleted, place key into map
|
||||
deletedDocNums[docNum] = append(docNumKeys, key)
|
||||
if tfr.Field() != 0 {
|
||||
drk := tfr.DictionaryRowKey()
|
||||
dictionaryDeltas[string(drk)] -= 1
|
||||
}
|
||||
}
|
||||
return true, nil
|
||||
})
|
||||
if err != nil {
|
||||
logger.Printf("garbage collector fatal: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// walk all the stored rows
|
||||
var sr StoredRow
|
||||
err = visitPrefix(reader, StoredKeyPrefix, func(key, val []byte) (bool, error) {
|
||||
err := sr.ParseKey(key)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
docNum := sr.DocNum()
|
||||
if docNumKeys, deleted := deletedDocNums[docNum]; deleted {
|
||||
// this doc number has been deleted, place key into map
|
||||
deletedDocNums[docNum] = append(docNumKeys, key)
|
||||
}
|
||||
return true, nil
|
||||
})
|
||||
if err != nil {
|
||||
logger.Printf("garbage collector fatal: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// now process each doc one at a time
|
||||
for docNum, docKeys := range deletedDocNums {
|
||||
|
||||
// delete keys for a doc number
|
||||
logger.Printf("deleting keys for %d", docNum)
|
||||
// open a writer
|
||||
writer, err := gc.f.store.Writer()
|
||||
if err != nil {
|
||||
_ = writer.Close()
|
||||
logger.Printf("garbage collector fatal: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// prepare batch
|
||||
wb := writer.NewBatch()
|
||||
|
||||
for _, k := range docKeys {
|
||||
wb.Delete(k)
|
||||
}
|
||||
|
||||
err = writer.ExecuteBatch(wb)
|
||||
if err != nil {
|
||||
_ = writer.Close()
|
||||
logger.Printf("garbage collector fatal: %v", err)
|
||||
return
|
||||
}
|
||||
logger.Printf("deleted %d keys", len(docKeys))
|
||||
|
||||
// remove it from delete keys list
|
||||
docID := gc.workingSet[docNum]
|
||||
delete(gc.workingSet, docNum)
|
||||
gc.f.compensator.GarbageCollect([]uint64{docNum})
|
||||
|
||||
// now delete the original marker row (field 0)
|
||||
tfidrow := NewTermFreqRow(0, nil, docID, docNum, 0, 0, nil)
|
||||
markerRowKey := tfidrow.Key()
|
||||
|
||||
markerBatch := writer.NewBatch()
|
||||
markerBatch.Delete(markerRowKey)
|
||||
err = writer.ExecuteBatch(markerBatch)
|
||||
if err != nil {
|
||||
logger.Printf("garbage collector fatal: %v", err)
|
||||
return
|
||||
}
|
||||
err = writer.Close()
|
||||
if err != nil {
|
||||
logger.Printf("garbage collector fatal: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// updating dictionary in one batch
|
||||
gc.f.dictUpdater.NotifyBatch(dictionaryDeltas)
|
||||
|
||||
logger.Printf("garbage collector finished")
|
||||
}
|
|
@ -0,0 +1,132 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store/gtreap"
|
||||
)
|
||||
|
||||
// TestGarbageCleanup seeds the store with rows for three documents
// ("a", "b", "c"), two of which have stale older versions (a#1 and c#4),
// runs warmup to detect the stale doc numbers, invokes a manual cleanup
// pass, and verifies that exactly the stale rows were deleted and that
// the compensator's garbage count drops to zero.
func TestGarbageCleanup(t *testing.T) {
	aq := index.NewAnalysisQueue(1)
	f, err := NewFirestorm(gtreap.Name, nil, aq)
	if err != nil {
		t.Fatal(err)
	}

	err = f.Open()
	if err != nil {
		t.Fatal(err)
	}

	kvwriter, err := f.(*Firestorm).store.Writer()
	if err != nil {
		t.Fatal(err)
	}

	// fixture rows; garbage marks the rows cleanup is expected to delete
	rows := []struct {
		row     index.IndexRow
		garbage bool
	}{
		// needed for warmup to work
		{NewFieldRow(0, IDFieldName), false},
		// 3 documents, with 2 older versions
		{NewTermFreqRow(0, nil, []byte("a"), 1, 0, 0.0, nil), true},
		{NewTermFreqRow(0, nil, []byte("a"), 2, 0, 0.0, nil), false},
		{NewTermFreqRow(0, nil, []byte("b"), 3, 0, 0.0, nil), false},
		{NewTermFreqRow(0, nil, []byte("c"), 4, 0, 0.0, nil), true},
		{NewTermFreqRow(0, nil, []byte("c"), 5, 0, 0.0, nil), false},
		// additional records for these docs which should be removed
		{NewTermFreqRow(1, []byte("cat"), []byte("a"), 1, 3, 2.0, nil), true},
		{NewTermFreqRow(1, []byte("cat"), []byte("c"), 4, 1, 1.0, nil), true},
		{NewStoredRow([]byte("a"), 1, 1, nil, []byte("tcat")), true},
		{NewStoredRow([]byte("c"), 4, 1, nil, []byte("tcat")), true},
	}

	// write each fixture row in its own batch
	for _, row := range rows {
		wb := kvwriter.NewBatch()
		wb.Set(row.row.Key(), row.row.Value())
		err = kvwriter.ExecuteBatch(wb)
		if err != nil {
			t.Fatal(err)
		}
	}

	err = kvwriter.Close()
	if err != nil {
		t.Fatal(err)
	}

	kvreader, err := f.(*Firestorm).store.Reader()
	if err != nil {
		t.Fatal(err)
	}

	// warmup ensures that deletedDocNums is seeded correctly
	err = f.(*Firestorm).warmup(kvreader)
	if err != nil {
		t.Fatal(err)
	}

	err = kvreader.Close()
	if err != nil {
		t.Fatal(err)
	}

	// now invoke garbage collector cleanup manually
	f.(*Firestorm).garbageCollector.cleanup()

	// assert that garbage rows are gone
	reader, err := f.(*Firestorm).store.Reader()
	if err != nil {
		t.Fatal(err)
	}

	// every garbage row must be gone; every non-garbage row must remain
	for _, row := range rows {
		v, err := reader.Get(row.row.Key())
		if err != nil {
			t.Fatal(err)
		}
		if v != nil && row.garbage {
			t.Errorf("garbage row not deleted, key: %s", row.row.Key())
		}
		if v == nil && !row.garbage {
			t.Errorf("non-garbage row deleted, key: %s", row.row.Key())
		}
	}

	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	// assert that deletedDocsNumbers size is 0
	if f.(*Firestorm).compensator.GarbageCount() != 0 {
		t.Errorf("expected deletedDocsNumbers size to be 0, got %d", f.(*Firestorm).compensator.GarbageCount())
	}

}
|
||||
|
||||
func TestGarbageDontPanicOnEmptyDocs(t *testing.T) {
|
||||
idx, err := NewFirestorm("", nil, index.NewAnalysisQueue(1))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
f := idx.(*Firestorm)
|
||||
gc := NewGarbageCollector(f)
|
||||
gc.garbageSleep = 30 * time.Millisecond
|
||||
|
||||
gc.Start()
|
||||
time.Sleep(40 * time.Millisecond)
|
||||
gc.Stop()
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import "fmt"
|
||||
|
||||
// InternalKeyPrefix is the single-byte key prefix that identifies internal
// (application-private) rows in the KV store.
var InternalKeyPrefix = []byte{'i'}

// InternalRow stores an opaque key/value pair on behalf of the index user.
// The stored key is the user key without the 'i' prefix; the prefix is
// re-applied when the row is serialized.
type InternalRow struct {
	key []byte
	val []byte
}

// NewInternalRow builds an InternalRow from an un-prefixed key and a value.
// The slices are retained, not copied.
func NewInternalRow(key, val []byte) *InternalRow {
	rv := InternalRow{
		key: key,
		val: val,
	}
	return &rv
}

// NewInternalRowKV reconstructs an InternalRow from a serialized key/value
// pair; the first byte of key is assumed to be InternalKeyPrefix and is
// stripped. The resulting row aliases the input slices.
func NewInternalRowKV(key, value []byte) (*InternalRow, error) {
	rv := InternalRow{}
	rv.key = key[1:]
	rv.val = value
	return &rv, nil
}

// KeySize returns the serialized key length (prefix byte + user key).
func (ir *InternalRow) KeySize() int {
	return 1 + len(ir.key)
}

// KeyTo serializes the row key into buf, which must be at least KeySize()
// bytes, and returns the number of bytes written.
func (ir *InternalRow) KeyTo(buf []byte) (int, error) {
	// Use the declared prefix constant rather than a repeated literal so the
	// prefix is defined in exactly one place.
	buf[0] = InternalKeyPrefix[0]
	copy(buf[1:], ir.key)
	return 1 + len(ir.key), nil
}

// Key returns a freshly allocated serialized key.
func (ir *InternalRow) Key() []byte {
	buf := make([]byte, ir.KeySize())
	n, _ := ir.KeyTo(buf)
	return buf[:n]
}

// ValueSize returns the serialized value length.
func (ir *InternalRow) ValueSize() int {
	return len(ir.val)
}

// ValueTo copies the value into buf and returns the number of bytes written.
func (ir *InternalRow) ValueTo(buf []byte) (int, error) {
	copy(buf, ir.val)
	return len(ir.val), nil
}

// Value returns the stored value without copying.
func (ir *InternalRow) Value() []byte {
	return ir.val
}

// String implements fmt.Stringer for debugging output.
func (ir *InternalRow) String() string {
	return fmt.Sprintf("InternalStore - Key: %s (% x) Val: %s (% x)", ir.key, ir.key, ir.val, ir.val)
}
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
)
|
||||
|
||||
func TestInternalRows(t *testing.T) {
|
||||
tests := []struct {
|
||||
input index.IndexRow
|
||||
outKey []byte
|
||||
outVal []byte
|
||||
}{
|
||||
{
|
||||
NewInternalRow([]byte("key"), []byte("val")),
|
||||
[]byte{'i', 'k', 'e', 'y'},
|
||||
[]byte{'v', 'a', 'l'},
|
||||
},
|
||||
}
|
||||
|
||||
// test going from struct to k/v bytes
|
||||
for i, test := range tests {
|
||||
rk := test.input.Key()
|
||||
if !reflect.DeepEqual(rk, test.outKey) {
|
||||
t.Errorf("Expected key to be %v got: %v", test.outKey, rk)
|
||||
}
|
||||
rv := test.input.Value()
|
||||
if !reflect.DeepEqual(rv, test.outVal) {
|
||||
t.Errorf("Expected value to be %v got: %v for %d", test.outVal, rv, i)
|
||||
}
|
||||
}
|
||||
|
||||
// now test going back from k/v bytes to struct
|
||||
for i, test := range tests {
|
||||
row, err := NewInternalRowKV(test.outKey, test.outVal)
|
||||
if err != nil {
|
||||
t.Errorf("error parsking key/value: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(row, test.input) {
|
||||
t.Errorf("Expected: %#v got: %#v for %d", test.input, row, i)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,146 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// channelBufferSize bounds the number of pending batches the lookuper can
// queue before NotifyBatch blocks the caller.
const channelBufferSize = 1000

// Lookuper asynchronously resolves, for each mutated docID, which older
// document numbers it supersedes, so the compensator can migrate them.
type Lookuper struct {
	f         *Firestorm
	workChan  chan []*InFlightItem // batches of mutations awaiting lookup
	quit      chan struct{}        // closed by Stop to shut down run()
	closeWait sync.WaitGroup       // lets Stop wait for run() to exit

	// tasksQueued/tasksDone are atomic counters of batches submitted and
	// completed; compared by waitTasksDone.
	tasksQueued uint64
	tasksDone   uint64
}
|
||||
|
||||
func NewLookuper(f *Firestorm) *Lookuper {
|
||||
rv := Lookuper{
|
||||
f: f,
|
||||
workChan: make(chan []*InFlightItem, channelBufferSize),
|
||||
quit: make(chan struct{}),
|
||||
}
|
||||
return &rv
|
||||
}
|
||||
|
||||
// NotifyBatch queues a batch of in-flight mutations for lookup. Blocks if
// the work channel buffer is full.
func (l *Lookuper) NotifyBatch(items []*InFlightItem) {
	atomic.AddUint64(&l.tasksQueued, 1)
	l.workChan <- items
}

// Start launches the background lookup goroutine.
func (l *Lookuper) Start() {
	l.closeWait.Add(1)
	go l.run()
}

// Stop signals the lookup goroutine to quit and waits until it has exited.
func (l *Lookuper) Stop() {
	close(l.quit)
	l.closeWait.Wait()
}
|
||||
|
||||
func (l *Lookuper) run() {
|
||||
for {
|
||||
|
||||
select {
|
||||
case <-l.quit:
|
||||
logger.Printf("lookuper asked to quit")
|
||||
l.closeWait.Done()
|
||||
return
|
||||
case items, ok := <-l.workChan:
|
||||
if !ok {
|
||||
logger.Printf("lookuper work channel closed unexpectedly, stopping")
|
||||
return
|
||||
}
|
||||
l.lookupItems(items)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Lookuper) lookupItems(items []*InFlightItem) {
|
||||
for _, item := range items {
|
||||
l.lookup(item)
|
||||
}
|
||||
atomic.AddUint64(&l.tasksDone, 1)
|
||||
}
|
||||
|
||||
func (l *Lookuper) lookup(item *InFlightItem) {
|
||||
reader, err := l.f.store.Reader()
|
||||
if err != nil {
|
||||
logger.Printf("lookuper fatal: %v", err)
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if cerr := reader.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
prefix := TermFreqPrefixFieldTermDocId(0, nil, item.docID)
|
||||
logger.Printf("lookuper prefix - % x", prefix)
|
||||
var tfk TermFreqRow
|
||||
docNums := make(DocNumberList, 0)
|
||||
err = visitPrefix(reader, prefix, func(key, val []byte) (bool, error) {
|
||||
logger.Printf("lookuper sees key % x", key)
|
||||
err := tfk.ParseKey(key)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
docNum := tfk.DocNum()
|
||||
docNums = append(docNums, docNum)
|
||||
return true, nil
|
||||
})
|
||||
if err != nil {
|
||||
logger.Printf("lookuper fatal: %v", err)
|
||||
return
|
||||
}
|
||||
oldDocNums := make(DocNumberList, 0, len(docNums))
|
||||
for _, docNum := range docNums {
|
||||
if item.docNum == 0 || docNum < item.docNum {
|
||||
oldDocNums = append(oldDocNums, docNum)
|
||||
}
|
||||
}
|
||||
logger.Printf("lookup migrating '%s' - %d - oldDocNums: %v", item.docID, item.docNum, oldDocNums)
|
||||
l.f.compensator.Migrate(item.docID, item.docNum, oldDocNums)
|
||||
if len(oldDocNums) == 0 && item.docNum != 0 {
|
||||
// this was an add, not an update
|
||||
atomic.AddUint64(l.f.docCount, 1)
|
||||
} else if len(oldDocNums) > 0 && item.docNum == 0 {
|
||||
// this was a delete (and it previously existed)
|
||||
atomic.AddUint64(l.f.docCount, ^uint64(0))
|
||||
}
|
||||
}
|
||||
|
||||
// this is not intended to be used publicly, only for unit tests
|
||||
// which depend on consistency we no longer provide
|
||||
func (l *Lookuper) waitTasksDone(d time.Duration) error {
|
||||
timeout := time.After(d)
|
||||
tick := time.Tick(100 * time.Millisecond)
|
||||
for {
|
||||
select {
|
||||
// Got a timeout! fail with a timeout error
|
||||
case <-timeout:
|
||||
return fmt.Errorf("timeout")
|
||||
// Got a tick, we should check on doSomething()
|
||||
case <-tick:
|
||||
queued := atomic.LoadUint64(&l.tasksQueued)
|
||||
done := atomic.LoadUint64(&l.tasksDone)
|
||||
if queued == done {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store/gtreap"
|
||||
)
|
||||
|
||||
// TestLookups writes several document versions directly into the store,
// registers each mutation with the compensator, and verifies that a
// synchronous lookup() call moves each mutation out of the in-flight list
// and that the final doc count reflects three live documents.
func TestLookups(t *testing.T) {
	aq := index.NewAnalysisQueue(1)
	f, err := NewFirestorm(gtreap.Name, nil, aq)
	if err != nil {
		t.Fatal(err)
	}

	err = f.Open()
	if err != nil {
		t.Fatal(err)
	}

	kvwriter, err := f.(*Firestorm).store.Writer()
	if err != nil {
		t.Fatal(err)
	}

	rows := []struct {
		row     index.IndexRow
		garbage bool
	}{
		// needed for warmup to work
		{NewFieldRow(0, IDFieldName), false},
		// 3 documents, with 2 older versions
		{NewTermFreqRow(0, nil, []byte("a"), 1, 0, 0.0, nil), true},
		{NewTermFreqRow(0, nil, []byte("a"), 2, 0, 0.0, nil), false},
		{NewTermFreqRow(0, nil, []byte("b"), 3, 0, 0.0, nil), false},
		{NewTermFreqRow(0, nil, []byte("c"), 4, 0, 0.0, nil), true},
		{NewTermFreqRow(0, nil, []byte("c"), 5, 0, 0.0, nil), false},
	}

	for _, row := range rows {
		wb := kvwriter.NewBatch()
		wb.Set(row.row.Key(), row.row.Value())
		err = kvwriter.ExecuteBatch(wb)
		if err != nil {
			t.Fatal(err)
		}
		// also see the compensator
		if tfr, ok := row.row.(*TermFreqRow); ok {
			f.(*Firestorm).compensator.Mutate(tfr.DocID(), tfr.DocNum())
			// expect this mutation to be in the in-flight list
			val := f.(*Firestorm).compensator.inFlight.Get(&InFlightItem{docID: tfr.DocID()})
			if val == nil {
				t.Errorf("expected key: % x to be in the inflight list", tfr.DocID())
			}
			// run the lookup synchronously instead of via the work channel
			f.(*Firestorm).lookuper.lookup(&InFlightItem{docID: tfr.DocID(), docNum: tfr.DocNum()})
			// now expect this mutation to NOT be in the in-flight list
			val = f.(*Firestorm).compensator.inFlight.Get(&InFlightItem{docID: tfr.DocID()})
			if val != nil {
				t.Errorf("expected key: % x to NOT be in the inflight list, got %v", tfr.DocID(), val)
			}
		}
	}

	// check that doc count is 3 at the end of this
	docCount, err := f.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if docCount != 3 {
		t.Errorf("expected doc count 3, got %d", docCount)
	}

}
|
|
@ -0,0 +1,71 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
// mergeOperator is the package-wide merge operator instance handed to KV
// stores that support merge operations.
var mergeOperator firestormMerge

// dictionaryTermIncr and dictionaryTermDecr are the pre-encoded merge
// operands for +1 and -1 dictionary count deltas.
var dictionaryTermIncr []byte
var dictionaryTermDecr []byte

func init() {
	// Operands are 8-byte little-endian encodings of signed deltas; -1 is
	// stored via its two's-complement uint64 representation.
	dictionaryTermIncr = make([]byte, 8)
	binary.LittleEndian.PutUint64(dictionaryTermIncr, uint64(1))
	dictionaryTermDecr = make([]byte, 8)
	var negOne = int64(-1)
	binary.LittleEndian.PutUint64(dictionaryTermDecr, uint64(negOne))
}
|
||||
|
||||
// firestormMerge implements the KV store merge-operator interface for
// dictionary row counts.
type firestormMerge struct{}

// FullMerge applies a list of signed count deltas (8-byte little-endian
// operands) to the dictionary row identified by key, starting from
// existingValue if present. The count is clamped at zero rather than
// allowed to underflow. Returns (value, true) on success, (nil, false) if
// the key or existing value cannot be parsed.
func (m *firestormMerge) FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool) {
	// set up record based on key
	dr, err := NewDictionaryRowK(key)
	if err != nil {
		return nil, false
	}
	if len(existingValue) > 0 {
		// if existing value, parse it
		err = dr.parseDictionaryV(existingValue)
		if err != nil {
			return nil, false
		}
	}

	// now process operands
	for _, operand := range operands {
		next := int64(binary.LittleEndian.Uint64(operand))
		if next < 0 && uint64(-next) > dr.Count() {
			// subtracting next from existing would overflow
			dr.SetCount(0)
		} else if next < 0 {
			dr.SetCount(dr.Count() - uint64(-next))
		} else {
			dr.SetCount(dr.Count() + uint64(next))
		}
	}

	return dr.Value(), true
}
|
||||
|
||||
func (m *firestormMerge) PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool) {
|
||||
left := int64(binary.LittleEndian.Uint64(leftOperand))
|
||||
right := int64(binary.LittleEndian.Uint64(rightOperand))
|
||||
rv := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(rv, uint64(left+right))
|
||||
return rv, true
|
||||
}
|
||||
|
||||
// Name identifies this merge operator to the KV store.
func (m *firestormMerge) Name() string {
	return "firestormMerge"
}
|
|
@ -0,0 +1,93 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
)
|
||||
|
||||
// TestPartialMerge folds a series of increment operands pairwise through
// PartialMerge and checks the accumulated count.
func TestPartialMerge(t *testing.T) {

	tests := []struct {
		in  [][]byte
		out uint64
	}{
		{
			in:  [][]byte{dictionaryTermIncr, dictionaryTermIncr, dictionaryTermIncr, dictionaryTermIncr, dictionaryTermIncr},
			out: 5,
		},
	}

	mo := &firestormMerge{}
	for _, test := range tests {
		// left-fold the operand list two at a time
		curr := test.in[0]
		for _, next := range test.in[1:] {
			var ok bool
			curr, ok = mo.PartialMerge([]byte("key"), curr, next)
			if !ok {
				t.Errorf("expected partial merge ok")
			}
		}
		actual := decodeCount(curr)
		if actual != test.out {
			t.Errorf("expected %d, got %d", test.out, actual)
		}
	}

}
|
||||
|
||||
func decodeCount(in []byte) uint64 {
|
||||
buf := bytes.NewBuffer(in)
|
||||
count, _ := binary.ReadUvarint(buf)
|
||||
return count
|
||||
}
|
||||
|
||||
// TestFullMerge applies increment and decrement operand lists to an
// existing dictionary row via FullMerge and compares the serialized result
// against the expected row.
func TestFullMerge(t *testing.T) {

	tests := []struct {
		existing index.IndexRow
		operands [][]byte
		result   index.IndexRow
		success  bool
	}{
		{
			// 3 + 1 + 1 = 5
			existing: NewDictionaryRow(1, []byte("term"), 3),
			operands: [][]byte{dictionaryTermIncr, dictionaryTermIncr},
			result:   NewDictionaryRow(1, []byte("term"), 5),
			success:  true,
		},
		{
			// 3 - 1 - 1 = 1
			existing: NewDictionaryRow(1, []byte("term"), 3),
			operands: [][]byte{dictionaryTermDecr, dictionaryTermDecr},
			result:   NewDictionaryRow(1, []byte("term"), 1),
			success:  true,
		},
	}

	mo := &firestormMerge{}
	for _, test := range tests {

		existingVal := test.existing.Value()
		actual, success := mo.FullMerge([]byte("key"), existingVal, test.operands)
		if success != test.success {
			t.Errorf("expected error %t, got %t", test.success, success)
		}
		expectedVal := test.result.Value()
		if !reflect.DeepEqual(expectedVal, actual) {
			t.Errorf("expected result %v, got %v", expectedVal, actual)
		}
	}

}
|
|
@ -0,0 +1,220 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
)
|
||||
|
||||
// firestormReader is the index.IndexReader implementation for firestorm.
// It holds a KV store reader plus a compensator snapshot and a doc count
// captured at creation time, giving callers a stable view.
type firestormReader struct {
	f        *Firestorm
	r        store.KVReader
	s        *Snapshot // compensator state frozen when the reader was opened
	docCount uint64    // doc count frozen when the reader was opened
}
|
||||
|
||||
func newFirestormReader(f *Firestorm) (index.IndexReader, error) {
|
||||
r, err := f.store.Reader()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening store reader: %v", err)
|
||||
}
|
||||
docCount, err := f.DocCount()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening store reader: %v", err)
|
||||
}
|
||||
rv := firestormReader{
|
||||
f: f,
|
||||
r: r,
|
||||
s: f.compensator.Snapshot(),
|
||||
docCount: docCount,
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
// TermFieldReader returns a reader over the postings for term in field.
// For an unknown field it uses the max field index and the ByteSeparator
// sentinel term, which yields an empty reader rather than an error.
func (r *firestormReader) TermFieldReader(term []byte, field string) (index.TermFieldReader, error) {
	fieldIndex, fieldExists := r.f.fieldCache.FieldNamed(field, false)
	if fieldExists {
		return newFirestormTermFieldReader(r, uint16(fieldIndex), term)
	}
	return newFirestormTermFieldReader(r, ^uint16(0), []byte{ByteSeparator})
}
|
||||
|
||||
// DocIDReader returns an iterator over document IDs in [start, end).
func (r *firestormReader) DocIDReader(start, end string) (index.DocIDReader, error) {
	return newFirestormDocIDReader(r, start, end)
}

// FieldDict returns the full term dictionary for field (unbounded range).
func (r *firestormReader) FieldDict(field string) (index.FieldDict, error) {
	return r.FieldDictRange(field, nil, nil)
}

// FieldDictRange returns the term dictionary for field restricted to
// [startTerm, endTerm]. An unknown field maps to the max field index with a
// sentinel range, producing an empty dictionary rather than an error.
func (r *firestormReader) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
	fieldIndex, fieldExists := r.f.fieldCache.FieldNamed(field, false)
	if fieldExists {
		return newFirestormDictionaryReader(r, uint16(fieldIndex), startTerm, endTerm)
	}
	return newFirestormDictionaryReader(r, ^uint16(0), []byte{ByteSeparator}, []byte{})
}

// FieldDictPrefix returns the term dictionary entries that start with
// termPrefix, by converting the prefix into an exclusive upper bound.
func (r *firestormReader) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
	return r.FieldDictRange(field, termPrefix, incrementBytes(termPrefix))
}
|
||||
|
||||
// Document reconstructs the stored document with the given id from its
// stored rows. Returns (nil, nil) when the id has no current doc number,
// i.e. the document does not exist.
func (r *firestormReader) Document(id string) (*document.Document, error) {
	docID := []byte(id)
	docNum, err := r.currDocNumForId(docID)
	if err != nil {
		return nil, err
	} else if docNum == 0 {
		return nil, nil
	}
	rv := document.NewDocument(id)
	prefix := StoredPrefixDocIDNum(docID, docNum)
	err = visitPrefix(r.r, prefix, func(key, val []byte) (bool, error) {
		// copy val: the iterator may reuse its buffer after the callback
		safeVal := make([]byte, len(val))
		copy(safeVal, val)
		row, err := NewStoredRowKV(key, safeVal)
		if err != nil {
			return false, err
		}
		if row != nil {
			fieldName := r.f.fieldCache.FieldIndexed(row.field)
			// rows with an unrecognized type byte decode to nil and are skipped
			field := r.decodeFieldType(fieldName, row.arrayPositions, row.value.GetRaw())
			if field != nil {
				rv.AddField(field)
			}
		}
		return true, nil
	})
	if err != nil {
		return nil, err
	}
	return rv, nil
}
|
||||
|
||||
func (r *firestormReader) decodeFieldType(name string, pos []uint64, value []byte) document.Field {
|
||||
switch value[0] {
|
||||
case 't':
|
||||
return document.NewTextField(name, pos, value[1:])
|
||||
case 'n':
|
||||
return document.NewNumericFieldFromBytes(name, pos, value[1:])
|
||||
case 'd':
|
||||
return document.NewDateTimeFieldFromBytes(name, pos, value[1:])
|
||||
case 'b':
|
||||
return document.NewBooleanFieldFromBytes(name, pos, value[1:])
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// currDocNumForId collects every doc number recorded for docID in the ID
// field (field 0, nil term), sorts them, and returns the first one per
// DocNumberList ordering. Returns 0 when the docID has no entries.
// NOTE(review): which element is "current" depends on DocNumberList's sort
// order, which is defined elsewhere — confirm it sorts the current number
// first.
func (r *firestormReader) currDocNumForId(docID []byte) (uint64, error) {
	prefix := TermFreqPrefixFieldTermDocId(0, nil, docID)
	docNums := make(DocNumberList, 0)
	err := visitPrefix(r.r, prefix, func(key, val []byte) (bool, error) {
		tfk, err := NewTermFreqRowKV(key, val)
		if err != nil {
			return false, err
		}
		docNum := tfk.DocNum()
		docNums = append(docNums, docNum)
		return true, nil
	})
	if err != nil {
		return 0, err
	}
	if len(docNums) > 0 {
		sort.Sort(docNums)
		return docNums[0], nil
	}
	return 0, nil
}
|
||||
|
||||
func (r *firestormReader) DocumentFieldTerms(id string) (index.FieldTerms, error) {
|
||||
|
||||
docID := []byte(id)
|
||||
docNum, err := r.currDocNumForId(docID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
} else if docNum == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
rv := make(index.FieldTerms, 0)
|
||||
// walk the term freqs
|
||||
err = visitPrefix(r.r, TermFreqKeyPrefix, func(key, val []byte) (bool, error) {
|
||||
tfr, err := NewTermFreqRowKV(key, val)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if bytes.Compare(tfr.DocID(), docID) == 0 && tfr.DocNum() == docNum && tfr.Field() != 0 {
|
||||
fieldName := r.f.fieldCache.FieldIndexed(uint16(tfr.Field()))
|
||||
terms, ok := rv[fieldName]
|
||||
if !ok {
|
||||
terms = make([]string, 0, 1)
|
||||
}
|
||||
terms = append(terms, string(tfr.Term()))
|
||||
rv[fieldName] = terms
|
||||
}
|
||||
return true, nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (r *firestormReader) Fields() ([]string, error) {
|
||||
fields := make([]string, 0)
|
||||
|
||||
err := visitPrefix(r.r, FieldKeyPrefix, func(key, val []byte) (bool, error) {
|
||||
fieldRow, err := NewFieldRowKV(key, val)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
fields = append(fields, fieldRow.Name())
|
||||
return true, nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return fields, nil
|
||||
}
|
||||
|
||||
// GetInternal fetches the value stored under an internal (application
// private) key, applying the internal key prefix before the KV lookup.
func (r *firestormReader) GetInternal(key []byte) ([]byte, error) {
	internalRow := NewInternalRow(key, nil)
	return r.r.Get(internalRow.Key())
}

// DocCount returns the doc count captured when this reader was opened.
func (r *firestormReader) DocCount() uint64 {
	return r.docCount
}

// Close releases the underlying KV store reader.
func (r *firestormReader) Close() error {
	return r.r.Close()
}
|
||||
|
||||
// incrementBytes returns a copy of in incremented by one, treating the
// slice as a big-endian unsigned integer: the carry propagates from the
// last byte toward the first. If every byte carries, the result wraps to
// all zeros. Used to turn a prefix into an exclusive range upper bound.
func incrementBytes(in []byte) []byte {
	out := make([]byte, len(in))
	copy(out, in)
	for pos := len(out) - 1; pos >= 0; pos-- {
		out[pos]++
		if out[pos] != 0 {
			// no carry out of this byte, so we are done
			return out
		}
	}
	return out
}
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
)
|
||||
|
||||
// firestormDictionaryReader iterates dictionary rows for a single field
// over a bounded key range, exposing them as index.DictEntry values.
type firestormDictionaryReader struct {
	r     *firestormReader
	field uint16
	start []byte // the serialized start key of the range
	i     store.KVIterator
}
|
||||
|
||||
// newFirestormDictionaryReader builds the serialized start/end dictionary
// keys for field and opens a range iterator between them. A nil end term is
// replaced by the ByteSeparator sentinel, covering all terms of the field.
func newFirestormDictionaryReader(r *firestormReader, field uint16, start, end []byte) (*firestormDictionaryReader, error) {
	startKey := DictionaryRowKey(field, start)
	logger.Printf("start key '%s' - % x", startKey, startKey)
	if end == nil {
		end = []byte{ByteSeparator}
	}
	endKey := DictionaryRowKey(field, end)
	logger.Printf("end key '%s' - % x", endKey, endKey)
	i := r.r.RangeIterator(startKey, endKey)
	rv := firestormDictionaryReader{
		r:     r,
		field: field,
		start: startKey,
		i:     i,
	}
	return &rv, nil
}
|
||||
|
||||
// Next returns the dictionary entry at the iterator's current position and
// advances past it. Returns (nil, nil) when the range is exhausted.
func (r *firestormDictionaryReader) Next() (*index.DictEntry, error) {
	key, val, valid := r.i.Current()
	if !valid {
		return nil, nil
	}

	logger.Printf("see key '%s' - % x", key, key)

	currRow, err := NewDictionaryRowKV(key, val)
	if err != nil {
		return nil, fmt.Errorf("unexpected error parsing dictionary row kv: %v", err)
	}
	rv := index.DictEntry{
		Term:  string(currRow.term),
		Count: currRow.Count(),
	}
	// advance the iterator to the next term
	r.i.Next()
	return &rv, nil
}
|
||||
|
||||
// Close releases the underlying iterator, if one was opened.
func (r *firestormDictionaryReader) Close() error {
	if r.i != nil {
		return r.i.Close()
	}
	return nil
}
|
|
@ -0,0 +1,225 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/analysis/tokenizers/regexp_tokenizer"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store/gtreap"
|
||||
)
|
||||
|
||||
// testAnalyzer is a minimal analyzer for tests: it tokenizes on word
// characters only, with no token filters.
var testAnalyzer = &analysis.Analyzer{
	Tokenizer: regexp_tokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)),
}
|
||||
|
||||
func TestDictionaryReader(t *testing.T) {
|
||||
aq := index.NewAnalysisQueue(1)
|
||||
f, err := NewFirestorm(gtreap.Name, nil, aq)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = f.Open()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
kvwriter, err := f.(*Firestorm).store.Writer()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
rows := []index.IndexRow{
|
||||
NewFieldRow(0, IDFieldName),
|
||||
NewFieldRow(1, "name"),
|
||||
NewFieldRow(2, "desc"),
|
||||
NewFieldRow(3, "prefix"),
|
||||
}
|
||||
|
||||
for _, row := range rows {
|
||||
wb := kvwriter.NewBatch()
|
||||
wb.Set(row.Key(), row.Value())
|
||||
err = kvwriter.ExecuteBatch(wb)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
err = kvwriter.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
kvreader, err := f.(*Firestorm).store.Reader()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = f.(*Firestorm).warmup(kvreader)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = kvreader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
kvwriter, err = f.(*Firestorm).store.Writer()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
rows = []index.IndexRow{
|
||||
|
||||
// dictionary entries
|
||||
NewDictionaryRow(1, []byte("test"), 4),
|
||||
|
||||
NewDictionaryRow(2, []byte("eat"), 1),
|
||||
NewDictionaryRow(2, []byte("more"), 1),
|
||||
NewDictionaryRow(2, []byte("rice"), 1),
|
||||
|
||||
NewDictionaryRow(3, []byte("bob"), 1),
|
||||
NewDictionaryRow(3, []byte("cat"), 1),
|
||||
NewDictionaryRow(3, []byte("cats"), 1),
|
||||
NewDictionaryRow(3, []byte("catting"), 1),
|
||||
NewDictionaryRow(3, []byte("dog"), 1),
|
||||
NewDictionaryRow(3, []byte("doggy"), 1),
|
||||
NewDictionaryRow(3, []byte("zoo"), 1),
|
||||
}
|
||||
|
||||
for _, row := range rows {
|
||||
wb := kvwriter.NewBatch()
|
||||
wb.Set(row.Key(), row.Value())
|
||||
err = kvwriter.ExecuteBatch(wb)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// now try it
|
||||
r, err := f.Reader()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
dict, err := r.FieldDict("name")
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
|
||||
termCount := 0
|
||||
curr, err := dict.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
if curr.Term != "test" {
|
||||
t.Errorf("expected term to be 'test', got '%s'", curr.Term)
|
||||
}
|
||||
curr, err = dict.Next()
|
||||
}
|
||||
if termCount != 1 {
|
||||
t.Errorf("expected 1 term for this field, got %d", termCount)
|
||||
}
|
||||
|
||||
err = dict.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
dict, err = r.FieldDict("desc")
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
|
||||
termCount = 0
|
||||
terms := make([]string, 0)
|
||||
curr, err = dict.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
terms = append(terms, curr.Term)
|
||||
curr, err = dict.Next()
|
||||
}
|
||||
if termCount != 3 {
|
||||
t.Errorf("expected 3 term for this field, got %d", termCount)
|
||||
}
|
||||
expectedTerms := []string{"eat", "more", "rice"}
|
||||
if !reflect.DeepEqual(expectedTerms, terms) {
|
||||
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
|
||||
}
|
||||
|
||||
err = dict.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// test start and end range
|
||||
dict, err = r.FieldDictRange("desc", []byte("fun"), []byte("nice"))
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
|
||||
termCount = 0
|
||||
terms = make([]string, 0)
|
||||
curr, err = dict.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
terms = append(terms, curr.Term)
|
||||
curr, err = dict.Next()
|
||||
}
|
||||
if termCount != 1 {
|
||||
t.Errorf("expected 1 term for this field, got %d", termCount)
|
||||
}
|
||||
expectedTerms = []string{"more"}
|
||||
if !reflect.DeepEqual(expectedTerms, terms) {
|
||||
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
|
||||
}
|
||||
|
||||
err = dict.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// test use case for prefix
|
||||
dict, err = r.FieldDictPrefix("prefix", []byte("cat"))
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
|
||||
termCount = 0
|
||||
terms = make([]string, 0)
|
||||
curr, err = dict.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
terms = append(terms, curr.Term)
|
||||
curr, err = dict.Next()
|
||||
}
|
||||
if termCount != 3 {
|
||||
t.Errorf("expected 3 term for this field, got %d", termCount)
|
||||
}
|
||||
expectedTerms = []string{"cat", "cats", "catting"}
|
||||
if !reflect.DeepEqual(expectedTerms, terms) {
|
||||
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
|
||||
}
|
||||
|
||||
err = dict.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = r.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,120 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
)
|
||||
|
||||
// firestormDocIDReader iterates over the document IDs present in the index
// by scanning the TermFreqRows of field 0 (the reserved ID field).
type firestormDocIDReader struct {
	// r is the snapshot reader this iterator was created from
	r *firestormReader
	// start is the key the underlying iterator was initially positioned at
	start []byte
	// i walks the TermFreqRows of field 0; nil means there is nothing to read
	i store.KVIterator
}
|
||||
|
||||
func newFirestormDocIDReader(r *firestormReader, start, end string) (*firestormDocIDReader, error) {
|
||||
startKey := TermFreqIteratorStart(0, nil)
|
||||
if start != "" {
|
||||
startKey = TermFreqPrefixFieldTermDocId(0, nil, []byte(start))
|
||||
}
|
||||
logger.Printf("start key '%s' - % x", startKey, startKey)
|
||||
endKey := TermFreqIteratorStart(0, []byte{ByteSeparator})
|
||||
if end != "" {
|
||||
endKey = TermFreqPrefixFieldTermDocId(0, nil, []byte(end))
|
||||
}
|
||||
|
||||
logger.Printf("end key '%s' - % x", endKey, endKey)
|
||||
|
||||
i := r.r.RangeIterator(startKey, endKey)
|
||||
|
||||
rv := firestormDocIDReader{
|
||||
r: r,
|
||||
start: startKey,
|
||||
i: i,
|
||||
}
|
||||
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
// Next returns the next live document ID, or "" when the iterator is
// exhausted. Multiple TermFreqRows may exist for the same doc ID (one per
// not-yet-garbage-collected document number); they are collected together
// and the compensator decides which, if any, is currently valid.
func (r *firestormDocIDReader) Next() (string, error) {
	if r.i != nil {
		key, val, valid := r.i.Current()
		for valid {
			logger.Printf("see key: '%s' - % x", key, key)
			// gather all rows sharing this doc ID, keyed by doc number
			tfrsByDocNum := make(map[uint64]*TermFreqRow)
			tfr, err := NewTermFreqRowKV(key, val)
			if err != nil {
				return "", err
			}
			tfrsByDocNum[tfr.DocNum()] = tfr

			// now we have a possible row, but there may be more rows for the same docid
			// find these now
			// NOTE: this advances r.i past every row with the same doc ID,
			// which is what guarantees forward progress in this loop.
			err = r.findNextTfrsWithSameDocId(tfrsByDocNum, tfr.DocID())
			if err != nil {
				return "", err
			}

			docNumList := make(DocNumberList, 0, len(tfrsByDocNum))
			for dn := range tfrsByDocNum {
				docNumList = append(docNumList, dn)
			}

			logger.Printf("docNumList: %v", docNumList)

			// ask the compensator which doc number (if any) is still valid,
			// accounting for in-flight and deleted doc numbers
			highestValidDocNum := r.r.s.Which(tfr.docID, docNumList)
			if highestValidDocNum == 0 {
				// no valid doc number; the iterator already points at the
				// next distinct doc ID, so re-read it and try again
				key, val, valid = r.i.Current()
				continue
			}
			logger.Printf("highest valid: %d", highestValidDocNum)

			tfr = tfrsByDocNum[highestValidDocNum]
			return string(tfr.DocID()), nil
		}
	}
	return "", nil
}
|
||||
|
||||
// FIXME this is identical to the one in reader_terms.go
|
||||
func (r *firestormDocIDReader) findNextTfrsWithSameDocId(tfrsByDocNum map[uint64]*TermFreqRow, docID []byte) error {
|
||||
tfrDocIdPrefix := TermFreqPrefixFieldTermDocId(0, nil, docID)
|
||||
r.i.Next()
|
||||
key, val, valid := r.i.Current()
|
||||
for valid && bytes.HasPrefix(key, tfrDocIdPrefix) {
|
||||
tfr, err := NewTermFreqRowKV(key, val)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tfrsByDocNum[tfr.DocNum()] = tfr
|
||||
r.i.Next()
|
||||
key, val, valid = r.i.Current()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *firestormDocIDReader) Advance(docID string) (string, error) {
|
||||
if r.i != nil {
|
||||
tfrDocIdPrefix := TermFreqPrefixFieldTermDocId(0, nil, []byte(docID))
|
||||
r.i.Seek(tfrDocIdPrefix)
|
||||
return r.Next()
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
|
||||
func (r *firestormDocIDReader) Close() error {
|
||||
if r.i != nil {
|
||||
return r.i.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,187 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store/gtreap"
|
||||
)
|
||||
|
||||
// TestDocIDReaderSomeGarbage verifies that the doc ID reader skips documents
// whose versions are all invalid: "c" has an in-flight delete, "d" has a
// deleted-but-not-garbage-collected doc number, and "b" exists only via a
// newer doc number. Only live doc IDs should be returned.
func TestDocIDReaderSomeGarbage(t *testing.T) {
	aq := index.NewAnalysisQueue(1)
	f, err := NewFirestorm(gtreap.Name, nil, aq)
	if err != nil {
		t.Fatal(err)
	}

	err = f.Open()
	if err != nil {
		t.Fatal(err)
	}

	kvwriter, err := f.(*Firestorm).store.Writer()
	if err != nil {
		t.Fatal(err)
	}

	// seed raw index rows directly: field rows, then TermFreqRows for the
	// ID field (field 0) and the "desc" field (field 1)
	rows := []index.IndexRow{
		NewFieldRow(0, IDFieldName),
		NewFieldRow(1, "desc"),
		NewTermFreqRow(0, nil, []byte("a"), 1, 0, 0.0, nil),
		NewTermFreqRow(0, nil, []byte("b"), 2, 0, 0.0, nil),
		NewTermFreqRow(0, nil, []byte("c"), 3, 0, 0.0, nil),
		NewTermFreqRow(0, nil, []byte("d"), 4, 0, 0.0, nil),
		NewTermFreqRow(0, nil, []byte("a"), 5, 0, 0.0, nil),
		NewTermFreqRow(0, nil, []byte("b"), 6, 0, 0.0, nil),
		NewTermFreqRow(0, nil, []byte("e"), 7, 0, 0.0, nil),
		NewTermFreqRow(0, nil, []byte("g"), 8, 0, 0.0, nil),
		// first version of all docs have cat
		NewTermFreqRow(1, []byte("cat"), []byte("a"), 1, 1, 1.0, nil),
		NewTermFreqRow(1, []byte("cat"), []byte("b"), 2, 1, 1.0, nil),
		NewTermFreqRow(1, []byte("cat"), []byte("c"), 3, 1, 1.0, nil),
		NewTermFreqRow(1, []byte("cat"), []byte("d"), 4, 1, 1.0, nil),
		NewTermFreqRow(1, []byte("cat"), []byte("e"), 7, 1, 1.0, nil),
		NewTermFreqRow(1, []byte("cat"), []byte("g"), 8, 1, 1.0, nil),
		// updated version of a still has cat
		NewTermFreqRow(1, []byte("cat"), []byte("a"), 5, 1, 1.0, nil),
		// updated version of b does NOT have cat
		// c has delete in-flight
		// d has delete not-yet-garbage-collected
	}

	for _, row := range rows {
		wb := kvwriter.NewBatch()
		wb.Set(row.Key(), row.Value())
		err = kvwriter.ExecuteBatch(wb)
		if err != nil {
			t.Fatal(err)
		}
	}

	// simulate the garbage described above: "c" deleted in-flight,
	// doc number 4 ("d") deleted but not yet garbage collected
	f.(*Firestorm).compensator.inFlight = f.(*Firestorm).compensator.inFlight.Upsert(&InFlightItem{docID: []byte("c"), docNum: 0}, rand.Int())
	f.(*Firestorm).compensator.deletedDocNumbers.Set(4)

	err = kvwriter.Close()
	if err != nil {
		t.Fatal(err)
	}

	kvreader, err := f.(*Firestorm).store.Reader()
	if err != nil {
		t.Fatal(err)
	}

	// warmup to load field cache and set maxRead correctly
	err = f.(*Firestorm).warmup(kvreader)
	if err != nil {
		t.Fatal(err)
	}

	err = kvreader.Close()
	if err != nil {
		t.Fatal(err)
	}

	r, err := f.Reader()
	if err != nil {
		t.Fatal(err)
	}

	// full range: "c" and "d" must be skipped
	dr, err := r.DocIDReader("", "")
	if err != nil {
		t.Fatal(err)
	}

	expectedDocIds := []string{"a", "b", "e", "g"}
	foundDocIds := make([]string, 0)
	next, err := dr.Next()
	for next != "" && err == nil {
		foundDocIds = append(foundDocIds, next)
		next, err = dr.Next()
	}
	if err != nil {
		t.Fatal(err)
	}

	if !reflect.DeepEqual(expectedDocIds, foundDocIds) {
		t.Errorf("expected: %v, got %v", expectedDocIds, foundDocIds)
	}

	err = dr.Close()
	if err != nil {
		t.Fatal(err)
	}

	// now test with some doc id ranges
	dr, err = r.DocIDReader("b", "f")
	if err != nil {
		t.Fatal(err)
	}

	expectedDocIds = []string{"b", "e"}
	foundDocIds = make([]string, 0)
	next, err = dr.Next()
	for next != "" && err == nil {
		foundDocIds = append(foundDocIds, next)
		next, err = dr.Next()
	}
	if err != nil {
		t.Fatal(err)
	}

	if !reflect.DeepEqual(expectedDocIds, foundDocIds) {
		t.Errorf("expected: %v, got %v", expectedDocIds, foundDocIds)
	}

	err = dr.Close()
	if err != nil {
		t.Fatal(err)
	}

	//now try again and Advance to skip over "e"
	dr, err = r.DocIDReader("b", "")
	if err != nil {
		t.Fatal(err)
	}

	expectedDocIds = []string{"b", "g"}

	foundDocIds = make([]string, 0)
	next, err = dr.Next()
	if err != nil {
		t.Fatal(err)
	} else {
		foundDocIds = append(foundDocIds, next)
	}
	next, err = dr.Advance("f")
	if err != nil {
		t.Fatal(err)
	} else {
		foundDocIds = append(foundDocIds, next)
	}

	if !reflect.DeepEqual(expectedDocIds, foundDocIds) {
		t.Errorf("expected: %v, got %v", expectedDocIds, foundDocIds)
	}

	err = dr.Close()
	if err != nil {
		t.Fatal(err)
	}

	err = r.Close()
	if err != nil {
		t.Fatal(err)
	}
}
|
|
@ -0,0 +1,159 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package firestorm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
)
|
||||
|
||||
// firestormTermFieldReader iterates the documents in which a given term
// occurs in a given field, by scanning that field/term's TermFreqRows.
type firestormTermFieldReader struct {
	// r is the snapshot reader this iterator was created from
	r *firestormReader
	// field is the index of the field being read
	field uint16
	// term is the term being looked up
	term []byte
	// prefix is the key prefix shared by all of this field/term's rows
	prefix []byte
	// count is the advisory usage count from the dictionary row (0 if absent)
	count uint64
	// i walks the TermFreqRows under prefix; nil means there is nothing to read
	i store.KVIterator
}
|
||||
|
||||
func newFirestormTermFieldReader(r *firestormReader, field uint16, term []byte) (index.TermFieldReader, error) {
|
||||
dictionaryKey := DictionaryRowKey(field, term)
|
||||
dictionaryValue, err := r.r.Get(dictionaryKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
prefix := TermFreqIteratorStart(field, term)
|
||||
logger.Printf("starting term freq iterator at: '%s' - % x", prefix, prefix)
|
||||
i := r.r.PrefixIterator(prefix)
|
||||
rv := firestormTermFieldReader{
|
||||
r: r,
|
||||
field: field,
|
||||
term: term,
|
||||
prefix: prefix,
|
||||
i: i,
|
||||
}
|
||||
|
||||
// NOTE: in firestorm the dictionary row is advisory in nature
|
||||
// it *may* tell us the correct out
|
||||
// if this record does not exist, it DOES not mean that there is no
|
||||
// usage, we must scan the term frequencies to be sure
|
||||
if dictionaryValue != nil {
|
||||
dictionaryRow, err := NewDictionaryRowKV(dictionaryKey, dictionaryValue)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv.count = dictionaryRow.Count()
|
||||
}
|
||||
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
// Next returns the next live document containing this field/term, or nil
// when the iterator is exhausted. Multiple TermFreqRows may exist for the
// same doc ID (one per not-yet-garbage-collected doc number); they are
// collected together and the compensator decides which one is valid.
func (r *firestormTermFieldReader) Next() (*index.TermFieldDoc, error) {
	if r.i != nil {
		key, val, valid := r.i.Current()
		for valid {
			logger.Printf("see key: '%s' - % x", key, key)
			// gather all rows sharing this doc ID, keyed by doc number
			tfrsByDocNum := make(map[uint64]*TermFreqRow)
			tfr, err := NewTermFreqRowKV(key, val)
			if err != nil {
				return nil, err
			}
			tfrsByDocNum[tfr.DocNum()] = tfr

			// now we have a possible row, but there may be more rows for the same docid
			// find these now
			// NOTE: this advances r.i past every row with the same doc ID,
			// which is what guarantees forward progress in this loop.
			err = r.findNextTfrsWithSameDocId(tfrsByDocNum, tfr.DocID())
			if err != nil {
				return nil, err
			}

			docNumList := make(DocNumberList, 0, len(tfrsByDocNum))
			for dn := range tfrsByDocNum {
				docNumList = append(docNumList, dn)
			}

			logger.Printf("docNumList: %v", docNumList)

			// ask the compensator which doc number (if any) is still valid,
			// accounting for in-flight and deleted doc numbers
			highestValidDocNum := r.r.s.Which(tfr.docID, docNumList)
			if highestValidDocNum == 0 {
				// no valid doc number; the iterator already points at the
				// next distinct doc ID, so re-read it and try again
				key, val, valid = r.i.Current()
				continue
			}
			logger.Printf("highest valid: %d", highestValidDocNum)

			tfr = tfrsByDocNum[highestValidDocNum]

			return &index.TermFieldDoc{
				ID: string(tfr.DocID()),
				Freq: tfr.Freq(),
				Norm: float64(tfr.Norm()),
				Vectors: r.termFieldVectorsFromTermVectors(tfr.Vectors()),
			}, nil
		}
	}
	return nil, nil
}
|
||||
|
||||
func (r *firestormTermFieldReader) findNextTfrsWithSameDocId(tfrsByDocNum map[uint64]*TermFreqRow, docID []byte) error {
|
||||
tfrDocIdPrefix := TermFreqPrefixFieldTermDocId(r.field, r.term, docID)
|
||||
r.i.Next()
|
||||
key, val, valid := r.i.Current()
|
||||
for valid && bytes.HasPrefix(key, tfrDocIdPrefix) {
|
||||
tfr, err := NewTermFreqRowKV(key, val)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tfrsByDocNum[tfr.DocNum()] = tfr
|
||||
r.i.Next()
|
||||
key, val, valid = r.i.Current()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *firestormTermFieldReader) Advance(docID string) (*index.TermFieldDoc, error) {
|
||||
if r.i != nil {
|
||||
tfrDocIdPrefix := TermFreqPrefixFieldTermDocId(r.field, r.term, []byte(docID))
|
||||
r.i.Seek(tfrDocIdPrefix)
|
||||
return r.Next()
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Count returns the number of usages of this term in this field, as recorded
// by the dictionary row at construction time. NOTE(review): the dictionary
// row is advisory, so this count may not reflect rows found only by scanning;
// it is 0 when no dictionary row existed.
func (r *firestormTermFieldReader) Count() uint64 {
	return r.count
}
|
||||
|
||||
func (r *firestormTermFieldReader) Close() error {
|
||||
if r.i != nil {
|
||||
return r.i.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *firestormTermFieldReader) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector {
|
||||
rv := make([]*index.TermFieldVector, len(in))
|
||||
|
||||
for i, tv := range in {
|
||||
fieldName := r.r.f.fieldCache.FieldIndexed(uint16(tv.GetField()))
|
||||
tfv := index.TermFieldVector{
|
||||
Field: fieldName,
|
||||
ArrayPositions: tv.GetArrayPositions(),
|
||||
Pos: tv.GetPos(),
|
||||
Start: tv.GetStart(),
|
||||
End: tv.GetEnd(),
|
||||
}
|
||||
rv[i] = &tfv
|
||||
}
|
||||
return rv
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue