major refactor of analysis files, now wired up to registry
ultimately this is make it more convenient for us to wire up different elements of the analysis pipeline, without having to preload everything into memory before we need it separately the index layer now has a mechanism for storing internal key/value pairs. this is expected to be used to store the mapping, and possibly other pieces of data by the top layer, but not exposed to the user at the top.
This commit is contained in:
parent
3481ec9cef
commit
c526a38369
|
@ -0,0 +1,44 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package detect_lang_analyzer
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "detect_lang"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
keywordTokenizer, err := cache.TokenizerNamed("single")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
detectLangFilter, err := cache.TokenFilterNamed("detect_lang")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: keywordTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
detectLangFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package keyword_analyzer
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/single_token"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "keyword"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
keywordTokenizer, err := cache.TokenizerNamed(single_token.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: keywordTokenizer,
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package simple_analyzer
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/whitespace_tokenizer"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "simple"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
keywordTokenizer, err := cache.TokenizerNamed(whitespace_tokenizer.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: keywordTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package standard_analyzer
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/language/en"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/whitespace_tokenizer"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "standard"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
keywordTokenizer, err := cache.TokenizerNamed(whitespace_tokenizer.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopEnFilter, err := cache.TokenFilterNamed(en.StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: keywordTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
stopEnFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package html_char_filter
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/char_filters/regexp_char_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "html"
|
||||
|
||||
var htmlCharFilterRegexp = regexp.MustCompile(`</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`)
|
||||
|
||||
func CharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
|
||||
replaceBytes := []byte(" ")
|
||||
return regexp_char_filter.NewRegexpCharFilter(htmlCharFilterRegexp, replaceBytes), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterCharFilter(Name, CharFilterConstructor)
|
||||
}
|
|
@ -10,9 +10,15 @@ package regexp_char_filter
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"regexp"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "regexp"
|
||||
|
||||
type RegexpCharFilter struct {
|
||||
r *regexp.Regexp
|
||||
replacement []byte
|
||||
|
@ -28,3 +34,24 @@ func NewRegexpCharFilter(r *regexp.Regexp, replacement []byte) *RegexpCharFilter
|
|||
func (s *RegexpCharFilter) Filter(input []byte) []byte {
|
||||
return s.r.ReplaceAllFunc(input, func(in []byte) []byte { return bytes.Repeat(s.replacement, len(in)) })
|
||||
}
|
||||
|
||||
func RegexpCharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
|
||||
regexpStr, ok := config["regexp"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify regexp")
|
||||
}
|
||||
r, err := regexp.Compile(regexpStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to build regexp char filter: %v", err)
|
||||
}
|
||||
replaceBytes := []byte(" ")
|
||||
replaceStr, ok := config["replace"].(string)
|
||||
if ok {
|
||||
replaceBytes = []byte(replaceStr)
|
||||
}
|
||||
return NewRegexpCharFilter(r, replaceBytes), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterCharFilter(Name, RegexpCharFilterConstructor)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package zero_width_non_joiner
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/char_filters/regexp_char_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "zero_width_spaces"
|
||||
|
||||
var zeroWidthNonJoinerRegexp = regexp.MustCompile(`\x{200C}`)
|
||||
|
||||
func CharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
|
||||
replaceBytes := []byte(" ")
|
||||
return regexp_char_filter.NewRegexpCharFilter(zeroWidthNonJoinerRegexp, replaceBytes), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterCharFilter(Name, CharFilterConstructor)
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package html_char_filter
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/datetime_parsers/flexible_go"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "dateTimeOptional"
|
||||
|
||||
const rfc3339NoTimezone = "2006-01-02T15:04:05"
|
||||
const rfc3339NoTimezoneNoT = "2006-01-02 15:04:05"
|
||||
const rfc3339NoTime = "2006-01-02"
|
||||
|
||||
var layouts = []string{
|
||||
time.RFC3339Nano,
|
||||
time.RFC3339,
|
||||
rfc3339NoTimezone,
|
||||
rfc3339NoTimezoneNoT,
|
||||
rfc3339NoTime,
|
||||
}
|
||||
|
||||
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
|
||||
return flexible_go.NewFlexibleGoDateTimeParser(layouts), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
}
|
|
@ -9,11 +9,15 @@
|
|||
package flexible_go
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "flexiblego"
|
||||
|
||||
type FlexibleGoDateTimeParser struct {
|
||||
layouts []string
|
||||
}
|
||||
|
@ -33,3 +37,22 @@ func (p *FlexibleGoDateTimeParser) ParseDateTime(input string) (time.Time, error
|
|||
}
|
||||
return time.Time{}, analysis.INVALID_DATETIME
|
||||
}
|
||||
|
||||
func FlexibleGoDateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
|
||||
layouts, ok := config["layouts"].([]interface{})
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify layouts")
|
||||
}
|
||||
layoutStrs := make([]string, 0)
|
||||
for _, layout := range layouts {
|
||||
layoutStr, ok := layout.(string)
|
||||
if ok {
|
||||
layoutStrs = append(layoutStrs, layoutStr)
|
||||
}
|
||||
}
|
||||
return NewFlexibleGoDateTimeParser(layoutStrs), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterDateTimeParser(Name, FlexibleGoDateTimeParserConstructor)
|
||||
}
|
||||
|
|
|
@ -162,6 +162,5 @@ func TestTokenFrequenciesMergeAllLeftEmpty(t *testing.T) {
|
|||
result := tf1.MergeAll("tf2", tf2)
|
||||
if !reflect.DeepEqual(result, expectedResult) {
|
||||
t.Errorf("expected %#v, got %#v", expectedResult, result)
|
||||
//t.Logf("%#v", tf1[0])
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,14 +6,17 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package arabic_normalize
|
||||
package ar
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const NormalizeName = "normalize_ar"
|
||||
|
||||
const (
|
||||
ALEF = '\u0627'
|
||||
ALEF_MADDA = '\u0622'
|
||||
|
@ -70,3 +73,11 @@ func normalize(input []byte) []byte {
|
|||
}
|
||||
return analysis.BuildTermFromRunes(runes)
|
||||
}
|
||||
|
||||
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewArabicNormalizeFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
|
||||
}
|
|
@ -6,7 +6,7 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package arabic_normalize
|
||||
package ar
|
||||
|
||||
import (
|
||||
"reflect"
|
|
@ -6,32 +6,22 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package stop_words_filter
|
||||
package ar
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
type StopWordsFilter struct {
|
||||
stopWords analysis.WordMap
|
||||
}
|
||||
|
||||
func NewStopWordsFilter(stopWords analysis.WordMap) *StopWordsFilter {
|
||||
return &StopWordsFilter{
|
||||
stopWords: stopWords,
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func (f *StopWordsFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
rv := make(analysis.TokenStream, 0)
|
||||
|
||||
for _, token := range input {
|
||||
word := string(token.Term)
|
||||
_, isStopWord := f.stopWords[word]
|
||||
if !isStopWord {
|
||||
rv = append(rv, token)
|
||||
}
|
||||
}
|
||||
|
||||
return rv
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package ar
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_ar"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
|
||||
|
@ -130,3 +137,13 @@ var ArabicStopWords = []byte(`# This file was created by Jacques Savoy and is di
|
|||
لدى
|
||||
جميع
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(ArabicStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bg
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package bg
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_bg"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
|
||||
|
@ -198,3 +205,13 @@ var BulgarianStopWords = []byte(`# This file was created by Jacques Savoy and is
|
|||
щом
|
||||
я
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(BulgarianStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
package ca
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const ArticlesName = "articles_ca"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
|
||||
|
||||
var CatalanArticles = []byte(`
|
||||
d
|
||||
l
|
||||
m
|
||||
n
|
||||
s
|
||||
t
|
||||
`)
|
||||
|
||||
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(CatalanArticles)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package ca
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/elision_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const ElisionName = "elision_ca"
|
||||
|
||||
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error building elision filter: %v", err)
|
||||
}
|
||||
return elision_filter.NewElisionFilter(articlesTokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor)
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package ca
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func TestFrenchElision(t *testing.T) {
|
||||
tests := []struct {
|
||||
input analysis.TokenStream
|
||||
output analysis.TokenStream
|
||||
}{
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("l'Institut"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("d'Estudis"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("Institut"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("Estudis"),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cache := registry.NewCache()
|
||||
elisionFilter, err := cache.TokenFilterNamed(ElisionName)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, test := range tests {
|
||||
actual := elisionFilter.Filter(test.input)
|
||||
if !reflect.DeepEqual(actual, test.output) {
|
||||
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package ca
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package ca
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_ca"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
|
||||
|
@ -225,3 +232,13 @@ vostra
|
|||
vostre
|
||||
vostres
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(CatalanStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package ckb
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const AnalyzerName = "ckb"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
unicodeTokenizer, err := cache.TokenizerNamed(unicode_word_boundary.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
normCkbFilter, err := cache.TokenFilterNamed(NormalizeName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopCkbFilter, err := cache.TokenFilterNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stemmerCkbFilter, err := cache.TokenFilterNamed(StemmerName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: unicodeTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
normCkbFilter,
|
||||
toLowerFilter,
|
||||
stopCkbFilter,
|
||||
stemmerCkbFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
}
|
|
@ -6,15 +6,18 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package sorani_normalize
|
||||
package ckb
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"unicode"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const NormalizeName = "normalize_ckb"
|
||||
|
||||
const (
|
||||
YEH = '\u064A'
|
||||
DOTLESS_YEH = '\u0649'
|
||||
|
@ -103,3 +106,11 @@ func normalize(input []byte) []byte {
|
|||
}
|
||||
return analysis.BuildTermFromRunes(runes)
|
||||
}
|
||||
|
||||
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewSoraniNormalizeFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
|
||||
}
|
|
@ -6,7 +6,7 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package sorani_normalize
|
||||
package ckb
|
||||
|
||||
import (
|
||||
"reflect"
|
|
@ -6,15 +6,18 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package sorani_stemmer_filter
|
||||
package ckb
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StemmerName = "stemmer_ckb"
|
||||
|
||||
type SoraniStemmerFilter struct {
|
||||
}
|
||||
|
||||
|
@ -133,3 +136,11 @@ func buildTermFromRunes(runes []rune) []byte {
|
|||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewSoraniStemmerFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
|
||||
}
|
|
@ -6,14 +6,13 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package sorani_stemmer_filter
|
||||
package ckb
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/sorani_normalize"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/single_token"
|
||||
)
|
||||
|
||||
|
@ -24,7 +23,7 @@ func TestSoraniStemmerFilter(t *testing.T) {
|
|||
analyzer := analysis.Analyzer{
|
||||
Tokenizer: single_token.NewSingleTokenTokenizer(),
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
sorani_normalize.NewSoraniNormalizeFilter(),
|
||||
NewSoraniNormalizeFilter(),
|
||||
NewSoraniStemmerFilter(),
|
||||
},
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package ckb
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package ckb
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_ckb"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
|
||||
|
@ -141,3 +148,13 @@ var SoraniStopWords = []byte(`# set of kurdish stopwords
|
|||
# like
|
||||
وەک
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(SoraniStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package cs
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package cs
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_cs"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
|
||||
|
@ -177,3 +184,13 @@ jež
|
|||
jakož
|
||||
načež
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(CzechStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package da
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const AnalyzerName = "da"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
unicodeTokenizer, err := cache.TokenizerNamed(unicode_word_boundary.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopDaFilter, err := cache.TokenFilterNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stemmerDaFilter, err := cache.TokenFilterNamed(StemmerName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: unicodeTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
stopDaFilter,
|
||||
stemmerDaFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
}
|
|
@ -6,18 +6,20 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package rune_tokenizer
|
||||
package da
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stemmer_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
type WhitespaceClassifier struct{}
|
||||
const StemmerName = "stemmer_da"
|
||||
|
||||
func NewWhitespaceClassifier() *WhitespaceClassifier {
|
||||
return &WhitespaceClassifier{}
|
||||
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return stemmer_filter.NewStemmerFilter("da")
|
||||
}
|
||||
|
||||
func (c *WhitespaceClassifier) InToken(r rune) bool {
|
||||
return !unicode.IsSpace(r)
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package da
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package da
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_da"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||
|
@ -115,3 +122,13 @@ thi | for (conj)
|
|||
jer | you
|
||||
sådan | such, like this/like that
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(DanishStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package de
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const AnalyzerName = "de"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
unicodeTokenizer, err := cache.TokenizerNamed(unicode_word_boundary.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopDeFilter, err := cache.TokenFilterNamed(NormalizeName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
normalizeDeFilter, err := cache.TokenFilterNamed(NormalizeName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stemmerDeFilter, err := cache.TokenFilterNamed(StemmerName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: unicodeTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
stopDeFilter,
|
||||
normalizeDeFilter,
|
||||
stemmerDeFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
}
|
|
@ -6,14 +6,17 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package german_normalize
|
||||
package de
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const NormalizeName = "normalize_de"
|
||||
|
||||
const (
|
||||
N = 0 /* ordinary state */
|
||||
V = 1 /* stops 'u' from entering umlaut state */
|
||||
|
@ -84,3 +87,11 @@ func normalize(input []byte) []byte {
|
|||
}
|
||||
return analysis.BuildTermFromRunes(runes)
|
||||
}
|
||||
|
||||
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewGermanNormalizeFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
|
||||
}
|
|
@ -6,7 +6,7 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package german_normalize
|
||||
package de
|
||||
|
||||
import (
|
||||
"reflect"
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package de
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stemmer_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StemmerName = "stemmer_de"
|
||||
|
||||
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return stemmer_filter.NewStemmerFilter("de")
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package de
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package de
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_de"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||
|
@ -299,3 +306,13 @@ zwar | indeed
|
|||
zwischen | between
|
||||
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(GermanStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package el
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package el
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_el"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
|
||||
|
@ -83,3 +90,13 @@ var GreekStopWords = []byte(`# Lucene Greek Stopwords list
|
|||
οσο
|
||||
οτι
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(GreekStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package en
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
)
|
||||
|
||||
const AnalyzerName = "en"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
unicodeTokenizer, err := cache.TokenizerNamed(unicode_word_boundary.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopEnFilter, err := cache.TokenFilterNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stemmerEnFilter, err := cache.TokenFilterNamed(StemmerName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: unicodeTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
stopEnFilter,
|
||||
stemmerEnFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package en
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stemmer_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StemmerName = "stemmer_en"
|
||||
|
||||
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return stemmer_filter.NewStemmerFilter("en")
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package en
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func TestEnglishStemmer(t *testing.T) {
|
||||
tests := []struct {
|
||||
input analysis.TokenStream
|
||||
output analysis.TokenStream
|
||||
}{
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("walking"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("talked"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("business"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("protected"),
|
||||
KeyWord: true,
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("walk"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("talk"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("busi"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("protected"),
|
||||
KeyWord: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cache := registry.NewCache()
|
||||
stemmerFilter, err := cache.TokenFilterNamed(StemmerName)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, test := range tests {
|
||||
actual := stemmerFilter.Filter(test.input)
|
||||
if !reflect.DeepEqual(actual, test.output) {
|
||||
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package en
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package en
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_en"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||
|
@ -324,3 +331,13 @@ very
|
|||
| high
|
||||
| long
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(EnglishStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package es
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
)
|
||||
|
||||
const AnalyzerName = "es"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
unicodeTokenizer, err := cache.TokenizerNamed(unicode_word_boundary.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopEsFilter, err := cache.TokenFilterNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stemmerEsFilter, err := cache.TokenFilterNamed(StemmerName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: unicodeTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
stopEsFilter,
|
||||
stemmerEsFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package es
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stemmer_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StemmerName = "stemmer_es"
|
||||
|
||||
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return stemmer_filter.NewStemmerFilter("es")
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package es
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package es
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_es"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||
|
@ -361,3 +368,13 @@ tenidas
|
|||
tened
|
||||
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(SpanishStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package eu
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package eu
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_eu"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
|
||||
|
@ -104,3 +111,13 @@ zuek
|
|||
zuen
|
||||
zuten
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(BasqueStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package fa
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/char_filters/zero_width_non_joiner"
|
||||
"github.com/couchbaselabs/bleve/analysis/language/ar"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
)
|
||||
|
||||
const AnalyzerName = "fa"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
zFilter, err := cache.CharFilterNamed(zero_width_non_joiner.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
unicodeTokenizer, err := cache.TokenizerNamed(unicode_word_boundary.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
normArFilter, err := cache.TokenFilterNamed(ar.NormalizeName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
normFaFilter, err := cache.TokenFilterNamed(NormalizeName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopFaFilter, err := cache.TokenFilterNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
CharFilters: []analysis.CharFilter{
|
||||
zFilter,
|
||||
},
|
||||
Tokenizer: unicodeTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
normArFilter,
|
||||
normFaFilter,
|
||||
stopFaFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
}
|
|
@ -6,14 +6,17 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package persian_normalize
|
||||
package fa
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const NormalizeName = "normalize_fa"
|
||||
|
||||
const (
|
||||
YEH = '\u064A'
|
||||
FARSI_YEH = '\u06CC'
|
||||
|
@ -62,3 +65,11 @@ func normalize(input []byte) []byte {
|
|||
}
|
||||
return analysis.BuildTermFromRunes(runes)
|
||||
}
|
||||
|
||||
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewPersianNormalizeFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
|
||||
}
|
|
@ -6,7 +6,7 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package persian_normalize
|
||||
package fa
|
||||
|
||||
import (
|
||||
"reflect"
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package fa
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package fa
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_fa"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
|
||||
|
@ -318,3 +325,13 @@ var PersianStopWords = []byte(`# This file was created by Jacques Savoy and is d
|
|||
عنوان
|
||||
بود
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(PersianStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package fi
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
)
|
||||
|
||||
const AnalyzerName = "fi"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
unicodeTokenizer, err := cache.TokenizerNamed(unicode_word_boundary.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopFiFilter, err := cache.TokenFilterNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stemmerFiFilter, err := cache.TokenFilterNamed(StemmerName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: unicodeTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
stopFiFilter,
|
||||
stemmerFiFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package fi
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stemmer_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StemmerName = "stemmer_fi"
|
||||
|
||||
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return stemmer_filter.NewStemmerFilter("fi")
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package fi
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package fi
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_fi"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||
|
@ -102,3 +109,13 @@ nyt | now
|
|||
itse | self
|
||||
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(FinnishStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package fr
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
)
|
||||
|
||||
const AnalyzerName = "fr"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
unicodeTokenizer, err := cache.TokenizerNamed(unicode_word_boundary.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
elisionFilter, err := cache.TokenFilterNamed(ElisionName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopFrFilter, err := cache.TokenFilterNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stemmerFrFilter, err := cache.TokenFilterNamed(StemmerName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: unicodeTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
elisionFilter,
|
||||
toLowerFilter,
|
||||
stopFrFilter,
|
||||
stemmerFrFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
package fr
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const ArticlesName = "articles_fr"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
|
||||
|
||||
var FrenchArticles = []byte(`
|
||||
l
|
||||
m
|
||||
t
|
||||
qu
|
||||
n
|
||||
s
|
||||
j
|
||||
d
|
||||
c
|
||||
jusqu
|
||||
quoiqu
|
||||
lorsqu
|
||||
puisqu
|
||||
`)
|
||||
|
||||
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(FrenchArticles)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package fr
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/elision_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const ElisionName = "elision_fr"
|
||||
|
||||
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error building elision filter: %v", err)
|
||||
}
|
||||
return elision_filter.NewElisionFilter(articlesTokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor)
|
||||
}
|
|
@ -6,50 +6,44 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package rune_tokenizer
|
||||
package fr
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func TestWhitespaceTokenizer(t *testing.T) {
|
||||
|
||||
classifier := NewWhitespaceClassifier()
|
||||
|
||||
func TestFrenchElision(t *testing.T) {
|
||||
tests := []struct {
|
||||
input []byte
|
||||
input analysis.TokenStream
|
||||
output analysis.TokenStream
|
||||
}{
|
||||
{
|
||||
[]byte("Hello World"),
|
||||
analysis.TokenStream{
|
||||
{
|
||||
Start: 0,
|
||||
End: 5,
|
||||
Term: []byte("Hello"),
|
||||
Position: 1,
|
||||
Type: analysis.AlphaNumeric,
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("l'avion"),
|
||||
},
|
||||
{
|
||||
Start: 6,
|
||||
End: 11,
|
||||
Term: []byte("World"),
|
||||
Position: 2,
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("avion"),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cache := registry.NewCache()
|
||||
elisionFilter, err := cache.TokenFilterNamed(ElisionName)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, test := range tests {
|
||||
tokenizer := NewRuneTokenizer(classifier)
|
||||
actual := tokenizer.Tokenize(test.input)
|
||||
|
||||
actual := elisionFilter.Filter(test.input)
|
||||
if !reflect.DeepEqual(actual, test.output) {
|
||||
t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
|
||||
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package fr
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stemmer_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StemmerName = "stemmer_fr"
|
||||
|
||||
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return stemmer_filter.NewStemmerFilter("fr")
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package fr
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package fr
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_fr"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||
|
@ -191,3 +198,13 @@ sans | without
|
|||
soi | oneself
|
||||
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(FrenchStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
package ga
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const ArticlesName = "articles_ga"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
|
||||
|
||||
var IrishArticles = []byte(`
|
||||
d
|
||||
m
|
||||
b
|
||||
`)
|
||||
|
||||
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(IrishArticles)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package ga
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/elision_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const ElisionName = "elision_ga"
|
||||
|
||||
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error building elision filter: %v", err)
|
||||
}
|
||||
return elision_filter.NewElisionFilter(articlesTokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor)
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package ga
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func TestFrenchElision(t *testing.T) {
|
||||
tests := []struct {
|
||||
input analysis.TokenStream
|
||||
output analysis.TokenStream
|
||||
}{
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("b'fhearr"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("fhearr"),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cache := registry.NewCache()
|
||||
elisionFilter, err := cache.TokenFilterNamed(ElisionName)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, test := range tests {
|
||||
actual := elisionFilter.Filter(test.input)
|
||||
if !reflect.DeepEqual(actual, test.output) {
|
||||
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package ga
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package ga
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_ga"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||
|
@ -115,3 +122,13 @@ um
|
|||
óna
|
||||
ónár
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(IrishStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package gl
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package gl
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_gl"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
|
||||
|
@ -166,3 +173,13 @@ voso
|
|||
vosos
|
||||
vós
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(GalicianStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -6,14 +6,17 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package hindi_normalize
|
||||
package hi
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const NormalizeName = "normalize_hi"
|
||||
|
||||
type HindiNormalizeFilter struct {
|
||||
}
|
||||
|
||||
|
@ -123,3 +126,11 @@ func normalize(input []byte) []byte {
|
|||
}
|
||||
return analysis.BuildTermFromRunes(runes)
|
||||
}
|
||||
|
||||
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewHindiNormalizeFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
|
||||
}
|
|
@ -6,7 +6,7 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package hindi_normalize
|
||||
package hi
|
||||
|
||||
import (
|
||||
"reflect"
|
|
@ -6,15 +6,18 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package hindi_stemmer_filter
|
||||
package hi
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StemmerName = "stemmer_hi"
|
||||
|
||||
type HindiStemmerFilter struct {
|
||||
}
|
||||
|
||||
|
@ -134,3 +137,11 @@ func stem(input []byte) []byte {
|
|||
|
||||
return input
|
||||
}
|
||||
|
||||
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewHindiStemmerFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
|
||||
}
|
|
@ -6,7 +6,7 @@
|
|||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package hindi_stemmer_filter
|
||||
package hi
|
||||
|
||||
import (
|
||||
"reflect"
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package hi
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package hi
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_hi"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
|
||||
|
@ -240,3 +247,13 @@ var HindiStopWords = []byte(`# Also see http://www.opensource.org/licenses/bsd-l
|
|||
जेसा
|
||||
नहिं
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(HindiStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package hu
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
)
|
||||
|
||||
const AnalyzerName = "hu"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
unicodeTokenizer, err := cache.TokenizerNamed(unicode_word_boundary.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopHuFilter, err := cache.TokenFilterNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stemmerHuFilter, err := cache.TokenFilterNamed(StemmerName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: unicodeTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
stopHuFilter,
|
||||
stemmerHuFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package hu
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stemmer_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StemmerName = "stemmer_hu"
|
||||
|
||||
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return stemmer_filter.NewStemmerFilter("hu")
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package hu
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package hu
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_hu"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||
|
@ -216,3 +223,13 @@ vele
|
|||
viszont
|
||||
volna
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(HungarianStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package hy
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package hy
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_hy"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
|
||||
|
@ -51,3 +58,13 @@ var ArmenianStopWords = []byte(`# example set of Armenian stopwords.
|
|||
վրա
|
||||
և
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(ArmenianStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package id
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package id
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_id"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
|
||||
|
@ -364,3 +371,13 @@ yaitu
|
|||
yakni
|
||||
yang
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(IndonesianStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package it
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
)
|
||||
|
||||
const AnalyzerName = "it"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
unicodeTokenizer, err := cache.TokenizerNamed(unicode_word_boundary.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
elisionFilter, err := cache.TokenFilterNamed(ElisionName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopItFilter, err := cache.TokenFilterNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stemmerItFilter, err := cache.TokenFilterNamed(StemmerName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: unicodeTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
elisionFilter,
|
||||
toLowerFilter,
|
||||
stopItFilter,
|
||||
stemmerItFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
package it
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const ArticlesName = "articles_it"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
|
||||
|
||||
var ItalianArticles = []byte(`
|
||||
c
|
||||
l
|
||||
all
|
||||
dall
|
||||
dell
|
||||
nell
|
||||
sull
|
||||
coll
|
||||
pell
|
||||
gl
|
||||
agl
|
||||
dagl
|
||||
degl
|
||||
negl
|
||||
sugl
|
||||
un
|
||||
m
|
||||
t
|
||||
s
|
||||
v
|
||||
d
|
||||
`)
|
||||
|
||||
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(ItalianArticles)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package it
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/elision_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const ElisionName = "elision_it"
|
||||
|
||||
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error building elision filter: %v", err)
|
||||
}
|
||||
return elision_filter.NewElisionFilter(articlesTokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor)
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package it
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func TestFrenchElision(t *testing.T) {
|
||||
tests := []struct {
|
||||
input analysis.TokenStream
|
||||
output analysis.TokenStream
|
||||
}{
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("dell'Italia"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("Italia"),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cache := registry.NewCache()
|
||||
elisionFilter, err := cache.TokenFilterNamed(ElisionName)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, test := range tests {
|
||||
actual := elisionFilter.Filter(test.input)
|
||||
if !reflect.DeepEqual(actual, test.output) {
|
||||
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package it
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stemmer_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StemmerName = "stemmer_it"
|
||||
|
||||
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return stemmer_filter.NewStemmerFilter("it")
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package it
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
|
@ -1,4 +1,11 @@
|
|||
package stop_words_filter
|
||||
package it
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_it"
|
||||
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||
|
@ -308,3 +315,13 @@ stessimo
|
|||
stessero
|
||||
stando
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(ItalianStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package nl
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
)
|
||||
|
||||
const AnalyzerName = "nl"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
unicodeTokenizer, err := cache.TokenizerNamed(unicode_word_boundary.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopNlFilter, err := cache.TokenFilterNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stemmerNlFilter, err := cache.TokenFilterNamed(StemmerName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: unicodeTokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
stopNlFilter,
|
||||
stemmerNlFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package nl
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stemmer_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
const StemmerName = "stemmer_nl"
|
||||
|
||||
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return stemmer_filter.NewStemmerFilter("nl")
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package nl
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_tokens_filter"
|
||||
"github.com/couchbaselabs/bleve/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue