diff --git a/analysis/language/ar/arabic_normalize.go b/analysis/language/ar/arabic_normalize.go
index 70705258..abf86018 100644
--- a/analysis/language/ar/arabic_normalize.go
+++ b/analysis/language/ar/arabic_normalize.go
@@ -46,15 +46,11 @@ func NewArabicNormalizeFilter() *ArabicNormalizeFilter {
 }
 
 func (s *ArabicNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
-
 	for _, token := range input {
 		term := normalize(token.Term)
 		token.Term = term
-		rv = append(rv, token)
 	}
-
-	return rv
+	return input
 }
 
 func normalize(input []byte) []byte {
diff --git a/analysis/language/cjk/cjk_bigram.go b/analysis/language/cjk/cjk_bigram.go
index 36cfc88d..0596447e 100644
--- a/analysis/language/cjk/cjk_bigram.go
+++ b/analysis/language/cjk/cjk_bigram.go
@@ -32,7 +32,7 @@ func (s *CJKBigramFilter) Filter(input analysis.TokenStream) analysis.TokenStrea
 	r := ring.New(2)
 	itemsInRing := 0
 
-	rv := make(analysis.TokenStream, 0)
+	rv := make(analysis.TokenStream, 0, len(input))
 
 	for _, token := range input {
 		if token.Type == analysis.Ideographic {
diff --git a/analysis/language/ckb/sorani_normalize.go b/analysis/language/ckb/sorani_normalize.go
index 9b414192..b264bb40 100644
--- a/analysis/language/ckb/sorani_normalize.go
+++ b/analysis/language/ckb/sorani_normalize.go
@@ -56,15 +56,11 @@ func NewSoraniNormalizeFilter() *SoraniNormalizeFilter {
 }
 
 func (s *SoraniNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
-
 	for _, token := range input {
 		term := normalize(token.Term)
 		token.Term = term
-		rv = append(rv, token)
 	}
-
-	return rv
+	return input
 }
 
 func normalize(input []byte) []byte {
diff --git a/analysis/language/ckb/sorani_stemmer_filter.go b/analysis/language/ckb/sorani_stemmer_filter.go
index aabce1c6..68367a55 100644
--- a/analysis/language/ckb/sorani_stemmer_filter.go
+++ b/analysis/language/ckb/sorani_stemmer_filter.go
@@ -27,18 +27,14 @@ func NewSoraniStemmerFilter() *SoraniStemmerFilter {
 }
 
 func (s *SoraniStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
-
 	for _, token := range input {
 		// if not protected keyword, stem it
 		if !token.KeyWord {
 			stemmed := stem(token.Term)
 			token.Term = stemmed
 		}
-		rv = append(rv, token)
 	}
-
-	return rv
+	return input
 }
 
 func stem(input []byte) []byte {
diff --git a/analysis/language/de/german_normalize.go b/analysis/language/de/german_normalize.go
index cc73ea65..47cc97b4 100644
--- a/analysis/language/de/german_normalize.go
+++ b/analysis/language/de/german_normalize.go
@@ -32,15 +32,11 @@ func NewGermanNormalizeFilter() *GermanNormalizeFilter {
 }
 
 func (s *GermanNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
-
 	for _, token := range input {
 		term := normalize(token.Term)
 		token.Term = term
-		rv = append(rv, token)
 	}
-
-	return rv
+	return input
 }
 
 func normalize(input []byte) []byte {
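Review note: the deletions above all follow one pattern. Filters that only rewrite each token's Term no longer allocate a fresh TokenStream and append to it; they mutate the tokens in place and return the input slice. This relies on analysis.TokenStream being a slice of token pointers, so writes through the loop variable are visible to the caller. A minimal sketch of the idea with stand-in types (not bleve's actual definitions):

    package main

    import (
        "bytes"
        "fmt"
    )

    // Stand-ins for bleve's analysis types: the stream holds pointers,
    // so mutating a token through the loop variable updates the stream.
    type Token struct{ Term []byte }
    type TokenStream []*Token

    // lowerFilter plays the role of the normalize filters above: it only
    // rewrites Term, so it can return its input instead of building rv.
    func lowerFilter(input TokenStream) TokenStream {
        for _, token := range input {
            token.Term = bytes.ToLower(token.Term)
        }
        return input
    }

    func main() {
        ts := TokenStream{{Term: []byte("Hello")}, {Term: []byte("WORLD")}}
        for _, t := range lowerFilter(ts) {
            fmt.Println(string(t.Term)) // hello, world
        }
    }

Note this only holds while TokenStream is a slice of pointers; if it ever became a slice of values, the loop variable would be a copy and these filters would silently become no-ops.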
diff --git a/analysis/language/en/possessive_filter_en.go b/analysis/language/en/possessive_filter_en.go
index 128fbac9..84a62c2f 100644
--- a/analysis/language/en/possessive_filter_en.go
+++ b/analysis/language/en/possessive_filter_en.go
@@ -32,7 +32,6 @@ func NewPossessiveFilter() *PossessiveFilter {
 }
 
 func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-
 	for _, token := range input {
 		runes := bytes.Runes(token.Term)
 		if len(runes) >= 2 {
@@ -46,7 +45,6 @@ func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStre
 			}
 		}
 	}
-
 	return input
 }
 
diff --git a/analysis/language/fa/persian_normalize.go b/analysis/language/fa/persian_normalize.go
index 494b5597..ab67af3a 100644
--- a/analysis/language/fa/persian_normalize.go
+++ b/analysis/language/fa/persian_normalize.go
@@ -38,15 +38,11 @@ func NewPersianNormalizeFilter() *PersianNormalizeFilter {
 }
 
 func (s *PersianNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
-
 	for _, token := range input {
 		term := normalize(token.Term)
 		token.Term = term
-		rv = append(rv, token)
 	}
-
-	return rv
+	return input
 }
 
 func normalize(input []byte) []byte {
diff --git a/analysis/language/hi/hindi_normalize.go b/analysis/language/hi/hindi_normalize.go
index 1117c131..f9b607c6 100644
--- a/analysis/language/hi/hindi_normalize.go
+++ b/analysis/language/hi/hindi_normalize.go
@@ -26,15 +26,11 @@ func NewHindiNormalizeFilter() *HindiNormalizeFilter {
 }
 
 func (s *HindiNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
-
 	for _, token := range input {
 		term := normalize(token.Term)
 		token.Term = term
-		rv = append(rv, token)
 	}
-
-	return rv
+	return input
 }
 
 func normalize(input []byte) []byte {
diff --git a/analysis/language/hi/hindi_stemmer_filter.go b/analysis/language/hi/hindi_stemmer_filter.go
index 0ebd3bde..b4e6ddd6 100644
--- a/analysis/language/hi/hindi_stemmer_filter.go
+++ b/analysis/language/hi/hindi_stemmer_filter.go
@@ -27,18 +27,14 @@ func NewHindiStemmerFilter() *HindiStemmerFilter {
 }
 
 func (s *HindiStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
-
 	for _, token := range input {
 		// if not protected keyword, stem it
 		if !token.KeyWord {
 			stemmed := stem(token.Term)
 			token.Term = stemmed
 		}
-		rv = append(rv, token)
 	}
-
-	return rv
+	return input
 }
 
 func stem(input []byte) []byte {
diff --git a/analysis/language/ja/ja_morph_kagome.go b/analysis/language/ja/ja_morph_kagome.go
index 9989526e..415183d2 100644
--- a/analysis/language/ja/ja_morph_kagome.go
+++ b/analysis/language/ja/ja_morph_kagome.go
@@ -46,7 +46,7 @@ func (t *KagomeMorphTokenizer) Tokenize(input []byte) analysis.TokenStream {
 		prevstart int
 	)
 
-	rv := make(analysis.TokenStream, 0)
+	rv := make(analysis.TokenStream, 0, len(input))
 	if len(input) < 1 {
 		return rv
 	}
diff --git a/analysis/token_filters/apostrophe_filter/apostrophe_filter.go b/analysis/token_filters/apostrophe_filter/apostrophe_filter.go
index 7f889ec5..853db823 100644
--- a/analysis/token_filters/apostrophe_filter/apostrophe_filter.go
+++ b/analysis/token_filters/apostrophe_filter/apostrophe_filter.go
@@ -29,18 +29,15 @@ func NewApostropheFilter() *ApostropheFilter {
 }
 
 func (s *ApostropheFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
-
 	for _, token := range input {
 		firstApostrophe := bytes.IndexAny(token.Term, Apostrophes)
 		if firstApostrophe >= 0 {
 			// found an apostrophe
 			token.Term = token.Term[0:firstApostrophe]
 		}
 
-		rv = append(rv, token)
 	}
-	return rv
+	return input
 }
 
 func ApostropheFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
diff --git a/analysis/token_filters/cld2/cld2_filter.go b/analysis/token_filters/cld2/cld2_filter.go
index 2458564d..4349ef8a 100644
--- a/analysis/token_filters/cld2/cld2_filter.go
+++ b/analysis/token_filters/cld2/cld2_filter.go
@@ -33,7 +33,7 @@ func NewCld2Filter() *Cld2Filter {
 }
 
 func (f *Cld2Filter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
+	rv := make(analysis.TokenStream, 0, len(input))
 
 	offset := 0
 	for _, token := range input {
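Review note: the make(analysis.TokenStream, 0) to make(analysis.TokenStream, 0, len(input)) changes (cjk_bigram and kagome above, cld2 here, more below) pre-size rv so append does not have to re-allocate and copy as the slice grows. For one-token-in, one-token-out filters len(input) is exact; for filters whose output count can differ it is a starting capacity rather than an exact fit, and in the kagome tokenizer len(input) counts bytes, so it over-reserves (there are at most as many tokens as bytes). A small self-contained demonstration of what the capacity hint buys, using stand-in types and testing.AllocsPerRun:

    package main

    import (
        "fmt"
        "testing"
    )

    type Token struct{ Term []byte }
    type TokenStream []*Token

    func build(input TokenStream, presize bool) TokenStream {
        rv := make(TokenStream, 0)
        if presize {
            rv = make(TokenStream, 0, len(input)) // one up-front allocation
        }
        for _, t := range input {
            rv = append(rv, t) // without the hint, this re-allocates as rv grows
        }
        return rv
    }

    func main() {
        input := make(TokenStream, 1024)
        for i := range input {
            input[i] = &Token{Term: []byte("term")}
        }
        for _, presize := range []bool{false, true} {
            allocs := testing.AllocsPerRun(100, func() { _ = build(input, presize) })
            fmt.Printf("presize=%v  allocs/op=%.0f\n", presize, allocs)
        }
    }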
diff --git a/analysis/token_filters/edge_ngram_filter/edge_ngram_filter.go b/analysis/token_filters/edge_ngram_filter/edge_ngram_filter.go
index 7372e4e8..f370aa9d 100644
--- a/analysis/token_filters/edge_ngram_filter/edge_ngram_filter.go
+++ b/analysis/token_filters/edge_ngram_filter/edge_ngram_filter.go
@@ -40,7 +40,7 @@ func NewEdgeNgramFilter(side Side, minLength, maxLength int) *EdgeNgramFilter {
 }
 
 func (s *EdgeNgramFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
+	rv := make(analysis.TokenStream, 0, len(input))
 
 	for _, token := range input {
 		runeCount := utf8.RuneCount(token.Term)
diff --git a/analysis/token_filters/elision_filter/elision_filter.go b/analysis/token_filters/elision_filter/elision_filter.go
index e1baced8..61aa6137 100644
--- a/analysis/token_filters/elision_filter/elision_filter.go
+++ b/analysis/token_filters/elision_filter/elision_filter.go
@@ -35,8 +35,6 @@ func NewElisionFilter(articles analysis.TokenMap) *ElisionFilter {
 }
 
 func (s *ElisionFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
-
 	for _, token := range input {
 		firstApostrophe := bytes.IndexAny(token.Term, Apostrophes)
 		if firstApostrophe >= 0 {
@@ -48,10 +46,8 @@ func (s *ElisionFilter) Filter(input analysis.TokenStream) analysis.TokenStream
 				token.Term = token.Term[firstApostrophe+1:]
 			}
 		}
-		rv = append(rv, token)
 	}
-
-	return rv
+	return input
 }
 
 func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
diff --git a/analysis/token_filters/keyword_marker_filter/keyword_marker_filter.go b/analysis/token_filters/keyword_marker_filter/keyword_marker_filter.go
index e950331f..062d2172 100644
--- a/analysis/token_filters/keyword_marker_filter/keyword_marker_filter.go
+++ b/analysis/token_filters/keyword_marker_filter/keyword_marker_filter.go
@@ -29,7 +29,6 @@ func NewKeyWordMarkerFilter(keyWords analysis.TokenMap) *KeyWordMarkerFilter {
 }
 
 func (f *KeyWordMarkerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-
 	for _, token := range input {
 		word := string(token.Term)
 		_, isKeyWord := f.keyWords[word]
@@ -37,7 +36,6 @@ func (f *KeyWordMarkerFilter) Filter(input analysis.TokenStream) analysis.TokenS
 			token.KeyWord = true
 		}
 	}
-
 	return input
 }
 
diff --git a/analysis/token_filters/length_filter/length_filter.go b/analysis/token_filters/length_filter/length_filter.go
index f42c8c7d..118c583c 100644
--- a/analysis/token_filters/length_filter/length_filter.go
+++ b/analysis/token_filters/length_filter/length_filter.go
@@ -32,7 +32,7 @@ func NewLengthFilter(min, max int) *LengthFilter {
 }
 
 func (f *LengthFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
+	rv := make(analysis.TokenStream, 0, len(input))
 
 	for _, token := range input {
 		wordLen := utf8.RuneCount(token.Term)
diff --git a/analysis/token_filters/lower_case_filter/lower_case_filter.go b/analysis/token_filters/lower_case_filter/lower_case_filter.go
index 7668bf4b..44884924 100644
--- a/analysis/token_filters/lower_case_filter/lower_case_filter.go
+++ b/analysis/token_filters/lower_case_filter/lower_case_filter.go
@@ -26,16 +26,12 @@ func NewLowerCaseFilter() *LowerCaseFilter {
 }
 
 func (f *LowerCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
-
 	for _, token := range input {
 		word := string(token.Term)
 		wordLowerCase := strings.ToLower(word)
 		token.Term = []byte(wordLowerCase)
-		rv = append(rv, token)
 	}
-
-	return rv
+	return input
 }
 
 func LowerCaseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
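Review note: this stretch shows why not every filter can return input. Filters that may drop tokens (length, stop token) or emit a different number of tokens than they receive (edge n-gram, n-gram, shingle, cjk bigram) still have to build a separate rv; for those, the capacity hint is the available win. A sketch of a dropping filter in the same stand-in style (the bounds handling is simplified; the real length filter's min/max semantics may differ):

    package main

    import (
        "fmt"
        "unicode/utf8"
    )

    type Token struct{ Term []byte }
    type TokenStream []*Token

    // A token-dropping filter cannot return input unchanged; it still
    // allocates rv, pre-sized to the worst case where nothing is dropped.
    func lengthFilter(input TokenStream, min, max int) TokenStream {
        rv := make(TokenStream, 0, len(input))
        for _, token := range input {
            if n := utf8.RuneCount(token.Term); n >= min && n <= max {
                rv = append(rv, token)
            }
        }
        return rv
    }

    func main() {
        in := TokenStream{{Term: []byte("a")}, {Term: []byte("hello")}, {Term: []byte("encyclopedia")}}
        for _, t := range lengthFilter(in, 2, 8) {
            fmt.Println(string(t.Term)) // only "hello" survives
        }
    }

An alternative would be filtering into input[:0] to reuse the backing array, but the diff keeps the separate copy, which avoids aliasing the caller's slice.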
diff --git a/analysis/token_filters/ngram_filter/ngram_filter.go b/analysis/token_filters/ngram_filter/ngram_filter.go
index 903a81df..d1433e26 100644
--- a/analysis/token_filters/ngram_filter/ngram_filter.go
+++ b/analysis/token_filters/ngram_filter/ngram_filter.go
@@ -33,7 +33,7 @@ func NewNgramFilter(minLength, maxLength int) *NgramFilter {
 }
 
 func (s *NgramFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
+	rv := make(analysis.TokenStream, 0, len(input))
 
 	for _, token := range input {
 		runeCount := utf8.RuneCount(token.Term)
diff --git a/analysis/token_filters/shingle/shingle.go b/analysis/token_filters/shingle/shingle.go
index 8b7f122f..90965092 100644
--- a/analysis/token_filters/shingle/shingle.go
+++ b/analysis/token_filters/shingle/shingle.go
@@ -32,7 +32,7 @@ func NewShingleFilter(min, max int, outputOriginal bool, sep, fill string) *Shin
 }
 
 func (s *ShingleFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
+	rv := make(analysis.TokenStream, 0, len(input))
 
 	currentPosition := 0
 	for _, token := range input {
diff --git a/analysis/token_filters/shingle/shingle_test.go b/analysis/token_filters/shingle/shingle_test.go
index 42f114a4..d81ac57e 100644
--- a/analysis/token_filters/shingle/shingle_test.go
+++ b/analysis/token_filters/shingle/shingle_test.go
@@ -16,7 +16,7 @@ import (
 	"github.com/blevesearch/bleve/analysis"
 )
 
-func TestNgramFilter(t *testing.T) {
+func TestShingleFilter(t *testing.T) {
 
 	tests := []struct {
 		min            int
diff --git a/analysis/token_filters/stemmer_filter/stemmer_filter.go b/analysis/token_filters/stemmer_filter/stemmer_filter.go
index 6c476840..3640681b 100644
--- a/analysis/token_filters/stemmer_filter/stemmer_filter.go
+++ b/analysis/token_filters/stemmer_filter/stemmer_filter.go
@@ -22,18 +22,22 @@ import (
 
 const Name = "stem"
 
 type StemmerFilter struct {
-	lang    string
-	stemmer *snowball.Stemmer
+	lang        string
+	stemmerPool chan *snowball.Stemmer
 }
 
 func NewStemmerFilter(lang string) (*StemmerFilter, error) {
-	stemmer, err := snowball.New(lang)
-	if err != nil {
-		return nil, err
+	stemmerPool := make(chan *snowball.Stemmer, 4)
+	for i := 0; i < 4; i++ {
+		stemmer, err := snowball.New(lang)
+		if err != nil {
+			return nil, err
+		}
+		stemmerPool <- stemmer
 	}
 	return &StemmerFilter{
-		lang:    lang,
-		stemmer: stemmer,
+		lang:        lang,
+		stemmerPool: stemmerPool,
 	}, nil
 }
@@ -50,18 +54,16 @@ func (s *StemmerFilter) List() []string {
 }
 
 func (s *StemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
-
 	for _, token := range input {
 		// if not protected keyword, stem it
 		if !token.KeyWord {
-			stemmed := s.stemmer.Stem(string(token.Term))
+			stemmer := <-s.stemmerPool
+			stemmed := stemmer.Stem(string(token.Term))
+			s.stemmerPool <- stemmer
 			token.Term = []byte(stemmed)
 		}
-		rv = append(rv, token)
 	}
-
-	return rv
+	return input
 }
 
 func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
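Review note: the stemmer_filter change is different in kind. The motivation appears to be that a single snowball.Stemmer is stateful and not safe for concurrent use, so instead of one shared instance the filter now keeps a fixed pool of four, using a buffered channel as the pool: receive to check a stemmer out, send to check it back in, with receives blocking when all four are busy. A self-contained sketch of the channel-as-pool pattern (resource and poolSize are illustrative stand-ins, not names from the diff):

    package main

    import (
        "fmt"
        "strings"
        "sync"
    )

    // resource stands in for *snowball.Stemmer: stateful, not safe for
    // concurrent use, and expensive enough to build once and reuse.
    type resource struct{ id int }

    func (r *resource) stem(s string) string { return strings.TrimSuffix(s, "ing") }

    const poolSize = 4 // the diff hard-codes 4 stemmers

    func main() {
        // The buffered channel is the pool: pre-fill it with poolSize
        // resources before handing it to any goroutine.
        pool := make(chan *resource, poolSize)
        for i := 0; i < poolSize; i++ {
            pool <- &resource{id: i}
        }

        var wg sync.WaitGroup
        for _, w := range []string{"running", "jumping", "stemming", "walking", "talking"} {
            wg.Add(1)
            go func(word string) {
                defer wg.Done()
                r := <-pool         // check out; blocks if all 4 are in use
                out := r.stem(word) // exclusive use of the resource
                pool <- r           // check back in
                fmt.Println(out)
            }(w)
        }
        wg.Wait()
    }

One thing worth flagging: Filter checks a stemmer out and back in once per token; hoisting the checkout outside the loop (once per Filter call) would cut the channel traffic, at the cost of holding a stemmer for the whole stream.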
diff --git a/analysis/token_filters/stop_tokens_filter/stop_tokens_filter.go b/analysis/token_filters/stop_tokens_filter/stop_tokens_filter.go
index 8f429ff3..712302b8 100644
--- a/analysis/token_filters/stop_tokens_filter/stop_tokens_filter.go
+++ b/analysis/token_filters/stop_tokens_filter/stop_tokens_filter.go
@@ -29,7 +29,7 @@ func NewStopTokensFilter(stopTokens analysis.TokenMap) *StopTokensFilter {
 }
 
 func (f *StopTokensFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
+	rv := make(analysis.TokenStream, 0, len(input))
 
 	for _, token := range input {
 		tokenTerm := string(token.Term)
diff --git a/analysis/token_filters/truncate_token_filter/truncate_token_filter.go b/analysis/token_filters/truncate_token_filter/truncate_token_filter.go
index af648181..2b16cb08 100644
--- a/analysis/token_filters/truncate_token_filter/truncate_token_filter.go
+++ b/analysis/token_filters/truncate_token_filter/truncate_token_filter.go
@@ -31,8 +31,6 @@ func NewTruncateTokenFilter(length int) *TruncateTokenFilter {
 }
 
 func (s *TruncateTokenFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
-
 	for _, token := range input {
 		wordLen := utf8.RuneCount(token.Term)
 		if wordLen > s.length {
@@ -45,10 +43,8 @@ func (s *TruncateTokenFilter) Filter(input analysis.TokenStream) analysis.TokenS
 			}
 			token.Term = newterm
 		}
-		rv = append(rv, token)
 	}
-
-	return rv
+	return input
 }
 
 func TruncateTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
diff --git a/analysis/token_filters/unicode_normalize/unicode_normalize.go b/analysis/token_filters/unicode_normalize/unicode_normalize.go
index 798f3da3..12956bc0 100644
--- a/analysis/token_filters/unicode_normalize/unicode_normalize.go
+++ b/analysis/token_filters/unicode_normalize/unicode_normalize.go
@@ -54,14 +54,10 @@ func MustNewUnicodeNormalizeFilter(formName string) *UnicodeNormalizeFilter {
 }
 
 func (s *UnicodeNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
-	rv := make(analysis.TokenStream, 0)
-
 	for _, token := range input {
 		token.Term = s.form.Bytes(token.Term)
-		rv = append(rv, token)
 	}
-
-	return rv
+	return input
 }
 
 func UnicodeNormalizeFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
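Review note: a quick way to confirm the allocation win would be an allocs-reporting benchmark against one of the converted filters. A hypothetical example (not part of this diff) for the lower-case filter:

    package lower_case_filter_test

    import (
        "testing"

        "github.com/blevesearch/bleve/analysis"
        lower_case_filter "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
    )

    func BenchmarkLowerCaseFilter(b *testing.B) {
        input := make(analysis.TokenStream, 0, 1024)
        for i := 0; i < 1024; i++ {
            input = append(input, &analysis.Token{Term: []byte("BENCHMARK")})
        }
        filter := lower_case_filter.NewLowerCaseFilter()
        b.ReportAllocs()
        b.ResetTimer()
        for i := 0; i < b.N; i++ {
            filter.Filter(input)
        }
    }

Before this change each Filter call would allocate a fresh TokenStream plus append growth; after it, the remaining allocations should come only from the per-token string and []byte conversions inside the filter.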