From efe573bc1034c52a0aaa9c986e6d5b6a952ac84b Mon Sep 17 00:00:00 2001 From: a-little-srdjan Date: Thu, 9 Jun 2016 15:13:30 -0400 Subject: [PATCH] removing duplicate code by reusing util.go in analysis --- analysis/token_filters/camelcase_filter/parser.go | 15 +-------------- .../edge_ngram_filter/edge_ngram_filter.go | 14 ++------------ .../token_filters/ngram_filter/ngram_filter.go | 12 +----------- .../truncate_token_filter.go | 10 +--------- 4 files changed, 5 insertions(+), 46 deletions(-) diff --git a/analysis/token_filters/camelcase_filter/parser.go b/analysis/token_filters/camelcase_filter/parser.go index 71c85ec3..f431982d 100644 --- a/analysis/token_filters/camelcase_filter/parser.go +++ b/analysis/token_filters/camelcase_filter/parser.go @@ -1,28 +1,15 @@ package camelcase_filter import ( - "unicode/utf8" - "github.com/blevesearch/bleve/analysis" ) func buildTokenFromTerm(buffer []rune) *analysis.Token { return &analysis.Token{ - Term: buildTermFromRunes(buffer), + Term: analysis.BuildTermFromRunes(buffer), } } -// TODO: Lifted from ngram_filter. Expose as public and re-use? -func buildTermFromRunes(runes []rune) []byte { - rv := make([]byte, 0, len(runes)*4) - for _, r := range runes { - runeBytes := make([]byte, utf8.RuneLen(r)) - utf8.EncodeRune(runeBytes, r) - rv = append(rv, runeBytes...) - } - return rv -} - // Parser accepts a symbol and passes it to the current state (representing a class). // The state can accept it (and accumulate it). Otherwise, the parser creates a new state that // starts with the pushed symbol. diff --git a/analysis/token_filters/edge_ngram_filter/edge_ngram_filter.go b/analysis/token_filters/edge_ngram_filter/edge_ngram_filter.go index f370aa9d..0f97fbf1 100644 --- a/analysis/token_filters/edge_ngram_filter/edge_ngram_filter.go +++ b/analysis/token_filters/edge_ngram_filter/edge_ngram_filter.go @@ -51,7 +51,7 @@ func (s *EdgeNgramFilter) Filter(input analysis.TokenStream) analysis.TokenStrea for ngramSize := s.minLength; ngramSize <= s.maxLength; ngramSize++ { // build an ngram of this size starting at i if i-ngramSize > 0 { - ngramTerm := buildTermFromRunes(runes[i-ngramSize : i]) + ngramTerm := analysis.BuildTermFromRunes(runes[i-ngramSize : i]) token := analysis.Token{ Position: token.Position, Start: token.Start, @@ -68,7 +68,7 @@ func (s *EdgeNgramFilter) Filter(input analysis.TokenStream) analysis.TokenStrea for ngramSize := s.minLength; ngramSize <= s.maxLength; ngramSize++ { // build an ngram of this size starting at i if i+ngramSize <= runeCount { - ngramTerm := buildTermFromRunes(runes[i : i+ngramSize]) + ngramTerm := analysis.BuildTermFromRunes(runes[i : i+ngramSize]) token := analysis.Token{ Position: token.Position, Start: token.Start, @@ -85,16 +85,6 @@ func (s *EdgeNgramFilter) Filter(input analysis.TokenStream) analysis.TokenStrea return rv } -func buildTermFromRunes(runes []rune) []byte { - rv := make([]byte, 0, len(runes)*4) - for _, r := range runes { - runeBytes := make([]byte, utf8.RuneLen(r)) - utf8.EncodeRune(runeBytes, r) - rv = append(rv, runeBytes...) - } - return rv -} - func EdgeNgramFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { side := FRONT back, ok := config["back"].(bool) diff --git a/analysis/token_filters/ngram_filter/ngram_filter.go b/analysis/token_filters/ngram_filter/ngram_filter.go index 4165714a..79eb0ebf 100644 --- a/analysis/token_filters/ngram_filter/ngram_filter.go +++ b/analysis/token_filters/ngram_filter/ngram_filter.go @@ -43,7 +43,7 @@ func (s *NgramFilter) Filter(input analysis.TokenStream) analysis.TokenStream { for ngramSize := s.minLength; ngramSize <= s.maxLength; ngramSize++ { // build an ngram of this size starting at i if i+ngramSize <= runeCount { - ngramTerm := buildTermFromRunes(runes[i : i+ngramSize]) + ngramTerm := analysis.BuildTermFromRunes(runes[i : i+ngramSize]) token := analysis.Token{ Position: token.Position, Start: token.Start, @@ -60,16 +60,6 @@ func (s *NgramFilter) Filter(input analysis.TokenStream) analysis.TokenStream { return rv } -func buildTermFromRunes(runes []rune) []byte { - rv := make([]byte, 0, len(runes)*4) - for _, r := range runes { - runeBytes := make([]byte, utf8.RuneLen(r)) - utf8.EncodeRune(runeBytes, r) - rv = append(rv, runeBytes...) - } - return rv -} - func NgramFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { minVal, ok := config["min"] if !ok { diff --git a/analysis/token_filters/truncate_token_filter/truncate_token_filter.go b/analysis/token_filters/truncate_token_filter/truncate_token_filter.go index 2b16cb08..7eb21275 100644 --- a/analysis/token_filters/truncate_token_filter/truncate_token_filter.go +++ b/analysis/token_filters/truncate_token_filter/truncate_token_filter.go @@ -10,7 +10,6 @@ package truncate_token_filter import ( - "bytes" "fmt" "unicode/utf8" @@ -34,14 +33,7 @@ func (s *TruncateTokenFilter) Filter(input analysis.TokenStream) analysis.TokenS for _, token := range input { wordLen := utf8.RuneCount(token.Term) if wordLen > s.length { - runes := bytes.Runes(token.Term)[0:s.length] - newterm := make([]byte, 0, s.length*4) - for _, r := range runes { - runeBytes := make([]byte, utf8.RuneLen(r)) - utf8.EncodeRune(runeBytes, r) - newterm = append(newterm, runeBytes...) - } - token.Term = newterm + token.Term = analysis.TruncateRunes(token.Term, wordLen-s.length) } } return input