diff --git a/analysis/language/en/possessive_filter_en.go b/analysis/language/en/possessive_filter_en.go index f322c04a..9bae66f8 100644 --- a/analysis/language/en/possessive_filter_en.go +++ b/analysis/language/en/possessive_filter_en.go @@ -10,7 +10,7 @@ package en import ( - "bytes" + "unicode/utf8" "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/registry" @@ -40,15 +40,13 @@ func NewPossessiveFilter() *PossessiveFilter { func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStream { for _, token := range input { - runes := bytes.Runes(token.Term) - if len(runes) >= 2 { - secondToLastRune := runes[len(runes)-2] - lastRune := runes[len(runes)-1] - if (secondToLastRune == rightSingleQuotationMark || - secondToLastRune == apostrophe || - secondToLastRune == fullWidthApostrophe) && - (lastRune == 's' || lastRune == 'S') { - token.Term = analysis.TruncateRunes(token.Term, 2) + lastRune, lastRuneSize := utf8.DecodeLastRune(token.Term) + if lastRune == 's' || lastRune == 'S' { + nextLastRune, nextLastRuneSize := utf8.DecodeLastRune(token.Term[:len(token.Term)-lastRuneSize]) + if nextLastRune == rightSingleQuotationMark || + nextLastRune == apostrophe || + nextLastRune == fullWidthApostrophe { + token.Term = token.Term[:len(token.Term)-lastRuneSize-nextLastRuneSize] } } } diff --git a/analysis/language/en/possessive_filter_en_test.go b/analysis/language/en/possessive_filter_en_test.go index e434bf5d..334d6326 100644 --- a/analysis/language/en/possessive_filter_en_test.go +++ b/analysis/language/en/possessive_filter_en_test.go @@ -45,6 +45,12 @@ func TestEnglishPossessiveFilter(t *testing.T) { &analysis.Token{ Term: []byte("m"), }, + &analysis.Token{ + Term: []byte("s"), + }, + &analysis.Token{ + Term: []byte("'s"), + }, }, output: analysis.TokenStream{ &analysis.Token{ @@ -68,6 +74,12 @@ func TestEnglishPossessiveFilter(t *testing.T) { &analysis.Token{ Term: []byte("m"), }, + &analysis.Token{ + Term: []byte("s"), + }, + 
&analysis.Token{
+					Term: []byte(""),
+				},
 			},
 		},
 	}
@@ -84,3 +96,57 @@ func TestEnglishPossessiveFilter(t *testing.T) {
 		}
 	}
 }
+
+// BenchmarkEnglishPossessiveFilter measures the possessive filter on a fixed
+// mix of possessive and non-possessive terms.
+func BenchmarkEnglishPossessiveFilter(b *testing.B) {
+	// NOTE(review): the fifth and sixth terms repeat the first two; if a
+	// full-width apostrophe variant was intended there, confirm and restore it.
+	input := analysis.TokenStream{
+		&analysis.Token{
+			Term: []byte("marty's"),
+		},
+		&analysis.Token{
+			Term: []byte("MARTY'S"),
+		},
+		&analysis.Token{
+			Term: []byte("marty’s"),
+		},
+		&analysis.Token{
+			Term: []byte("MARTY’S"),
+		},
+		&analysis.Token{
+			Term: []byte("marty's"),
+		},
+		&analysis.Token{
+			Term: []byte("MARTY'S"),
+		},
+		&analysis.Token{
+			Term: []byte("m"),
+		},
+	}
+
+	// Filter shortens token.Term in place, so remember the original terms and
+	// restore them before every pass; otherwise only the first iteration
+	// would ever see a possessive suffix.
+	originalTerms := make([][]byte, len(input))
+	for i, token := range input {
+		originalTerms[i] = token.Term
+	}
+
+	cache := registry.NewCache()
+	possessiveFilter, err := cache.TokenFilterNamed(PossessiveName)
+	if err != nil {
+		b.Fatal(err)
+	}
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		// Re-pointing Term at the saved slice is enough: Filter reslices the
+		// term and does not rewrite its bytes.
+		for j, term := range originalTerms {
+			input[j].Term = term
+		}
+		possessiveFilter.Filter(input)
+	}
+}