From 1f4ef3da8bcc1498e4db1117a6bce2b7203c7177 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 21 Jul 2015 10:43:53 -0400 Subject: [PATCH] move elision filter after lowercase filter. This affects all languages using the elision filter. Languages fr and it are updated now. Languages ca and ga are still missing other components and do not yet have an analyzer, but they should follow this lead once they are ready. Fixes #218 --- analysis/language/fr/analyzer_fr.go | 2 +- analysis/language/it/analyzer_it.go | 2 +- analysis/language/it/analyzer_it_test.go | 9 +++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/analysis/language/fr/analyzer_fr.go b/analysis/language/fr/analyzer_fr.go index 6c6f60f3..6d914eba 100644 --- a/analysis/language/fr/analyzer_fr.go +++ b/analysis/language/fr/analyzer_fr.go @@ -43,8 +43,8 @@ func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) ( rv := analysis.Analyzer{ Tokenizer: tokenizer, TokenFilters: []analysis.TokenFilter{ - elisionFilter, toLowerFilter, + elisionFilter, stopFrFilter, stemmerFrFilter, }, diff --git a/analysis/language/it/analyzer_it.go b/analysis/language/it/analyzer_it.go index 9cb4021d..ef1e412a 100644 --- a/analysis/language/it/analyzer_it.go +++ b/analysis/language/it/analyzer_it.go @@ -43,8 +43,8 @@ func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) ( rv := analysis.Analyzer{ Tokenizer: tokenizer, TokenFilters: []analysis.TokenFilter{ - elisionFilter, toLowerFilter, + elisionFilter, stopItFilter, stemmerItFilter, }, diff --git a/analysis/language/it/analyzer_it_test.go b/analysis/language/it/analyzer_it_test.go index 03cc8112..4419d207 100644 --- a/analysis/language/it/analyzer_it_test.go +++ b/analysis/language/it/analyzer_it_test.go @@ -61,6 +61,15 @@ func TestItalianAnalyzer(t *testing.T) { }, }, }, + // test for bug #218 + { + input: []byte("Nell'anfora"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("anfor"), + }, + }, 
+ }, } cache := registry.NewCache()