0
0
Fork 0

Merge pull request #423 from mschoch/stopfilterfaster

avoid allocation in stop token filter
This commit is contained in:
Marty Schoch 2016-09-11 13:59:31 -04:00 committed by GitHub
commit 56c7b9f831
2 changed files with 50 additions and 4 deletions

View File

@@ -36,16 +36,16 @@ func NewStopTokensFilter(stopTokens analysis.TokenMap) *StopTokensFilter {
}
// Filter removes every token whose term is a key in the filter's stop-token
// map and returns the remaining tokens in their original order.
//
// Kept tokens are compacted into the front of input and the result is a
// sub-slice of it, so no new slice is allocated. The returned stream shares
// input's backing array; callers must not rely on input's previous contents
// after the call.
func (f *StopTokensFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	j := 0 // next write position for a kept token; always <= the read index
	for _, token := range input {
		// Converting inside the map index expression lets the compiler
		// avoid copying the term bytes into a new string.
		_, isStopToken := f.stopTokens[string(token.Term)]
		if !isStopToken {
			input[j] = token
			j++
		}
	}
	return input[:j]
}
func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {

View File

@@ -71,3 +71,49 @@ func TestStopWordsFilter(t *testing.T) {
t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)
}
}
// BenchmarkStopWordsFilter measures the cost of running the stop-token filter
// over a five-token stream ("a walk in the park") in which "a", "in" and
// "the" are configured as stop tokens.
func BenchmarkStopWordsFilter(b *testing.B) {
	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("a"),
		},
		&analysis.Token{
			Term: []byte("walk"),
		},
		&analysis.Token{
			Term: []byte("in"),
		},
		&analysis.Token{
			Term: []byte("the"),
		},
		&analysis.Token{
			Term: []byte("park"),
		},
	}

	cache := registry.NewCache()

	// Register the stop-token map that the filter below refers to by name.
	stopListConfig := map[string]interface{}{
		"type":   token_map.Name,
		"tokens": []interface{}{"a", "in", "the"},
	}
	_, err := cache.DefineTokenMap("stop_test", stopListConfig)
	if err != nil {
		b.Fatal(err)
	}

	stopConfig := map[string]interface{}{
		"type":           "stop_tokens",
		"stop_token_map": "stop_test",
	}
	stopFilter, err := cache.DefineTokenFilter("stop_test", stopConfig)
	if err != nil {
		b.Fatal(err)
	}

	// The filter compacts its input in place, so handing it the same slice on
	// every iteration would leave a stream with no stop tokens after the
	// first few passes. Refill a scratch slice from the pristine input each
	// time; copy reuses scratch's storage and does not allocate.
	scratch := make(analysis.TokenStream, len(inputTokenStream))

	b.ReportAllocs()
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		copy(scratch, inputTokenStream)
		stopFilter.Filter(scratch)
	}
}