avoid allocation in stop token filter
The token stream that results from removing stop words can never be longer than the original, so instead of allocating a new stream we compact the original in place and truncate it at the end.
This commit is contained in:
parent
b961d742c1
commit
faa07ac3a6
|
@ -36,16 +36,16 @@ func NewStopTokensFilter(stopTokens analysis.TokenMap) *StopTokensFilter {
|
|||
}
|
||||
|
||||
func (f *StopTokensFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
rv := make(analysis.TokenStream, 0, len(input))
|
||||
|
||||
j := 0
|
||||
for _, token := range input {
|
||||
_, isStopToken := f.stopTokens[string(token.Term)]
|
||||
if !isStopToken {
|
||||
rv = append(rv, token)
|
||||
input[j] = token
|
||||
j++
|
||||
}
|
||||
}
|
||||
|
||||
return rv
|
||||
return input[:j]
|
||||
}
|
||||
|
||||
func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
|
|
|
@ -71,3 +71,49 @@ func TestStopWordsFilter(t *testing.T) {
|
|||
t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkStopWordsFilter(b *testing.B) {
|
||||
|
||||
inputTokenStream := analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("a"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("walk"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("in"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("the"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("park"),
|
||||
},
|
||||
}
|
||||
|
||||
cache := registry.NewCache()
|
||||
stopListConfig := map[string]interface{}{
|
||||
"type": token_map.Name,
|
||||
"tokens": []interface{}{"a", "in", "the"},
|
||||
}
|
||||
_, err := cache.DefineTokenMap("stop_test", stopListConfig)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
stopConfig := map[string]interface{}{
|
||||
"type": "stop_tokens",
|
||||
"stop_token_map": "stop_test",
|
||||
}
|
||||
stopFilter, err := cache.DefineTokenFilter("stop_test", stopConfig)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
stopFilter.Filter(inputTokenStream)
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue