Change Unicode tokenizer to use the direct segmenter API
This commit is contained in:
parent
603c3af8bb
commit
0a4844f9d0
|
@@ -10,8 +10,6 @@
|
||||||
package unicode
|
package unicode
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
|
|
||||||
"github.com/blevesearch/segment"
|
"github.com/blevesearch/segment"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/analysis"
|
"github.com/blevesearch/bleve/analysis"
|
||||||
|
@@ -31,7 +29,7 @@ func (rt *UnicodeTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||||
|
|
||||||
rv := make(analysis.TokenStream, 0)
|
rv := make(analysis.TokenStream, 0)
|
||||||
|
|
||||||
segmenter := segment.NewWordSegmenter(bytes.NewReader(input))
|
segmenter := segment.NewWordSegmenterDirect(input)
|
||||||
start := 0
|
start := 0
|
||||||
pos := 1
|
pos := 1
|
||||||
for segmenter.Segment() {
|
for segmenter.Segment() {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user