From 0a4844f9d09084947dbeaf3feb55fbc0d3cc9937 Mon Sep 17 00:00:00 2001
From: Marty Schoch
Date: Mon, 12 Jan 2015 17:57:45 -0500
Subject: [PATCH] change unicode tokenizer to use direct segmenter api

---
 analysis/tokenizers/unicode/unicode.go | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/analysis/tokenizers/unicode/unicode.go b/analysis/tokenizers/unicode/unicode.go
index d81f6c0d..b957231d 100644
--- a/analysis/tokenizers/unicode/unicode.go
+++ b/analysis/tokenizers/unicode/unicode.go
@@ -10,8 +10,6 @@
 package unicode
 
 import (
-	"bytes"
-
 	"github.com/blevesearch/segment"
 
 	"github.com/blevesearch/bleve/analysis"
@@ -31,7 +29,7 @@ func (rt *UnicodeTokenizer) Tokenize(input []byte) analysis.TokenStream {
 
 	rv := make(analysis.TokenStream, 0)
 
-	segmenter := segment.NewWordSegmenter(bytes.NewReader(input))
+	segmenter := segment.NewWordSegmenterDirect(input)
 	start := 0
 	pos := 1
 	for segmenter.Segment() {