0
0
Fork 0

removing duplicate code by reusing util.go in analysis

This commit is contained in:
a-little-srdjan 2016-06-09 15:13:30 -04:00
parent 5722d7b1d1
commit efe573bc10
4 changed files with 5 additions and 46 deletions

View File

@ -1,28 +1,15 @@
package camelcase_filter
import (
"unicode/utf8"
"github.com/blevesearch/bleve/analysis"
)
// buildTokenFromTerm returns a pointer to a new analysis.Token whose Term is
// built from the runes in buffer; no other Token field is set here.
func buildTokenFromTerm(buffer []rune) *analysis.Token {
return &analysis.Token{
// NOTE(review): two Term entries are present below — one calling the
// package-local buildTermFromRunes and one calling
// analysis.BuildTermFromRunes. A struct literal may set Term only once, so
// confirm which of the two lines is meant to remain.
Term: buildTermFromRunes(buffer),
Term: analysis.BuildTermFromRunes(buffer),
}
}
// buildTermFromRunes returns the UTF-8 encoding of runes as a newly
// allocated byte slice.
//
// Runes that cannot be encoded in UTF-8 (surrogate halves or values above
// utf8.MaxRune) are written as utf8.RuneError (U+FFFD) instead of causing a
// panic. A nil or empty input yields an empty slice.
//
// TODO: Lifted from ngram_filter. Expose as public and re-use?
func buildTermFromRunes(runes []rune) []byte {
	rv := make([]byte, 0, len(runes)*utf8.UTFMax)
	// One fixed-size scratch buffer serves every rune. Sizing a fresh buffer
	// with utf8.RuneLen would return -1 for an invalid rune and make() would
	// panic; EncodeRune reports how many bytes it actually wrote.
	var scratch [utf8.UTFMax]byte
	for _, r := range runes {
		n := utf8.EncodeRune(scratch[:], r)
		rv = append(rv, scratch[:n]...)
	}
	return rv
}
// Parser accepts a symbol and passes it to the current state (representing a class).
// The state can accept it (and accumulate it). Otherwise, the parser creates a new state that
// starts with the pushed symbol.

View File

@ -51,7 +51,7 @@ func (s *EdgeNgramFilter) Filter(input analysis.TokenStream) analysis.TokenStrea
for ngramSize := s.minLength; ngramSize <= s.maxLength; ngramSize++ {
// build an ngram of this size starting at i
if i-ngramSize > 0 {
ngramTerm := buildTermFromRunes(runes[i-ngramSize : i])
ngramTerm := analysis.BuildTermFromRunes(runes[i-ngramSize : i])
token := analysis.Token{
Position: token.Position,
Start: token.Start,
@ -68,7 +68,7 @@ func (s *EdgeNgramFilter) Filter(input analysis.TokenStream) analysis.TokenStrea
for ngramSize := s.minLength; ngramSize <= s.maxLength; ngramSize++ {
// build an ngram of this size starting at i
if i+ngramSize <= runeCount {
ngramTerm := buildTermFromRunes(runes[i : i+ngramSize])
ngramTerm := analysis.BuildTermFromRunes(runes[i : i+ngramSize])
token := analysis.Token{
Position: token.Position,
Start: token.Start,
@ -85,16 +85,6 @@ func (s *EdgeNgramFilter) Filter(input analysis.TokenStream) analysis.TokenStrea
return rv
}
// buildTermFromRunes encodes runes as UTF-8 and returns the bytes in a new
// slice.
//
// A rune that has no valid UTF-8 encoding (a surrogate half or a value above
// utf8.MaxRune) is emitted as utf8.RuneError (U+FFFD) rather than triggering
// a panic. Empty input produces an empty slice.
func buildTermFromRunes(runes []rune) []byte {
	rv := make([]byte, 0, len(runes)*utf8.UTFMax)
	// Encode through a reusable stack buffer: allocating per rune with
	// utf8.RuneLen as the size would panic when RuneLen returns -1.
	var scratch [utf8.UTFMax]byte
	for _, r := range runes {
		n := utf8.EncodeRune(scratch[:], r)
		rv = append(rv, scratch[:n]...)
	}
	return rv
}
func EdgeNgramFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
side := FRONT
back, ok := config["back"].(bool)

View File

@ -43,7 +43,7 @@ func (s *NgramFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for ngramSize := s.minLength; ngramSize <= s.maxLength; ngramSize++ {
// build an ngram of this size starting at i
if i+ngramSize <= runeCount {
ngramTerm := buildTermFromRunes(runes[i : i+ngramSize])
ngramTerm := analysis.BuildTermFromRunes(runes[i : i+ngramSize])
token := analysis.Token{
Position: token.Position,
Start: token.Start,
@ -60,16 +60,6 @@ func (s *NgramFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
return rv
}
// buildTermFromRunes converts a rune slice into its UTF-8 byte
// representation, returned in a freshly allocated slice.
//
// Unencodable runes (surrogate halves, values beyond utf8.MaxRune) are
// replaced by utf8.RuneError (U+FFFD); they do not cause a panic. An empty
// or nil input returns an empty slice.
func buildTermFromRunes(runes []rune) []byte {
	rv := make([]byte, 0, len(runes)*utf8.UTFMax)
	// A shared scratch array avoids one allocation per rune and avoids
	// make() with the -1 length that utf8.RuneLen reports for invalid runes.
	var scratch [utf8.UTFMax]byte
	for _, r := range runes {
		n := utf8.EncodeRune(scratch[:], r)
		rv = append(rv, scratch[:n]...)
	}
	return rv
}
func NgramFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
minVal, ok := config["min"]
if !ok {

View File

@ -10,7 +10,6 @@
package truncate_token_filter
import (
"bytes"
"fmt"
"unicode/utf8"
@ -34,14 +33,7 @@ func (s *TruncateTokenFilter) Filter(input analysis.TokenStream) analysis.TokenS
for _, token := range input {
wordLen := utf8.RuneCount(token.Term)
if wordLen > s.length {
runes := bytes.Runes(token.Term)[0:s.length]
newterm := make([]byte, 0, s.length*4)
for _, r := range runes {
runeBytes := make([]byte, utf8.RuneLen(r))
utf8.EncodeRune(runeBytes, r)
newterm = append(newterm, runeBytes...)
}
token.Term = newterm
token.Term = analysis.TruncateRunes(token.Term, wordLen-s.length)
}
}
return input