
Merge remote-tracking branch 'upstream/master'

Bertram Truong 2017-08-23 14:38:14 +10:00
commit 110c98e0ab
5 changed files with 78 additions and 164 deletions

View File

@@ -50,11 +50,12 @@ func NewCamelCaseFilter() *CamelCaseFilter {
func (f *CamelCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
rv := make(analysis.TokenStream, 0, len(input))
nextPosition := 1
for _, token := range input {
runeCount := utf8.RuneCount(token.Term)
runes := bytes.Runes(token.Term)
p := NewParser(runeCount)
p := NewParser(runeCount, nextPosition, token.Start)
for i := 0; i < runeCount; i++ {
if i+1 >= runeCount {
p.Push(runes[i], nil)
@@ -63,6 +64,7 @@ func (f *CamelCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStrea
}
}
rv = append(rv, p.FlushTokens()...)
nextPosition = p.NextPosition()
}
return rv
}

View File

@@ -28,176 +28,45 @@ func TestCamelCaseFilter(t *testing.T) {
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
input: tokenStream(""),
output: tokenStream(""),
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("a"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("a"),
},
},
input: tokenStream("a"),
output: tokenStream("a"),
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("...aMACMac123macILoveGolang"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("..."),
},
&analysis.Token{
Term: []byte("a"),
},
&analysis.Token{
Term: []byte("MAC"),
},
&analysis.Token{
Term: []byte("Mac"),
},
&analysis.Token{
Term: []byte("123"),
},
&analysis.Token{
Term: []byte("mac"),
},
&analysis.Token{
Term: []byte("I"),
},
&analysis.Token{
Term: []byte("Love"),
},
&analysis.Token{
Term: []byte("Golang"),
},
},
input: tokenStream("...aMACMac123macILoveGolang"),
output: tokenStream("...", "a", "MAC", "Mac", "123", "mac", "I", "Love", "Golang"),
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("Lang"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("Lang"),
},
},
input: tokenStream("Lang"),
output: tokenStream("Lang"),
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("GLang"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("G"),
},
&analysis.Token{
Term: []byte("Lang"),
},
},
input: tokenStream("GLang"),
output: tokenStream("G", "Lang"),
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("GOLang"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("GO"),
},
&analysis.Token{
Term: []byte("Lang"),
},
},
input: tokenStream("GOLang"),
output: tokenStream("GO", "Lang"),
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("GOOLang"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("GOO"),
},
&analysis.Token{
Term: []byte("Lang"),
},
},
input: tokenStream("GOOLang"),
output: tokenStream("GOO", "Lang"),
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("1234"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("1234"),
},
},
input: tokenStream("1234"),
output: tokenStream("1234"),
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("starbucks"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("starbucks"),
},
},
input: tokenStream("starbucks"),
output: tokenStream("starbucks"),
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("Starbucks TVSamsungIsGREAT000"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("Starbucks"),
},
&analysis.Token{
Term: []byte(" "),
},
&analysis.Token{
Term: []byte("TV"),
},
&analysis.Token{
Term: []byte("Samsung"),
},
&analysis.Token{
Term: []byte("Is"),
},
&analysis.Token{
Term: []byte("GREAT"),
},
&analysis.Token{
Term: []byte("000"),
},
},
input: tokenStream("Starbucks TVSamsungIsGREAT000"),
output: tokenStream("Starbucks", " ", "TV", "Samsung", "Is", "GREAT", "000"),
},
}
@@ -209,3 +78,18 @@ func TestCamelCaseFilter(t *testing.T) {
}
}
}
func tokenStream(termStrs ...string) analysis.TokenStream {
tokenStream := make([]*analysis.Token, len(termStrs))
index := 0
for i, termStr := range termStrs {
tokenStream[i] = &analysis.Token{
Term: []byte(termStr),
Position: i + 1,
Start: index,
End: index + len(termStr),
}
index += len(termStr)
}
return analysis.TokenStream(tokenStream)
}
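
For reference, a quick spot-check of what the helper above produces (positions are 1-based and byte offsets are contiguous). This example test is illustrative only, not part of the change, and assumes "fmt" is added to the test imports:

func Example_tokenStream() {
	for _, tok := range tokenStream("GO", "Lang") {
		fmt.Printf("%s pos=%d [%d,%d)\n", tok.Term, tok.Position, tok.Start, tok.End)
	}
	// Output:
	// GO pos=1 [0,2)
	// Lang pos=2 [2,6)
}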

View File

@@ -18,10 +18,17 @@ import (
"github.com/blevesearch/bleve/analysis"
)
func buildTokenFromTerm(buffer []rune) *analysis.Token {
return &analysis.Token{
Term: analysis.BuildTermFromRunes(buffer),
func (p *Parser) buildTokenFromTerm(buffer []rune) *analysis.Token {
term := analysis.BuildTermFromRunes(buffer)
token := &analysis.Token{
Term: term,
Position: p.position,
Start: p.index,
End: p.index + len(term),
}
p.position++
p.index += len(term)
return token
}
// Parser accepts a symbol and passes it to the current state (representing a class).
@@ -35,13 +42,17 @@ type Parser struct {
buffer []rune
current State
tokens []*analysis.Token
position int
index int
}
func NewParser(len int) *Parser {
func NewParser(len, position, index int) *Parser {
return &Parser{
bufferLen: len,
buffer: make([]rune, 0, len),
tokens: make([]*analysis.Token, 0, len),
position: position,
index: index,
}
}
@@ -57,7 +68,7 @@ func (p *Parser) Push(sym rune, peek *rune) {
} else {
// the old state is no more, thus convert the buffer
p.tokens = append(p.tokens, buildTokenFromTerm(p.buffer))
p.tokens = append(p.tokens, p.buildTokenFromTerm(p.buffer))
// let the new state begin
p.current = p.NewState(sym)
@@ -89,6 +100,10 @@ func (p *Parser) NewState(sym rune) State {
}
func (p *Parser) FlushTokens() []*analysis.Token {
p.tokens = append(p.tokens, buildTokenFromTerm(p.buffer))
p.tokens = append(p.tokens, p.buildTokenFromTerm(p.buffer))
return p.tokens
}
func (p *Parser) NextPosition() int {
return p.position
}
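
To make the new bookkeeping concrete, a minimal sketch of one token flowing through the parser; the driving loop is condensed from the Filter code in the first file, and "GOLang" with its starting position 1 and offset 0 is illustrative:

runes := bytes.Runes([]byte("GOLang"))
p := NewParser(len(runes), 1, 0) // buffer length, starting position, starting byte offset
for i := 0; i < len(runes); i++ {
	if i+1 >= len(runes) {
		p.Push(runes[i], nil) // last rune: nothing to peek at
	} else {
		p.Push(runes[i], &runes[i+1])
	}
}
tokens := p.FlushTokens() // {"GO" pos=1 [0,2)} and {"Lang" pos=2 [2,6)}
next := p.NextPosition() // 3, which Filter feeds to the next input token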

View File

@@ -190,15 +190,18 @@ func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDo
}
func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) {
// we don't actually own the list of ids, so before we sort we must copy
idsCopy := make([]string, len(ids))
copy(idsCopy, ids)
// ensure ids are sorted
sort.Strings(ids)
sort.Strings(idsCopy)
startBytes := []byte{0x0}
if len(ids) > 0 {
startBytes = []byte(ids[0])
if len(idsCopy) > 0 {
startBytes = []byte(idsCopy[0])
}
endBytes := []byte{0xff}
if len(ids) > 0 {
endBytes = incrementBytes([]byte(ids[len(ids)-1]))
if len(idsCopy) > 0 {
endBytes = incrementBytes([]byte(idsCopy[len(idsCopy)-1]))
}
bisr := NewBackIndexRow(startBytes, nil, nil)
bier := NewBackIndexRow(endBytes, nil, nil)
@@ -207,7 +210,7 @@ func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (
return &UpsideDownCouchDocIDReader{
indexReader: indexReader,
iterator: it,
only: ids,
only: idsCopy,
onlyMode: true,
}, nil
}
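
The copy matters because the slice is borrowed from the caller: sorting it in place, as the old code did, reorders data the caller still owns. A standalone sketch of the two patterns (function names made up for illustration):

// Old behaviour: mutates the caller's slice as a side effect.
func sortInPlace(ids []string) {
	sort.Strings(ids)
}

// Pattern adopted above: sort a private copy, leave the input untouched.
func sortCopied(ids []string) []string {
	idsCopy := make([]string, len(ids))
	copy(idsCopy, ids)
	sort.Strings(idsCopy)
	return idsCopy
}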

View File

@@ -170,6 +170,16 @@ func (fr *FacetRequest) AddDateTimeRange(name string, start, end time.Time) {
fr.DateTimeRanges = append(fr.DateTimeRanges, &dateTimeRange{Name: name, Start: start, End: end})
}
// AddDateTimeRangeString adds a bucket to a field containing date
// values, with the range bounds supplied as strings.
func (fr *FacetRequest) AddDateTimeRangeString(name string, start, end *string) {
if fr.DateTimeRanges == nil {
fr.DateTimeRanges = make([]*dateTimeRange, 0, 1)
}
fr.DateTimeRanges = append(fr.DateTimeRanges,
&dateTimeRange{Name: name, startString: start, endString: end})
}
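
A minimal usage sketch under this signature; the field, facet name, and bound value are made up, and a nil bound presumably leaves that side of the range open:

start := "2017-01-01T00:00:00Z"
fr := bleve.NewFacetRequest("created", 10)
fr.AddDateTimeRangeString("since_2017", &start, nil)
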
// AddNumericRange adds a bucket to a field
// containing numeric values. Documents with a
// numeric value falling into this range are