0
0
Fork 0

Merge pull request #426 from mschoch/fasterbuildterms

encode runes directly into buffer
This commit is contained in:
Marty Schoch 2016-09-11 20:19:09 -04:00 committed by GitHub
commit f8e8c9d065
2 changed files with 88 additions and 6 deletions

View File

@ -34,14 +34,32 @@ func InsertRune(in []rune, pos int, r rune) []rune {
return rv
}
// BuildTermFromRunesOptimistic will build a term from the provided runes
// AND optimistically attempt to encode into the provided buffer.
// If at any point it appears the buffer is too small, a new buffer is
// allocated and that is used instead.
// This should be used in cases where frequently the new term is the same
// length or shorter than the original term (in number of bytes).
//
// The first len(buf) bytes of buf are used as scratch space and may be
// overwritten. The returned slice aliases buf when every rune fits, and
// aliases a freshly allocated buffer otherwise. Runes that are not valid
// for UTF-8 encoding are written as utf8.RuneError, matching
// utf8.EncodeRune.
func BuildTermFromRunesOptimistic(buf []byte, runes []rune) []byte {
	rv := buf
	used := 0
	for _, r := range runes {
		nextLen := utf8.RuneLen(r)
		if nextLen < 0 {
			// RuneLen reports -1 for invalid runes, but EncodeRune still
			// writes the replacement character for them; size the capacity
			// check for what will actually be written so we never encode
			// past the end of a too-small buffer.
			nextLen = utf8.RuneLen(utf8.RuneError)
		}
		if used+nextLen > len(rv) {
			// the optimistic buffer is too small: switch to one sized for
			// the worst case and carry over the bytes encoded so far
			grown := make([]byte, len(runes)*utf8.UTFMax)
			copy(grown, rv[:used])
			rv = grown
		}
		used += utf8.EncodeRune(rv[used:], r)
	}
	return rv[:used]
}
// BuildTermFromRunes encodes the provided runes as UTF-8 bytes. It allocates
// a scratch buffer of len(runes)*utf8.UTFMax bytes and hands it to
// BuildTermFromRunesOptimistic to do the encoding.
func BuildTermFromRunes(runes []rune) []byte {
	scratch := make([]byte, utf8.UTFMax*len(runes))
	return BuildTermFromRunesOptimistic(scratch, runes)
}
func TruncateRunes(input []byte, num int) []byte {

View File

@ -69,3 +69,67 @@ func TestInsertRune(t *testing.T) {
}
}
}
// TestBuildTermFromRunes verifies that the bytes returned by
// BuildTermFromRunes decode back to the original rune sequence, for both
// single-byte and multi-byte input.
func TestBuildTermFromRunes(t *testing.T) {
	type testCase struct {
		in []rune
	}
	cases := []testCase{
		{in: []rune{'a', 'b', 'c'}},
		{in: []rune{'こ', 'ん', 'に', 'ち', 'は', '世', '界'}},
	}

	for idx := range cases {
		want := cases[idx].in
		encoded := BuildTermFromRunes(want)
		// round-trip through string to decode the UTF-8 bytes into runes
		if got := []rune(string(encoded)); !reflect.DeepEqual(got, want) {
			t.Errorf("expected %v to convert back to %v", encoded, want)
		}
	}
}
// TestBuildTermFromRunesOptimistic verifies that
// BuildTermFromRunesOptimistic produces bytes that decode back to the input
// runes, both when the supplied buffer is exactly large enough and when it
// is too small.
func TestBuildTermFromRunesOptimistic(t *testing.T) {
	type testCase struct {
		buf []byte
		in  []rune
	}
	ascii := []rune{'a', 'b', 'c'}
	japanese := []rune{'こ', 'ん', 'に', 'ち', 'は', '世', '界'}

	cases := []testCase{
		// buffer matches the encoded length
		{buf: []byte("abc"), in: ascii},
		{buf: []byte("こんにちは世界"), in: japanese},
		// same, but don't give enough buffer
		{buf: []byte("ab"), in: ascii},
		{buf: []byte("こ"), in: japanese},
	}

	for idx := range cases {
		tc := cases[idx]
		encoded := BuildTermFromRunesOptimistic(tc.buf, tc.in)
		// round-trip through string to decode the UTF-8 bytes into runes
		if got := []rune(string(encoded)); !reflect.DeepEqual(got, tc.in) {
			t.Errorf("expected %v to convert back to %v", encoded, tc.in)
		}
	}
}
// BenchmarkBuildTermFromRunes measures BuildTermFromRunes over one ASCII
// and one multi-byte rune sequence per iteration.
func BenchmarkBuildTermFromRunes(b *testing.B) {
	inputs := [][]rune{
		{'a', 'b', 'c'},
		{'こ', 'ん', 'に', 'ち', 'は', '世', '界'},
	}

	for n := 0; n < b.N; n++ {
		for _, runes := range inputs {
			BuildTermFromRunes(runes)
		}
	}
}