Merge pull request #426 from mschoch/fasterbuildterms
encode runes directly into buffer
This commit is contained in:
commit
f8e8c9d065
|
@ -34,14 +34,32 @@ func InsertRune(in []rune, pos int, r rune) []rune {
|
|||
return rv
|
||||
}
|
||||
|
||||
func BuildTermFromRunes(runes []rune) []byte {
|
||||
rv := make([]byte, 0, len(runes)*4)
|
||||
// BuildTermFromRunesOptimistic builds a term from the provided runes,
// optimistically encoding the UTF-8 bytes directly into buf.
//
// If at any point buf turns out to be too small, a new buffer of
// len(runes)*utf8.UTFMax bytes is allocated, the bytes encoded so far are
// copied into it, and encoding continues there. When everything fits, the
// returned slice shares its backing array with buf, so the caller's buffer
// contents are overwritten.
//
// This should be used in cases where the new term is frequently the same
// length or shorter than the original term (in number of bytes).
//
// Runes that are not valid to encode in UTF-8 are written as utf8.RuneError,
// matching the behavior of utf8.EncodeRune.
func BuildTermFromRunesOptimistic(buf []byte, runes []rune) []byte {
	rv := buf
	used := 0
	for _, r := range runes {
		nextLen := utf8.RuneLen(r)
		if nextLen < 0 {
			// RuneLen reports -1 for invalid runes, but EncodeRune still
			// writes the encoding of utf8.RuneError for them; reserve room
			// for that so EncodeRune never indexes past the end of rv.
			nextLen = utf8.RuneLen(utf8.RuneError)
		}
		if used+nextLen > len(rv) {
			// The optimistic buffer is too small: switch to a buffer large
			// enough for the worst case of every rune.
			buf = make([]byte, len(runes)*utf8.UTFMax)
			// Carry over the work we've already done.
			copy(buf, rv[:used])
			rv = buf
		}
		used += utf8.EncodeRune(rv[used:], r)
	}
	return rv[:used]
}
|
||||
|
||||
func BuildTermFromRunes(runes []rune) []byte {
|
||||
return BuildTermFromRunesOptimistic(make([]byte, len(runes)*utf8.UTFMax), runes)
|
||||
}
|
||||
|
||||
func TruncateRunes(input []byte, num int) []byte {
|
||||
|
|
|
@ -69,3 +69,67 @@ func TestInsertRune(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildTermFromRunes(t *testing.T) {
|
||||
tests := []struct {
|
||||
in []rune
|
||||
}{
|
||||
{
|
||||
in: []rune{'a', 'b', 'c'},
|
||||
},
|
||||
{
|
||||
in: []rune{'こ', 'ん', 'に', 'ち', 'は', '世', '界'},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
out := BuildTermFromRunes(test.in)
|
||||
back := []rune(string(out))
|
||||
if !reflect.DeepEqual(back, test.in) {
|
||||
t.Errorf("expected %v to convert back to %v", out, test.in)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildTermFromRunesOptimistic(t *testing.T) {
|
||||
tests := []struct {
|
||||
buf []byte
|
||||
in []rune
|
||||
}{
|
||||
{
|
||||
buf: []byte("abc"),
|
||||
in: []rune{'a', 'b', 'c'},
|
||||
},
|
||||
{
|
||||
buf: []byte("こんにちは世界"),
|
||||
in: []rune{'こ', 'ん', 'に', 'ち', 'は', '世', '界'},
|
||||
},
|
||||
// same, but don't give enough buffer
|
||||
{
|
||||
buf: []byte("ab"),
|
||||
in: []rune{'a', 'b', 'c'},
|
||||
},
|
||||
{
|
||||
buf: []byte("こ"),
|
||||
in: []rune{'こ', 'ん', 'に', 'ち', 'は', '世', '界'},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
out := BuildTermFromRunesOptimistic(test.buf, test.in)
|
||||
back := []rune(string(out))
|
||||
if !reflect.DeepEqual(back, test.in) {
|
||||
t.Errorf("expected %v to convert back to %v", out, test.in)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkBuildTermFromRunes(b *testing.B) {
|
||||
input := [][]rune{
|
||||
{'a', 'b', 'c'},
|
||||
{'こ', 'ん', 'に', 'ち', 'は', '世', '界'},
|
||||
}
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, i := range input {
|
||||
BuildTermFromRunes(i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue