
rewrite to use fixed-size []byte instead of bytes.Buffer

This removes unchecked errors in calls to buffer.Write and also benchmarks considerably faster.
Marty Schoch 2015-03-11 15:12:13 -04:00
parent 50bd082257
commit a44a7c01af
2 changed files with 72 additions and 20 deletions
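
As a rough illustration of the pattern this commit applies (not the bleve code itself): allocate a worst-case-sized []byte up front, fill it by index assignment, copy, and binary.PutUvarint, then slice it down to the bytes actually written. The encodeKey and encodeKeyBuffer functions below are hypothetical stand-ins that mirror StoredRow.Key, assuming a one-byte prefix, a doc ID, a separator byte, a little-endian uint16 field, and varint-encoded array positions.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// encodeKey is a hypothetical sketch of the fixed-size []byte pattern:
// allocate the worst case, fill by index and copy, trim to the bytes used.
// No bytes.Buffer means no Write calls whose errors go unchecked.
func encodeKey(prefix byte, doc []byte, sep byte, field uint16, arrayPositions []uint64) []byte {
	docLen := len(doc)
	// worst case: prefix + doc + separator + field + one max-length varint per position
	buf := make([]byte, 1+docLen+1+2+binary.MaxVarintLen64*len(arrayPositions))
	buf[0] = prefix
	copy(buf[1:], doc)
	buf[1+docLen] = sep
	binary.LittleEndian.PutUint16(buf[1+docLen+1:], field)
	used := 1 + docLen + 1 + 2
	for _, pos := range arrayPositions {
		used += binary.PutUvarint(buf[used:], pos) // varints are usually shorter than MaxVarintLen64
	}
	return buf[:used] // trim the unused tail
}

// encodeKeyBuffer is the bytes.Buffer style being replaced; each Write
// returns an error that is silently dropped here.
func encodeKeyBuffer(prefix byte, doc []byte, sep byte, field uint16, arrayPositions []uint64) []byte {
	buf := new(bytes.Buffer)
	buf.WriteByte(prefix)
	buf.Write(doc)
	buf.WriteByte(sep)
	fieldbuf := make([]byte, 2)
	binary.LittleEndian.PutUint16(fieldbuf, field)
	buf.Write(fieldbuf)
	for _, pos := range arrayPositions {
		tmp := make([]byte, binary.MaxVarintLen64)
		n := binary.PutUvarint(tmp, pos)
		buf.Write(tmp[:n])
	}
	return buf.Bytes()
}

func main() {
	a := encodeKey('s', []byte("budweiser"), 0xff, 0, []uint64{2, 294, 3078})
	b := encodeKeyBuffer('s', []byte("budweiser"), 0xff, 0, []uint64{2, 294, 3078})
	fmt.Println(bytes.Equal(a, b)) // true: both encodings produce the same key
}
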


@@ -496,10 +496,10 @@ func (br *BackIndexRow) AllStoredKeys() [][]byte {
 }
 
 func (br *BackIndexRow) Key() []byte {
-	buf := new(bytes.Buffer)
-	buf.WriteByte('b')
-	buf.Write(br.doc)
-	return buf.Bytes()
+	buf := make([]byte, len(br.doc)+1)
+	buf[0] = 'b'
+	copy(buf[1:], br.doc)
+	return buf
 }
 
 func (br *BackIndexRow) Value() []byte {
@@ -562,19 +562,18 @@ type StoredRow struct {
 }
 
 func (s *StoredRow) Key() []byte {
-	buf := new(bytes.Buffer)
-	buf.WriteByte('s')
-	buf.Write(s.doc)
-	buf.WriteByte(ByteSeparator)
-	fieldbuf := make([]byte, 2)
-	binary.LittleEndian.PutUint16(fieldbuf, s.field)
-	buf.Write(fieldbuf)
+	docLen := len(s.doc)
+	buf := make([]byte, 1+docLen+1+2+(binary.MaxVarintLen64*len(s.arrayPositions)))
+	buf[0] = 's'
+	copy(buf[1:], s.doc)
+	buf[1+docLen] = ByteSeparator
+	binary.LittleEndian.PutUint16(buf[1+docLen+1:], s.field)
+	bytesUsed := 1 + docLen + 1 + 2
 	for _, arrayPosition := range s.arrayPositions {
-		arrayPositionBuffer := make([]byte, binary.MaxVarintLen64)
-		numBytes := binary.PutUvarint(arrayPositionBuffer, arrayPosition)
-		buf.Write(arrayPositionBuffer[0:numBytes])
+		varbytes := binary.PutUvarint(buf[bytesUsed:], arrayPosition)
+		bytesUsed += varbytes
 	}
-	return buf.Bytes()
+	return buf[0:bytesUsed]
 }
 
 func (s *StoredRow) Value() []byte {
@@ -589,11 +588,12 @@ func (s *StoredRow) String() string {
 }
 
 func (s *StoredRow) ScanPrefixForDoc() []byte {
-	buf := new(bytes.Buffer)
-	buf.WriteByte('s')
-	buf.Write(s.doc)
-	buf.WriteByte(ByteSeparator)
-	return buf.Bytes()
+	docLen := len(s.doc)
+	buf := make([]byte, 1+docLen+1)
+	buf[0] = 's'
+	copy(buf[1:], s.doc)
+	buf[1+docLen] = ByteSeparator
+	return buf
 }
 
 func NewStoredRow(doc string, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow {


@@ -88,6 +88,11 @@ func TestRows(t *testing.T) {
 			[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0},
 			[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
 		},
+		{
+			NewStoredRow("budweiser", 0, []uint64{2, 294, 3078}, byte('t'), []byte("an american beer")),
+			[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0, 2, 166, 2, 134, 24},
+			[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
+		},
 		{
 			NewInternalRow([]byte("mapping"), []byte(`{"mapping":"json content"}`)),
 			[]byte{'i', 'm', 'a', 'p', 'p', 'i', 'n', 'g'},
@@ -266,3 +271,50 @@ func BenchmarkTermFrequencyRowDecode(b *testing.B) {
 		NewTermFrequencyRowKV(k, v)
 	}
 }
+
+func BenchmarkBackIndexRowEncode(b *testing.B) {
+	field := uint32(1)
+	t1 := "term1"
+	for i := 0; i < b.N; i++ {
+		row := NewBackIndexRow("beername",
+			[]*BackIndexTermEntry{
+				&BackIndexTermEntry{
+					Term:  &t1,
+					Field: &field,
+				},
+			},
+			[]*BackIndexStoreEntry{
+				&BackIndexStoreEntry{
+					Field: &field,
+				},
+			})
+		row.Key()
+		row.Value()
+	}
+}
+
+func BenchmarkBackIndexRowDecode(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		k := []byte{0x62, 0x62, 0x65, 0x65, 0x72, 0x6e, 0x61, 0x6d, 0x65}
+		v := []byte{0x0a, 0x09, 0x0a, 0x05, 0x74, 0x65, 0x72, 0x6d, 0x31, 0x10, 0x01, 0x12, 0x02, 0x08, 0x01}
+		NewBackIndexRowKV(k, v)
+	}
+}
+
+func BenchmarkStoredRowEncode(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		row := NewStoredRow("budweiser", 0, []uint64{}, byte('t'), []byte("an american beer"))
+		row.Key()
+		row.Value()
+	}
+}
+
+func BenchmarkStoredRowDecode(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		k := []byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0}
+		v := []byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'}
+		NewStoredRowKV(k, v)
+	}
+}