From 606fd6344bdacf903c4f379a42268be3104aee5e Mon Sep 17 00:00:00 2001
From: Marty Schoch
Date: Tue, 24 Jan 2017 15:33:54 -0500
Subject: [PATCH] INDEX FORMAT CHANGE: change back index row value

Previously term entries were encoded pairwise (field/term), so you'd
have data like:

    F1/T1 F1/T2 F1/T3 F2/T4 F3/T5

As you can see, even though field 1 has 3 terms, we repeat the F1 part
in the encoded data. This is a bit wasteful.

In the new format we encode it as a list of terms for each field:

    F1/T1,T2,T3 F2/T4 F3/T5

When fields have multiple terms, this saves space. In unit tests there
is no additional waste even in the case that a field has only a single
value.

Here are the results of an indexing test case (beer-search):

    $ benchcmp indexing-before.txt indexing-after.txt
    benchmark               old ns/op       new ns/op       delta
    BenchmarkIndexing-4     11275835988     10745514321     -4.70%

    benchmark               old allocs     new allocs     delta
    BenchmarkIndexing-4     25230685       22480494       -10.90%

    benchmark               old bytes      new bytes      delta
    BenchmarkIndexing-4     4802816224     4741641856     -1.27%

And here are the results of a MatchAll search building a facet on the
"abv" field:

    $ benchcmp facet-before.txt facet-after.txt
    benchmark            old ns/op     new ns/op     delta
    BenchmarkFacets-4    439762100     228064575     -48.14%

    benchmark            old allocs     new allocs     delta
    BenchmarkFacets-4    9460208        3723286        -60.64%

    benchmark            old bytes     new bytes     delta
    BenchmarkFacets-4    260784261     151746483     -41.81%

Although we expect the index to be smaller in many cases, the
beer-search index is about the same size. However, this may be due to
the underlying storage (boltdb) used for this test.

Finally, the index version was bumped from 5 to 7, skipping 6: smolder
also used version 6, so reusing that number could lead to some
confusion.
--- index/upsidedown/analysis.go | 6 +- index/upsidedown/dump.go | 10 +- index/upsidedown/index_reader.go | 4 +- index/upsidedown/row.go | 24 ++--- index/upsidedown/row_test.go | 19 ++-- index/upsidedown/upsidedown.go | 20 ++-- index/upsidedown/upsidedown.pb.go | 152 +++++++++++++++--------------- index/upsidedown/upsidedown.proto | 8 +- 8 files changed, 128 insertions(+), 115 deletions(-) diff --git a/index/upsidedown/analysis.go b/index/upsidedown/analysis.go index 1679f7a0..d1b1fd59 100644 --- a/index/upsidedown/analysis.go +++ b/index/upsidedown/analysis.go @@ -90,7 +90,7 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult rv.Rows = append(make([]index.IndexRow, 0, rowsCapNeeded), rv.Rows...) - backIndexTermEntries := make([]*BackIndexTermEntry, 0, rowsCapNeeded) + backIndexTermsEntries := make([]*BackIndexTermsEntry, 0, len(fieldTermFreqs)) // walk through the collated information and process // once for each indexed field (unique name) @@ -99,11 +99,11 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult includeTermVectors := fieldIncludeTermVectors[fieldIndex] // encode this field - rv.Rows, backIndexTermEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermEntries) + rv.Rows, backIndexTermsEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermsEntries) } // build the back index row - backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermEntries, backIndexStoredEntries) + backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermsEntries, backIndexStoredEntries) rv.Rows = append(rv.Rows, backIndexRow) return rv diff --git a/index/upsidedown/dump.go b/index/upsidedown/dump.go index 58dc0dd2..cb045d24 100644 --- a/index/upsidedown/dump.go +++ b/index/upsidedown/dump.go @@ -127,10 +127,12 @@ func (i *IndexReader) DumpDoc(id string) chan interface{} { } // build sorted list of 
term keys keys := make(keyset, 0) - for _, entry := range back.termEntries { - tfr := NewTermFrequencyRow([]byte(*entry.Term), uint16(*entry.Field), idBytes, 0, 0) - key := tfr.Key() - keys = append(keys, key) + for _, entry := range back.termsEntries { + for i := range entry.Terms { + tfr := NewTermFrequencyRow([]byte(entry.Terms[i]), uint16(*entry.Field), idBytes, 0, 0) + key := tfr.Key() + keys = append(keys, key) + } } sort.Sort(keys) diff --git a/index/upsidedown/index_reader.go b/index/upsidedown/index_reader.go index 288adaf2..f41e47c7 100644 --- a/index/upsidedown/index_reader.go +++ b/index/upsidedown/index_reader.go @@ -117,9 +117,9 @@ func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []stri fieldsMap[id] = f } } - for _, entry := range back.termEntries { + for _, entry := range back.termsEntries { if field, ok := fieldsMap[uint16(*entry.Field)]; ok { - rv[field] = append(rv[field], *entry.Term) + rv[field] = entry.Terms } } return rv, nil diff --git a/index/upsidedown/row.go b/index/upsidedown/row.go index bfaab892..f2826581 100644 --- a/index/upsidedown/row.go +++ b/index/upsidedown/row.go @@ -634,7 +634,7 @@ func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) { type BackIndexRow struct { doc []byte - termEntries []*BackIndexTermEntry + termsEntries []*BackIndexTermsEntry storedEntries []*BackIndexStoreEntry } @@ -642,10 +642,12 @@ func (br *BackIndexRow) AllTermKeys() [][]byte { if br == nil { return nil } - rv := make([][]byte, len(br.termEntries)) - for i, termEntry := range br.termEntries { - termRow := NewTermFrequencyRow([]byte(termEntry.GetTerm()), uint16(termEntry.GetField()), br.doc, 0, 0) - rv[i] = termRow.Key() + rv := make([][]byte, 0, len(br.termsEntries)) // FIXME this underestimates severely + for _, termsEntry := range br.termsEntries { + for i := range termsEntry.Terms { + termRow := NewTermFrequencyRow([]byte(termsEntry.Terms[i]), uint16(termsEntry.GetField()), br.doc, 0, 0) + rv = 
append(rv, termRow.Key()) + } } return rv } @@ -686,7 +688,7 @@ func (br *BackIndexRow) Value() []byte { func (br *BackIndexRow) ValueSize() int { birv := &BackIndexRowValue{ - TermEntries: br.termEntries, + TermsEntries: br.termsEntries, StoredEntries: br.storedEntries, } return birv.Size() @@ -694,20 +696,20 @@ func (br *BackIndexRow) ValueSize() int { func (br *BackIndexRow) ValueTo(buf []byte) (int, error) { birv := &BackIndexRowValue{ - TermEntries: br.termEntries, + TermsEntries: br.termsEntries, StoredEntries: br.storedEntries, } return birv.MarshalTo(buf) } func (br *BackIndexRow) String() string { - return fmt.Sprintf("Backindex DocId: `%s` Term Entries: %v, Stored Entries: %v", string(br.doc), br.termEntries, br.storedEntries) + return fmt.Sprintf("Backindex DocId: `%s` Terms Entries: %v, Stored Entries: %v", string(br.doc), br.termsEntries, br.storedEntries) } -func NewBackIndexRow(docID []byte, entries []*BackIndexTermEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow { +func NewBackIndexRow(docID []byte, entries []*BackIndexTermsEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow { return &BackIndexRow{ doc: docID, - termEntries: entries, + termsEntries: entries, storedEntries: storedFields, } } @@ -736,7 +738,7 @@ func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) { if err != nil { return nil, err } - rv.termEntries = birv.TermEntries + rv.termsEntries = birv.TermsEntries rv.storedEntries = birv.StoredEntries return &rv, nil diff --git a/index/upsidedown/row_test.go b/index/upsidedown/row_test.go index c0e884be..3eaf9ba9 100644 --- a/index/upsidedown/row_test.go +++ b/index/upsidedown/row_test.go @@ -81,19 +81,19 @@ func TestRows(t *testing.T) { []byte{168, 202, 1, 195, 235, 163, 130, 4, 255, 1, 1, 3, 11, 1, 0, 0, 150, 17, 23, 31, 2, 1, 2, 0, 3, 43, 51, 3, 3, 4, 5}, }, { - NewBackIndexRow([]byte("budweiser"), []*BackIndexTermEntry{{Term: proto.String("beer"), Field: proto.Uint32(0)}}, nil), + 
NewBackIndexRow([]byte("budweiser"), []*BackIndexTermsEntry{{Field: proto.Uint32(0), Terms: []string{"beer"}}}, nil), []byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, - []byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0}, + []byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r'}, }, { - NewBackIndexRow([]byte("budweiser"), []*BackIndexTermEntry{{Term: proto.String("beer"), Field: proto.Uint32(0)}, {Term: proto.String("beat"), Field: proto.Uint32(1)}}, nil), + NewBackIndexRow([]byte("budweiser"), []*BackIndexTermsEntry{{Field: proto.Uint32(0), Terms: []string{"beer"}}, {Field: proto.Uint32(1), Terms: []string{"beat"}}}, nil), []byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, - []byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0, 10, 8, 10, 4, 'b', 'e', 'a', 't', 16, 1}, + []byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r', 10, 8, 8, 1, 18, 4, 'b', 'e', 'a', 't'}, }, { - NewBackIndexRow([]byte("budweiser"), []*BackIndexTermEntry{{Term: proto.String("beer"), Field: proto.Uint32(0)}, {Term: proto.String("beat"), Field: proto.Uint32(1)}}, []*BackIndexStoreEntry{{Field: proto.Uint32(3)}, {Field: proto.Uint32(4)}, {Field: proto.Uint32(5)}}), + NewBackIndexRow([]byte("budweiser"), []*BackIndexTermsEntry{{Field: proto.Uint32(0), Terms: []string{"beer"}}, {Field: proto.Uint32(1), Terms: []string{"beat"}}}, []*BackIndexStoreEntry{{Field: proto.Uint32(3)}, {Field: proto.Uint32(4)}, {Field: proto.Uint32(5)}}), []byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, - []byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0, 10, 8, 10, 4, 'b', 'e', 'a', 't', 16, 1, 18, 2, 8, 3, 18, 2, 8, 4, 18, 2, 8, 5}, + []byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r', 10, 8, 8, 1, 18, 4, 'b', 'e', 'a', 't', 18, 2, 8, 3, 18, 2, 8, 4, 18, 2, 8, 5}, }, { NewStoredRow([]byte("budweiser"), 0, []uint64{}, byte('t'), []byte("an american beer")), @@ -310,10 +310,10 @@ func BenchmarkBackIndexRowEncode(b *testing.B) { field := uint32(1) t1 := "term1" row := NewBackIndexRow([]byte("beername"), - []*BackIndexTermEntry{ 
+ []*BackIndexTermsEntry{ { - Term: &t1, Field: &field, + Terms: []string{t1}, }, }, []*BackIndexStoreEntry{ @@ -325,12 +325,13 @@ func BenchmarkBackIndexRowEncode(b *testing.B) { for i := 0; i < b.N; i++ { row.Key() row.Value() + b.Logf("%#v", row.Value()) } } func BenchmarkBackIndexRowDecode(b *testing.B) { k := []byte{0x62, 0x62, 0x65, 0x65, 0x72, 0x6e, 0x61, 0x6d, 0x65} - v := []byte{0x0a, 0x09, 0x0a, 0x05, 0x74, 0x65, 0x72, 0x6d, 0x31, 0x10, 0x01, 0x12, 0x02, 0x08, 0x01} + v := []byte{0xa, 0x9, 0x8, 0x1, 0x12, 0x5, 0x74, 0x65, 0x72, 0x6d, 0x31, 0x12, 0x2, 0x8, 0x1} b.ResetTimer() for i := 0; i < b.N; i++ { _, err := NewBackIndexRowKV(k, v) diff --git a/index/upsidedown/upsidedown.go b/index/upsidedown/upsidedown.go index 1bf9c733..7c6fea45 100644 --- a/index/upsidedown/upsidedown.go +++ b/index/upsidedown/upsidedown.go @@ -45,7 +45,7 @@ const RowBufferSize = 4 * 1024 var VersionKey = []byte{'v'} -const Version uint8 = 5 +const Version uint8 = 7 var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Version) @@ -610,12 +610,13 @@ func encodeFieldType(f document.Field) byte { return fieldType } -func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermEntries []*BackIndexTermEntry) ([]index.IndexRow, []*BackIndexTermEntry) { +func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermsEntries []*BackIndexTermsEntry) ([]index.IndexRow, []*BackIndexTermsEntry) { fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength))) termFreqRows := make([]TermFrequencyRow, len(tokenFreqs)) termFreqRowsUsed := 0 + terms := make([]string, 0, len(tokenFreqs)) for k, tf := range tokenFreqs { termFreqRow := &termFreqRows[termFreqRowsUsed] termFreqRowsUsed++ @@ -628,13 +629,14 @@ func (udc 
*UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fi } // record the back index entry - backIndexTermEntry := BackIndexTermEntry{Term: proto.String(k), Field: proto.Uint32(uint32(fieldIndex))} - backIndexTermEntries = append(backIndexTermEntries, &backIndexTermEntry) + terms = append(terms, k) rows = append(rows, termFreqRow) } + backIndexTermsEntry := BackIndexTermsEntry{Field: proto.Uint32(uint32(fieldIndex)), Terms: terms} + backIndexTermsEntries = append(backIndexTermsEntries, &backIndexTermsEntry) - return rows, backIndexTermEntries + return rows, backIndexTermsEntries } func (udc *UpsideDownCouch) Delete(id string) (err error) { @@ -707,9 +709,11 @@ func (udc *UpsideDownCouch) Delete(id string) (err error) { func (udc *UpsideDownCouch) deleteSingle(id string, backIndexRow *BackIndexRow, deleteRows []UpsideDownCouchRow) []UpsideDownCouchRow { idBytes := []byte(id) - for _, backIndexEntry := range backIndexRow.termEntries { - tfr := NewTermFrequencyRow([]byte(*backIndexEntry.Term), uint16(*backIndexEntry.Field), idBytes, 0, 0) - deleteRows = append(deleteRows, tfr) + for _, backIndexEntry := range backIndexRow.termsEntries { + for i := range backIndexEntry.Terms { + tfr := NewTermFrequencyRow([]byte(backIndexEntry.Terms[i]), uint16(*backIndexEntry.Field), idBytes, 0, 0) + deleteRows = append(deleteRows, tfr) + } } for _, se := range backIndexRow.storedEntries { sf := NewStoredRow(idBytes, uint16(*se.Field), se.ArrayPositions, 'x', nil) diff --git a/index/upsidedown/upsidedown.pb.go b/index/upsidedown/upsidedown.pb.go index 3218b922..c161e1cc 100644 --- a/index/upsidedown/upsidedown.pb.go +++ b/index/upsidedown/upsidedown.pb.go @@ -3,15 +3,15 @@ // DO NOT EDIT! /* -Package upsidedown is a generated protocol buffer package. + Package upsidedown is a generated protocol buffer package. 
-It is generated from these files: - upsidedown.proto + It is generated from these files: + upsidedown.proto -It has these top-level messages: - BackIndexTermEntry - BackIndexStoreEntry - BackIndexRowValue + It has these top-level messages: + BackIndexTermsEntry + BackIndexStoreEntry + BackIndexRowValue */ package upsidedown @@ -26,30 +26,30 @@ import github_com_golang_protobuf_proto "github.com/golang/protobuf/proto" var _ = proto.Marshal var _ = math.Inf -type BackIndexTermEntry struct { - Term *string `protobuf:"bytes,1,req,name=term" json:"term,omitempty"` - Field *uint32 `protobuf:"varint,2,req,name=field" json:"field,omitempty"` - XXX_unrecognized []byte `json:"-"` +type BackIndexTermsEntry struct { + Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"` + Terms []string `protobuf:"bytes,2,rep,name=terms" json:"terms,omitempty"` + XXX_unrecognized []byte `json:"-"` } -func (m *BackIndexTermEntry) Reset() { *m = BackIndexTermEntry{} } -func (m *BackIndexTermEntry) String() string { return proto.CompactTextString(m) } -func (*BackIndexTermEntry) ProtoMessage() {} +func (m *BackIndexTermsEntry) Reset() { *m = BackIndexTermsEntry{} } +func (m *BackIndexTermsEntry) String() string { return proto.CompactTextString(m) } +func (*BackIndexTermsEntry) ProtoMessage() {} -func (m *BackIndexTermEntry) GetTerm() string { - if m != nil && m.Term != nil { - return *m.Term - } - return "" -} - -func (m *BackIndexTermEntry) GetField() uint32 { +func (m *BackIndexTermsEntry) GetField() uint32 { if m != nil && m.Field != nil { return *m.Field } return 0 } +func (m *BackIndexTermsEntry) GetTerms() []string { + if m != nil { + return m.Terms + } + return nil +} + type BackIndexStoreEntry struct { Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"` ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"` @@ -75,7 +75,7 @@ func (m *BackIndexStoreEntry) GetArrayPositions() []uint64 { } type 
BackIndexRowValue struct { - TermEntries []*BackIndexTermEntry `protobuf:"bytes,1,rep,name=termEntries" json:"termEntries,omitempty"` + TermsEntries []*BackIndexTermsEntry `protobuf:"bytes,1,rep,name=termsEntries" json:"termsEntries,omitempty"` StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -84,9 +84,9 @@ func (m *BackIndexRowValue) Reset() { *m = BackIndexRowValue{} } func (m *BackIndexRowValue) String() string { return proto.CompactTextString(m) } func (*BackIndexRowValue) ProtoMessage() {} -func (m *BackIndexRowValue) GetTermEntries() []*BackIndexTermEntry { +func (m *BackIndexRowValue) GetTermsEntries() []*BackIndexTermsEntry { if m != nil { - return m.TermEntries + return m.TermsEntries } return nil } @@ -98,7 +98,7 @@ func (m *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry { return nil } -func (m *BackIndexTermEntry) Unmarshal(data []byte) error { +func (m *BackIndexTermsEntry) Unmarshal(data []byte) error { var hasFields [1]uint64 l := len(data) iNdEx := 0 @@ -119,8 +119,26 @@ func (m *BackIndexTermEntry) Unmarshal(data []byte) error { wireType := int(wire & 0x7) switch fieldNum { case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType) + } + var v uint32 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + v |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Field = &v + hasFields[0] |= uint64(0x00000001) + case 2: if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType) } var stringLen uint64 for shift := uint(0); ; shift += 7 { @@ -138,28 +156,8 @@ func (m *BackIndexTermEntry) Unmarshal(data []byte) error { if postIndex > l { return io.ErrUnexpectedEOF } - s := string(data[iNdEx:postIndex]) - 
m.Term = &s + m.Terms = append(m.Terms, string(data[iNdEx:postIndex])) iNdEx = postIndex - hasFields[0] |= uint64(0x00000001) - case 2: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType) - } - var v uint32 - for shift := uint(0); ; shift += 7 { - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := data[iNdEx] - iNdEx++ - v |= (uint32(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - m.Field = &v - hasFields[0] |= uint64(0x00000002) default: var sizeOfWire int for { @@ -187,9 +185,6 @@ func (m *BackIndexTermEntry) Unmarshal(data []byte) error { if hasFields[0]&uint64(0x00000001) == 0 { return new(github_com_golang_protobuf_proto.RequiredNotSetError) } - if hasFields[0]&uint64(0x00000002) == 0 { - return new(github_com_golang_protobuf_proto.RequiredNotSetError) - } return nil } @@ -299,7 +294,7 @@ func (m *BackIndexRowValue) Unmarshal(data []byte) error { switch fieldNum { case 1: if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field TermEntries", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType) } var msglen int for shift := uint(0); ; shift += 7 { @@ -320,8 +315,8 @@ func (m *BackIndexRowValue) Unmarshal(data []byte) error { if postIndex > l { return io.ErrUnexpectedEOF } - m.TermEntries = append(m.TermEntries, &BackIndexTermEntry{}) - if err := m.TermEntries[len(m.TermEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { + m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{}) + if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { return err } iNdEx = postIndex @@ -472,16 +467,18 @@ var ( ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling") ) -func (m *BackIndexTermEntry) Size() (n int) { +func (m *BackIndexTermsEntry) Size() (n int) { var l int _ = l - if m.Term != nil { - l = len(*m.Term) - n += 1 + l + sovUpsidedown(uint64(l)) - } if m.Field != nil { 
n += 1 + sovUpsidedown(uint64(*m.Field)) } + if len(m.Terms) > 0 { + for _, s := range m.Terms { + l = len(s) + n += 1 + l + sovUpsidedown(uint64(l)) + } + } if m.XXX_unrecognized != nil { n += len(m.XXX_unrecognized) } @@ -508,8 +505,8 @@ func (m *BackIndexStoreEntry) Size() (n int) { func (m *BackIndexRowValue) Size() (n int) { var l int _ = l - if len(m.TermEntries) > 0 { - for _, e := range m.TermEntries { + if len(m.TermsEntries) > 0 { + for _, e := range m.TermsEntries { l = e.Size() n += 1 + l + sovUpsidedown(uint64(l)) } @@ -539,7 +536,7 @@ func sovUpsidedown(x uint64) (n int) { func sozUpsidedown(x uint64) (n int) { return sovUpsidedown(uint64((x << 1) ^ uint64((int64(x) >> 63)))) } -func (m *BackIndexTermEntry) Marshal() (data []byte, err error) { +func (m *BackIndexTermsEntry) Marshal() (data []byte, err error) { size := m.Size() data = make([]byte, size) n, err := m.MarshalTo(data) @@ -549,26 +546,33 @@ func (m *BackIndexTermEntry) Marshal() (data []byte, err error) { return data[:n], nil } -func (m *BackIndexTermEntry) MarshalTo(data []byte) (n int, err error) { +func (m *BackIndexTermsEntry) MarshalTo(data []byte) (n int, err error) { var i int _ = i var l int _ = l - if m.Term == nil { - return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError) - } else { - data[i] = 0xa - i++ - i = encodeVarintUpsidedown(data, i, uint64(len(*m.Term))) - i += copy(data[i:], *m.Term) - } if m.Field == nil { return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError) } else { - data[i] = 0x10 + data[i] = 0x8 i++ i = encodeVarintUpsidedown(data, i, uint64(*m.Field)) } + if len(m.Terms) > 0 { + for _, s := range m.Terms { + data[i] = 0x12 + i++ + l = len(s) + for l >= 1<<7 { + data[i] = uint8(uint64(l)&0x7f | 0x80) + l >>= 7 + i++ + } + data[i] = uint8(l) + i++ + i += copy(data[i:], s) + } + } if m.XXX_unrecognized != nil { i += copy(data[i:], m.XXX_unrecognized) } @@ -625,8 +629,8 @@ func (m *BackIndexRowValue) MarshalTo(data []byte) (n int, err error) 
{ _ = i var l int _ = l - if len(m.TermEntries) > 0 { - for _, msg := range m.TermEntries { + if len(m.TermsEntries) > 0 { + for _, msg := range m.TermsEntries { data[i] = 0xa i++ i = encodeVarintUpsidedown(data, i, uint64(msg.Size())) diff --git a/index/upsidedown/upsidedown.proto b/index/upsidedown/upsidedown.proto index ced4dfa6..cf0492a2 100644 --- a/index/upsidedown/upsidedown.proto +++ b/index/upsidedown/upsidedown.proto @@ -1,6 +1,6 @@ -message BackIndexTermEntry { - required string term = 1; - required uint32 field = 2; +message BackIndexTermsEntry { + required uint32 field = 1; + repeated string terms = 2; } message BackIndexStoreEntry { @@ -9,6 +9,6 @@ message BackIndexStoreEntry { } message BackIndexRowValue { - repeated BackIndexTermEntry termEntries = 1; + repeated BackIndexTermsEntry termsEntries = 1; repeated BackIndexStoreEntry storedEntries = 2; }