diff --git a/index/firestorm/firestorm.md b/index/firestorm/firestorm.md index e6df132b..2b9d6b86 100644 --- a/index/firestorm/firestorm.md +++ b/index/firestorm/firestorm.md @@ -241,7 +241,7 @@ Batch operations look largely just like the indexing/deleting operations. Two o #### Term Field Iteration - Acquire indexState.docIdNumberMutex for reading: -- Get copy of: (it is assumed some COW datastructure is used) +- Get copy of: (it is assumed some COW datastructure is used, or MVCC is accommodated in some way by the impl) - maxReadDocNumber - inFlightDocIds - deletedDocIdNumbers @@ -265,7 +265,7 @@ Any row satisfying the above conditions is a candidate document. ### Row Encoding All keys are manually encoded to ensure a precise row ordering. -Stored and Internal values are opaque byte arrays. +Internal Row values are opaque byte arrays. All other values are encoded using protobuf for a balance of efficiency and flexibility. Dictionary and TermFrequency rows are the most likely to take advantage of this flexibility, but other rows are read/written infrequently enough that the flexibility outweighs any overhead. #### Version @@ -300,9 +300,9 @@ Dictionary rows record which terms are used in a particular field. The value ca |---------|------------| |```'d' ```|``````| - message DictionaryValue { - uint64 count = 1; // number of documents using this term in this field - } + message DictionaryValue { + optional uint64 count = 1; // number of documents using this term in this field + } #### Term Frequency @@ -314,18 +314,18 @@ Term Freqquency rows record which documents use a term in a particular field. 
T message TermVectorEntry { - uint16 field = 1; // field optional if redundant, required for composite fields - uint64 pos = 2; // positional offset within the field - uint64 start = 3; // start byte offset - uint64 end = 4; // end byte offset - repeated uint64 arrayPositions = 5; // array positions + optional uint32 field = 1; // field optional if redundant, required for composite fields + optional uint64 pos = 2; // positional offset within the field + optional uint64 start = 3; // start byte offset + optional uint64 end = 4; // end byte offset + repeated uint64 arrayPositions = 5; // array positions } - message DictionaryValue { - required uint64 freq = 1; // frequency of the term occurance within this field - float norm = 2; // normalization factor - repeated TermVectorEntry vectors = 3; // term vectors - } + message TermFrequencyValue { + required uint64 freq = 1; // frequency of the term occurrence within this field + optional float norm = 2; // normalization factor + repeated TermVectorEntry vectors = 3; // term vectors + } #### Stored @@ -336,7 +336,7 @@ Stored rows record the original values used to produce the index. At the row en |```'s' 0xff ```|``````| message StoredValue { - bytes raw = 1; // raw bytes + optional bytes raw = 1; // raw bytes } NOTE: we currently encode stored values as raw bytes, however we have other proposals in flight to do something better than this. By using protobuf here as well, we can support existing functionality through the raw field, but allow for more strongly typed information in the future. @@ -349,8 +349,6 @@ Internal rows are a reserved keyspace which the layer above can use for anything |---------------------------|-------------------------| |```'i' ```|``````| - - ### FAQ 1. How do you ensure correct semantics while updating a document in the index? 
diff --git a/index/firestorm/firestorm_rows.pb.go b/index/firestorm/firestorm_rows.pb.go new file mode 100644 index 00000000..eb3c9af5 --- /dev/null +++ b/index/firestorm/firestorm_rows.pb.go @@ -0,0 +1,1125 @@ +// Code generated by protoc-gen-gogo. +// source: firestorm_rows.proto +// DO NOT EDIT! + +/* + Package firestorm_rows is a generated protocol buffer package. + + It is generated from these files: + firestorm_rows.proto + + It has these top-level messages: + VersionValue + FieldValue + DictionaryValue + TermVectorEntry + TermFrequencyValue + StoredValue +*/ +package firestorm_rows + +import proto "github.com/golang/protobuf/proto" +import math "math" + +import io "io" +import fmt "fmt" +import github_com_golang_protobuf_proto "github.com/golang/protobuf/proto" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = math.Inf + +type VersionValue struct { + Version *uint64 `protobuf:"varint,1,req,name=version" json:"version,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *VersionValue) Reset() { *m = VersionValue{} } +func (m *VersionValue) String() string { return proto.CompactTextString(m) } +func (*VersionValue) ProtoMessage() {} + +func (m *VersionValue) GetVersion() uint64 { + if m != nil && m.Version != nil { + return *m.Version + } + return 0 +} + +type FieldValue struct { + Name *string `protobuf:"bytes,1,req,name=name" json:"name,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *FieldValue) Reset() { *m = FieldValue{} } +func (m *FieldValue) String() string { return proto.CompactTextString(m) } +func (*FieldValue) ProtoMessage() {} + +func (m *FieldValue) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +type DictionaryValue struct { + Count *uint64 `protobuf:"varint,1,opt,name=count" json:"count,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *DictionaryValue) Reset() { *m = DictionaryValue{} } +func (m 
*DictionaryValue) String() string { return proto.CompactTextString(m) } +func (*DictionaryValue) ProtoMessage() {} + +func (m *DictionaryValue) GetCount() uint64 { + if m != nil && m.Count != nil { + return *m.Count + } + return 0 +} + +type TermVectorEntry struct { + Field *uint32 `protobuf:"varint,1,opt,name=field" json:"field,omitempty"` + Pos *uint64 `protobuf:"varint,2,opt,name=pos" json:"pos,omitempty"` + Start *uint64 `protobuf:"varint,3,opt,name=start" json:"start,omitempty"` + End *uint64 `protobuf:"varint,4,opt,name=end" json:"end,omitempty"` + ArrayPositions []uint64 `protobuf:"varint,5,rep,name=arrayPositions" json:"arrayPositions,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *TermVectorEntry) Reset() { *m = TermVectorEntry{} } +func (m *TermVectorEntry) String() string { return proto.CompactTextString(m) } +func (*TermVectorEntry) ProtoMessage() {} + +func (m *TermVectorEntry) GetField() uint32 { + if m != nil && m.Field != nil { + return *m.Field + } + return 0 +} + +func (m *TermVectorEntry) GetPos() uint64 { + if m != nil && m.Pos != nil { + return *m.Pos + } + return 0 +} + +func (m *TermVectorEntry) GetStart() uint64 { + if m != nil && m.Start != nil { + return *m.Start + } + return 0 +} + +func (m *TermVectorEntry) GetEnd() uint64 { + if m != nil && m.End != nil { + return *m.End + } + return 0 +} + +func (m *TermVectorEntry) GetArrayPositions() []uint64 { + if m != nil { + return m.ArrayPositions + } + return nil +} + +type TermFrequencyValue struct { + Freq *uint64 `protobuf:"varint,1,req,name=freq" json:"freq,omitempty"` + Norm *float32 `protobuf:"fixed32,2,opt,name=norm" json:"norm,omitempty"` + Vectors []*TermVectorEntry `protobuf:"bytes,3,rep,name=vectors" json:"vectors,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *TermFrequencyValue) Reset() { *m = TermFrequencyValue{} } +func (m *TermFrequencyValue) String() string { return proto.CompactTextString(m) } +func (*TermFrequencyValue) ProtoMessage() {} + 
+func (m *TermFrequencyValue) GetFreq() uint64 { + if m != nil && m.Freq != nil { + return *m.Freq + } + return 0 +} + +func (m *TermFrequencyValue) GetNorm() float32 { + if m != nil && m.Norm != nil { + return *m.Norm + } + return 0 +} + +func (m *TermFrequencyValue) GetVectors() []*TermVectorEntry { + if m != nil { + return m.Vectors + } + return nil +} + +type StoredValue struct { + Raw []byte `protobuf:"bytes,1,opt,name=raw" json:"raw,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *StoredValue) Reset() { *m = StoredValue{} } +func (m *StoredValue) String() string { return proto.CompactTextString(m) } +func (*StoredValue) ProtoMessage() {} + +func (m *StoredValue) GetRaw() []byte { + if m != nil { + return m.Raw + } + return nil +} + +func (m *VersionValue) Unmarshal(data []byte) error { + var hasFields [1]uint64 + l := len(data) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Version", wireType) + } + var v uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + v |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Version = &v + hasFields[0] |= uint64(0x00000001) + default: + var sizeOfWire int + for { + sizeOfWire++ + wire >>= 7 + if wire == 0 { + break + } + } + iNdEx -= sizeOfWire + skippy, err := skipFirestormRows(data[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthFirestormRows + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + if hasFields[0]&uint64(0x00000001) == 0 { + return new(github_com_golang_protobuf_proto.RequiredNotSetError) + } + + return nil +} +func (m *FieldValue) Unmarshal(data []byte) error { + var hasFields [1]uint64 + l := len(data) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Name", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + postIndex := iNdEx + int(stringLen) + if postIndex > l { + return io.ErrUnexpectedEOF + } + s := string(data[iNdEx:postIndex]) + m.Name = &s + iNdEx = postIndex + hasFields[0] |= uint64(0x00000001) + default: + var sizeOfWire int + for { + sizeOfWire++ + wire >>= 7 + if wire == 0 { + break + } + } + iNdEx -= sizeOfWire + skippy, err := skipFirestormRows(data[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthFirestormRows + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + if hasFields[0]&uint64(0x00000001) == 0 { + return new(github_com_golang_protobuf_proto.RequiredNotSetError) + } + + return nil +} +func (m *DictionaryValue) Unmarshal(data []byte) error { + l := len(data) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Count", wireType) + } + var v uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + v |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Count = &v + default: + var sizeOfWire int + for { + sizeOfWire++ + wire >>= 7 + if wire == 0 { + break + } + } + iNdEx -= sizeOfWire + skippy, err := skipFirestormRows(data[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthFirestormRows + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + return nil +} +func (m *TermVectorEntry) Unmarshal(data []byte) error { + l := len(data) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType) + } + var v uint32 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + v |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Field = &v + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pos", wireType) + } + var v uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + v |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Pos = &v + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Start", wireType) + } + var v uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + v |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Start = &v + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field End", wireType) + } + var v uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + v |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.End = &v + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ArrayPositions", wireType) + } + var v uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + v |= 
(uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.ArrayPositions = append(m.ArrayPositions, v) + default: + var sizeOfWire int + for { + sizeOfWire++ + wire >>= 7 + if wire == 0 { + break + } + } + iNdEx -= sizeOfWire + skippy, err := skipFirestormRows(data[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthFirestormRows + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + return nil +} +func (m *TermFrequencyValue) Unmarshal(data []byte) error { + var hasFields [1]uint64 + l := len(data) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Freq", wireType) + } + var v uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + v |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Freq = &v + hasFields[0] |= uint64(0x00000001) + case 2: + if wireType != 5 { + return fmt.Errorf("proto: wrong wireType = %d for field Norm", wireType) + } + var v uint32 + if (iNdEx + 4) > l { + return io.ErrUnexpectedEOF + } + iNdEx += 4 + v = uint32(data[iNdEx-4]) + v |= uint32(data[iNdEx-3]) << 8 + v |= uint32(data[iNdEx-2]) << 16 + v |= uint32(data[iNdEx-1]) << 24 + v2 := float32(math.Float32frombits(v)) + m.Norm = &v2 + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Vectors", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + msglen |= (int(b) & 
0x7F) << shift + if b < 0x80 { + break + } + } + postIndex := iNdEx + msglen + if msglen < 0 { + return ErrInvalidLengthFirestormRows + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Vectors = append(m.Vectors, &TermVectorEntry{}) + if err := m.Vectors[len(m.Vectors)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + var sizeOfWire int + for { + sizeOfWire++ + wire >>= 7 + if wire == 0 { + break + } + } + iNdEx -= sizeOfWire + skippy, err := skipFirestormRows(data[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthFirestormRows + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + if hasFields[0]&uint64(0x00000001) == 0 { + return new(github_com_golang_protobuf_proto.RequiredNotSetError) + } + + return nil +} +func (m *StoredValue) Unmarshal(data []byte) error { + l := len(data) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Raw", wireType) + } + var byteLen int + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + byteLen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if byteLen < 0 { + return ErrInvalidLengthFirestormRows + } + postIndex := iNdEx + byteLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Raw = append([]byte{}, data[iNdEx:postIndex]...) 
+ iNdEx = postIndex + default: + var sizeOfWire int + for { + sizeOfWire++ + wire >>= 7 + if wire == 0 { + break + } + } + iNdEx -= sizeOfWire + skippy, err := skipFirestormRows(data[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthFirestormRows + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + return nil +} +func skipFirestormRows(data []byte) (n int, err error) { + l := len(data) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for { + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if data[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + iNdEx += length + if length < 0 { + return 0, ErrInvalidLengthFirestormRows + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipFirestormRows(data[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} 
+ +var ( + ErrInvalidLengthFirestormRows = fmt.Errorf("proto: negative length found during unmarshaling") +) + +func (m *VersionValue) Size() (n int) { + var l int + _ = l + if m.Version != nil { + n += 1 + sovFirestormRows(uint64(*m.Version)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *FieldValue) Size() (n int) { + var l int + _ = l + if m.Name != nil { + l = len(*m.Name) + n += 1 + l + sovFirestormRows(uint64(l)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *DictionaryValue) Size() (n int) { + var l int + _ = l + if m.Count != nil { + n += 1 + sovFirestormRows(uint64(*m.Count)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *TermVectorEntry) Size() (n int) { + var l int + _ = l + if m.Field != nil { + n += 1 + sovFirestormRows(uint64(*m.Field)) + } + if m.Pos != nil { + n += 1 + sovFirestormRows(uint64(*m.Pos)) + } + if m.Start != nil { + n += 1 + sovFirestormRows(uint64(*m.Start)) + } + if m.End != nil { + n += 1 + sovFirestormRows(uint64(*m.End)) + } + if len(m.ArrayPositions) > 0 { + for _, e := range m.ArrayPositions { + n += 1 + sovFirestormRows(uint64(e)) + } + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *TermFrequencyValue) Size() (n int) { + var l int + _ = l + if m.Freq != nil { + n += 1 + sovFirestormRows(uint64(*m.Freq)) + } + if m.Norm != nil { + n += 5 + } + if len(m.Vectors) > 0 { + for _, e := range m.Vectors { + l = e.Size() + n += 1 + l + sovFirestormRows(uint64(l)) + } + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *StoredValue) Size() (n int) { + var l int + _ = l + if m.Raw != nil { + l = len(m.Raw) + n += 1 + l + sovFirestormRows(uint64(l)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func sovFirestormRows(x uint64) (n int) { + for { + n++ + x 
>>= 7 + if x == 0 { + break + } + } + return n +} +func sozFirestormRows(x uint64) (n int) { + return sovFirestormRows(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (m *VersionValue) Marshal() (data []byte, err error) { + size := m.Size() + data = make([]byte, size) + n, err := m.MarshalTo(data) + if err != nil { + return nil, err + } + return data[:n], nil +} + +func (m *VersionValue) MarshalTo(data []byte) (n int, err error) { + var i int + _ = i + var l int + _ = l + if m.Version == nil { + return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError) + } else { + data[i] = 0x8 + i++ + i = encodeVarintFirestormRows(data, i, uint64(*m.Version)) + } + if m.XXX_unrecognized != nil { + i += copy(data[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *FieldValue) Marshal() (data []byte, err error) { + size := m.Size() + data = make([]byte, size) + n, err := m.MarshalTo(data) + if err != nil { + return nil, err + } + return data[:n], nil +} + +func (m *FieldValue) MarshalTo(data []byte) (n int, err error) { + var i int + _ = i + var l int + _ = l + if m.Name == nil { + return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError) + } else { + data[i] = 0xa + i++ + i = encodeVarintFirestormRows(data, i, uint64(len(*m.Name))) + i += copy(data[i:], *m.Name) + } + if m.XXX_unrecognized != nil { + i += copy(data[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *DictionaryValue) Marshal() (data []byte, err error) { + size := m.Size() + data = make([]byte, size) + n, err := m.MarshalTo(data) + if err != nil { + return nil, err + } + return data[:n], nil +} + +func (m *DictionaryValue) MarshalTo(data []byte) (n int, err error) { + var i int + _ = i + var l int + _ = l + if m.Count != nil { + data[i] = 0x8 + i++ + i = encodeVarintFirestormRows(data, i, uint64(*m.Count)) + } + if m.XXX_unrecognized != nil { + i += copy(data[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *TermVectorEntry) Marshal() (data []byte, err error) { + 
size := m.Size() + data = make([]byte, size) + n, err := m.MarshalTo(data) + if err != nil { + return nil, err + } + return data[:n], nil +} + +func (m *TermVectorEntry) MarshalTo(data []byte) (n int, err error) { + var i int + _ = i + var l int + _ = l + if m.Field != nil { + data[i] = 0x8 + i++ + i = encodeVarintFirestormRows(data, i, uint64(*m.Field)) + } + if m.Pos != nil { + data[i] = 0x10 + i++ + i = encodeVarintFirestormRows(data, i, uint64(*m.Pos)) + } + if m.Start != nil { + data[i] = 0x18 + i++ + i = encodeVarintFirestormRows(data, i, uint64(*m.Start)) + } + if m.End != nil { + data[i] = 0x20 + i++ + i = encodeVarintFirestormRows(data, i, uint64(*m.End)) + } + if len(m.ArrayPositions) > 0 { + for _, num := range m.ArrayPositions { + data[i] = 0x28 + i++ + i = encodeVarintFirestormRows(data, i, uint64(num)) + } + } + if m.XXX_unrecognized != nil { + i += copy(data[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *TermFrequencyValue) Marshal() (data []byte, err error) { + size := m.Size() + data = make([]byte, size) + n, err := m.MarshalTo(data) + if err != nil { + return nil, err + } + return data[:n], nil +} + +func (m *TermFrequencyValue) MarshalTo(data []byte) (n int, err error) { + var i int + _ = i + var l int + _ = l + if m.Freq == nil { + return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError) + } else { + data[i] = 0x8 + i++ + i = encodeVarintFirestormRows(data, i, uint64(*m.Freq)) + } + if m.Norm != nil { + data[i] = 0x15 + i++ + i = encodeFixed32FirestormRows(data, i, uint32(math.Float32bits(*m.Norm))) + } + if len(m.Vectors) > 0 { + for _, msg := range m.Vectors { + data[i] = 0x1a + i++ + i = encodeVarintFirestormRows(data, i, uint64(msg.Size())) + n, err := msg.MarshalTo(data[i:]) + if err != nil { + return 0, err + } + i += n + } + } + if m.XXX_unrecognized != nil { + i += copy(data[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *StoredValue) Marshal() (data []byte, err error) { + size := m.Size() + data = 
make([]byte, size) + n, err := m.MarshalTo(data) + if err != nil { + return nil, err + } + return data[:n], nil +} + +func (m *StoredValue) MarshalTo(data []byte) (n int, err error) { + var i int + _ = i + var l int + _ = l + if m.Raw != nil { + data[i] = 0xa + i++ + i = encodeVarintFirestormRows(data, i, uint64(len(m.Raw))) + i += copy(data[i:], m.Raw) + } + if m.XXX_unrecognized != nil { + i += copy(data[i:], m.XXX_unrecognized) + } + return i, nil +} + +func encodeFixed64FirestormRows(data []byte, offset int, v uint64) int { + data[offset] = uint8(v) + data[offset+1] = uint8(v >> 8) + data[offset+2] = uint8(v >> 16) + data[offset+3] = uint8(v >> 24) + data[offset+4] = uint8(v >> 32) + data[offset+5] = uint8(v >> 40) + data[offset+6] = uint8(v >> 48) + data[offset+7] = uint8(v >> 56) + return offset + 8 +} +func encodeFixed32FirestormRows(data []byte, offset int, v uint32) int { + data[offset] = uint8(v) + data[offset+1] = uint8(v >> 8) + data[offset+2] = uint8(v >> 16) + data[offset+3] = uint8(v >> 24) + return offset + 4 +} +func encodeVarintFirestormRows(data []byte, offset int, v uint64) int { + for v >= 1<<7 { + data[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + data[offset] = uint8(v) + return offset + 1 +} diff --git a/index/firestorm/firestorm_rows.proto b/index/firestorm/firestorm_rows.proto index d24c7eec..a26541a3 100644 --- a/index/firestorm/firestorm_rows.proto +++ b/index/firestorm/firestorm_rows.proto @@ -7,5 +7,23 @@ message FieldValue { } message DictionaryValue { - uint64 count = 1; // number of documents using this term in this field + optional uint64 count = 1; // number of documents using this term in this field +} + +message TermVectorEntry { + optional uint32 field = 1; // field optional if redundant, required for composite fields + optional uint64 pos = 2; // positional offset within the field + optional uint64 start = 3; // start byte offset + optional uint64 end = 4; // end byte offset + repeated uint64 arrayPositions = 5; // 
array positions +} + +message TermFrequencyValue { + required uint64 freq = 1; // frequency of the term occurrence within this field + optional float norm = 2; // normalization factor + repeated TermVectorEntry vectors = 3; // term vectors +} + +message StoredValue { + optional bytes raw = 1; // raw bytes } \ No newline at end of file