0
0
bleve/index/upsidedown/row.go
abhinavdangeti 7e36109b3c MB-28162: Provide API to estimate memory needed to run a search query
This API (unexported) will estimate the amount of memory needed to execute
a search query over an index before the collector begins data collection.

Sample estimates for certain queries:
{Size: 10, BenchmarkUpsidedownSearchOverhead}
                                                           ESTIMATE    BENCHMEM
TermQuery                                                  4616        4796
MatchQuery                                                 5210        5405
DisjunctionQuery (Match queries)                           7700        8447
DisjunctionQuery (Term queries)                            6514        6591
ConjunctionQuery (Match queries)                           7524        8175
Nested disjunction query (disjunction of disjunctions)     10306       10708
…
2018-03-06 13:53:42 -08:00

1142 lines
27 KiB
Go

// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math"
"reflect"
"github.com/blevesearch/bleve/size"
"github.com/golang/protobuf/proto"
)
var reflectStaticSizeTermFrequencyRow int
var reflectStaticSizeTermVector int
func init() {
var tfr TermFrequencyRow
reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(tfr).Size())
var tv TermVector
reflectStaticSizeTermVector = int(reflect.TypeOf(tv).Size())
}
const ByteSeparator byte = 0xff
type UpsideDownCouchRowStream chan UpsideDownCouchRow
type UpsideDownCouchRow interface {
KeySize() int
KeyTo([]byte) (int, error)
Key() []byte
Value() []byte
ValueSize() int
ValueTo([]byte) (int, error)
}
func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) {
if len(key) > 0 {
switch key[0] {
case 'v':
return NewVersionRowKV(key, value)
case 'f':
return NewFieldRowKV(key, value)
case 'd':
return NewDictionaryRowKV(key, value)
case 't':
return NewTermFrequencyRowKV(key, value)
case 'b':
return NewBackIndexRowKV(key, value)
case 's':
return NewStoredRowKV(key, value)
case 'i':
return NewInternalRowKV(key, value)
}
return nil, fmt.Errorf("Unknown field type '%s'", string(key[0]))
}
return nil, fmt.Errorf("Invalid empty key")
}
// VERSION
type VersionRow struct {
version uint8
}
func (v *VersionRow) Key() []byte {
return []byte{'v'}
}
func (v *VersionRow) KeySize() int {
return 1
}
func (v *VersionRow) KeyTo(buf []byte) (int, error) {
buf[0] = 'v'
return 1, nil
}
func (v *VersionRow) Value() []byte {
return []byte{byte(v.version)}
}
func (v *VersionRow) ValueSize() int {
return 1
}
func (v *VersionRow) ValueTo(buf []byte) (int, error) {
buf[0] = v.version
return 1, nil
}
func (v *VersionRow) String() string {
return fmt.Sprintf("Version: %d", v.version)
}
func NewVersionRow(version uint8) *VersionRow {
return &VersionRow{
version: version,
}
}
func NewVersionRowKV(key, value []byte) (*VersionRow, error) {
rv := VersionRow{}
buf := bytes.NewBuffer(value)
err := binary.Read(buf, binary.LittleEndian, &rv.version)
if err != nil {
return nil, err
}
return &rv, nil
}
// INTERNAL STORAGE
type InternalRow struct {
key []byte
val []byte
}
func (i *InternalRow) Key() []byte {
buf := make([]byte, i.KeySize())
size, _ := i.KeyTo(buf)
return buf[:size]
}
func (i *InternalRow) KeySize() int {
return len(i.key) + 1
}
func (i *InternalRow) KeyTo(buf []byte) (int, error) {
buf[0] = 'i'
actual := copy(buf[1:], i.key)
return 1 + actual, nil
}
func (i *InternalRow) Value() []byte {
return i.val
}
func (i *InternalRow) ValueSize() int {
return len(i.val)
}
func (i *InternalRow) ValueTo(buf []byte) (int, error) {
actual := copy(buf, i.val)
return actual, nil
}
func (i *InternalRow) String() string {
return fmt.Sprintf("InternalStore - Key: %s (% x) Val: %s (% x)", i.key, i.key, i.val, i.val)
}
func NewInternalRow(key, val []byte) *InternalRow {
return &InternalRow{
key: key,
val: val,
}
}
func NewInternalRowKV(key, value []byte) (*InternalRow, error) {
rv := InternalRow{}
rv.key = key[1:]
rv.val = value
return &rv, nil
}
// FIELD definition
type FieldRow struct {
index uint16
name string
}
func (f *FieldRow) Key() []byte {
buf := make([]byte, f.KeySize())
size, _ := f.KeyTo(buf)
return buf[:size]
}
func (f *FieldRow) KeySize() int {
return 3
}
func (f *FieldRow) KeyTo(buf []byte) (int, error) {
buf[0] = 'f'
binary.LittleEndian.PutUint16(buf[1:3], f.index)
return 3, nil
}
func (f *FieldRow) Value() []byte {
return append([]byte(f.name), ByteSeparator)
}
func (f *FieldRow) ValueSize() int {
return len(f.name) + 1
}
func (f *FieldRow) ValueTo(buf []byte) (int, error) {
size := copy(buf, f.name)
buf[size] = ByteSeparator
return size + 1, nil
}
func (f *FieldRow) String() string {
return fmt.Sprintf("Field: %d Name: %s", f.index, f.name)
}
func NewFieldRow(index uint16, name string) *FieldRow {
return &FieldRow{
index: index,
name: name,
}
}
func NewFieldRowKV(key, value []byte) (*FieldRow, error) {
rv := FieldRow{}
buf := bytes.NewBuffer(key)
_, err := buf.ReadByte() // type
if err != nil {
return nil, err
}
err = binary.Read(buf, binary.LittleEndian, &rv.index)
if err != nil {
return nil, err
}
buf = bytes.NewBuffer(value)
rv.name, err = buf.ReadString(ByteSeparator)
if err != nil {
return nil, err
}
rv.name = rv.name[:len(rv.name)-1] // trim off separator byte
return &rv, nil
}
// DICTIONARY
const DictionaryRowMaxValueSize = binary.MaxVarintLen64
type DictionaryRow struct {
term []byte
count uint64
field uint16
}
func (dr *DictionaryRow) Key() []byte {
buf := make([]byte, dr.KeySize())
size, _ := dr.KeyTo(buf)
return buf[:size]
}
func (dr *DictionaryRow) KeySize() int {
return dictionaryRowKeySize(dr.term)
}
func dictionaryRowKeySize(term []byte) int {
return len(term) + 3
}
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
return dictionaryRowKeyTo(buf, dr.field, dr.term), nil
}
func dictionaryRowKeyTo(buf []byte, field uint16, term []byte) int {
buf[0] = 'd'
binary.LittleEndian.PutUint16(buf[1:3], field)
size := copy(buf[3:], term)
return size + 3
}
func (dr *DictionaryRow) Value() []byte {
buf := make([]byte, dr.ValueSize())
size, _ := dr.ValueTo(buf)
return buf[:size]
}
func (dr *DictionaryRow) ValueSize() int {
return DictionaryRowMaxValueSize
}
func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) {
used := binary.PutUvarint(buf, dr.count)
return used, nil
}
func (dr *DictionaryRow) String() string {
return fmt.Sprintf("Dictionary Term: `%s` Field: %d Count: %d ", string(dr.term), dr.field, dr.count)
}
func NewDictionaryRow(term []byte, field uint16, count uint64) *DictionaryRow {
return &DictionaryRow{
term: term,
field: field,
count: count,
}
}
func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) {
rv, err := NewDictionaryRowK(key)
if err != nil {
return nil, err
}
err = rv.parseDictionaryV(value)
if err != nil {
return nil, err
}
return rv, nil
}
func NewDictionaryRowK(key []byte) (*DictionaryRow, error) {
rv := &DictionaryRow{}
err := rv.parseDictionaryK(key)
if err != nil {
return nil, err
}
return rv, nil
}
func (dr *DictionaryRow) parseDictionaryK(key []byte) error {
dr.field = binary.LittleEndian.Uint16(key[1:3])
if dr.term != nil {
dr.term = dr.term[:0]
}
dr.term = append(dr.term, key[3:]...)
return nil
}
func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
count, err := dictionaryRowParseV(value)
if err != nil {
return err
}
dr.count = count
return nil
}
func dictionaryRowParseV(value []byte) (uint64, error) {
count, nread := binary.Uvarint(value)
if nread <= 0 {
return 0, fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
}
return count, nil
}
// TERM FIELD FREQUENCY
type TermVector struct {
field uint16
arrayPositions []uint64
pos uint64
start uint64
end uint64
}
func (tv *TermVector) Size() int {
return reflectStaticSizeTermVector + size.SizeOfPtr +
len(tv.arrayPositions)*size.SizeOfUint64
}
func (tv *TermVector) String() string {
return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
}
type TermFrequencyRow struct {
term []byte
doc []byte
freq uint64
vectors []*TermVector
norm float32
field uint16
}
func (tfr *TermFrequencyRow) Size() int {
sizeInBytes := reflectStaticSizeTermFrequencyRow +
len(tfr.term) +
len(tfr.doc)
for _, entry := range tfr.vectors {
sizeInBytes += entry.Size()
}
return sizeInBytes
}
func (tfr *TermFrequencyRow) Term() []byte {
return tfr.term
}
func (tfr *TermFrequencyRow) Freq() uint64 {
return tfr.freq
}
func (tfr *TermFrequencyRow) ScanPrefixForField() []byte {
buf := make([]byte, 3)
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
return buf
}
func (tfr *TermFrequencyRow) ScanPrefixForFieldTermPrefix() []byte {
buf := make([]byte, 3+len(tfr.term))
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
copy(buf[3:], tfr.term)
return buf
}
func (tfr *TermFrequencyRow) ScanPrefixForFieldTerm() []byte {
buf := make([]byte, 3+len(tfr.term)+1)
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
termLen := copy(buf[3:], tfr.term)
buf[3+termLen] = ByteSeparator
return buf
}
func (tfr *TermFrequencyRow) Key() []byte {
buf := make([]byte, tfr.KeySize())
size, _ := tfr.KeyTo(buf)
return buf[:size]
}
func (tfr *TermFrequencyRow) KeySize() int {
return termFrequencyRowKeySize(tfr.term, tfr.doc)
}
func termFrequencyRowKeySize(term, doc []byte) int {
return 3 + len(term) + 1 + len(doc)
}
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
return termFrequencyRowKeyTo(buf, tfr.field, tfr.term, tfr.doc), nil
}
func termFrequencyRowKeyTo(buf []byte, field uint16, term, doc []byte) int {
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], field)
termLen := copy(buf[3:], term)
buf[3+termLen] = ByteSeparator
docLen := copy(buf[3+termLen+1:], doc)
return 3 + termLen + 1 + docLen
}
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {
keySize := tfr.KeySize()
if cap(buf) < keySize {
buf = make([]byte, keySize)
}
actualSize, err := tfr.KeyTo(buf[0:keySize])
return buf[0:actualSize], err
}
func (tfr *TermFrequencyRow) DictionaryRowKey() []byte {
dr := NewDictionaryRow(tfr.term, tfr.field, 0)
return dr.Key()
}
func (tfr *TermFrequencyRow) DictionaryRowKeySize() int {
dr := NewDictionaryRow(tfr.term, tfr.field, 0)
return dr.KeySize()
}
func (tfr *TermFrequencyRow) DictionaryRowKeyTo(buf []byte) (int, error) {
dr := NewDictionaryRow(tfr.term, tfr.field, 0)
return dr.KeyTo(buf)
}
func (tfr *TermFrequencyRow) Value() []byte {
buf := make([]byte, tfr.ValueSize())
size, _ := tfr.ValueTo(buf)
return buf[:size]
}
func (tfr *TermFrequencyRow) ValueSize() int {
bufLen := binary.MaxVarintLen64 + binary.MaxVarintLen64
for _, vector := range tfr.vectors {
bufLen += (binary.MaxVarintLen64 * 4) + (1+len(vector.arrayPositions))*binary.MaxVarintLen64
}
return bufLen
}
func (tfr *TermFrequencyRow) ValueTo(buf []byte) (int, error) {
used := binary.PutUvarint(buf[:binary.MaxVarintLen64], tfr.freq)
normuint32 := math.Float32bits(tfr.norm)
newbuf := buf[used : used+binary.MaxVarintLen64]
used += binary.PutUvarint(newbuf, uint64(normuint32))
for _, vector := range tfr.vectors {
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(vector.field))
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.pos)
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.start)
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.end)
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(len(vector.arrayPositions)))
for _, arrayPosition := range vector.arrayPositions {
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], arrayPosition)
}
}
return used, nil
}
func (tfr *TermFrequencyRow) String() string {
return fmt.Sprintf("Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v", string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors)
}
func InitTermFrequencyRow(tfr *TermFrequencyRow, term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow {
tfr.term = term
tfr.field = field
tfr.doc = docID
tfr.freq = freq
tfr.norm = norm
return tfr
}
func NewTermFrequencyRow(term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow {
return &TermFrequencyRow{
term: term,
field: field,
doc: docID,
freq: freq,
norm: norm,
}
}
func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, docID []byte, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow {
return &TermFrequencyRow{
term: term,
field: field,
doc: docID,
freq: freq,
norm: norm,
vectors: vectors,
}
}
func NewTermFrequencyRowK(key []byte) (*TermFrequencyRow, error) {
rv := &TermFrequencyRow{}
err := rv.parseK(key)
if err != nil {
return nil, err
}
return rv, nil
}
func (tfr *TermFrequencyRow) parseK(key []byte) error {
keyLen := len(key)
if keyLen < 3 {
return fmt.Errorf("invalid term frequency key, no valid field")
}
tfr.field = binary.LittleEndian.Uint16(key[1:3])
termEndPos := bytes.IndexByte(key[3:], ByteSeparator)
if termEndPos < 0 {
return fmt.Errorf("invalid term frequency key, no byte separator terminating term")
}
tfr.term = key[3 : 3+termEndPos]
docLen := keyLen - (3 + termEndPos + 1)
if docLen < 1 {
return fmt.Errorf("invalid term frequency key, empty docid")
}
tfr.doc = key[3+termEndPos+1:]
return nil
}
func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error {
tfr.doc = key[3+len(term)+1:]
if len(tfr.doc) <= 0 {
return fmt.Errorf("invalid term frequency key, empty docid")
}
return nil
}
func (tfr *TermFrequencyRow) parseV(value []byte, includeTermVectors bool) error {
var bytesRead int
tfr.freq, bytesRead = binary.Uvarint(value)
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, invalid frequency")
}
currOffset := bytesRead
var norm uint64
norm, bytesRead = binary.Uvarint(value[currOffset:])
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, no norm")
}
currOffset += bytesRead
tfr.norm = math.Float32frombits(uint32(norm))
tfr.vectors = nil
if !includeTermVectors {
return nil
}
var field uint64
field, bytesRead = binary.Uvarint(value[currOffset:])
for bytesRead > 0 {
currOffset += bytesRead
tv := TermVector{}
tv.field = uint16(field)
// at this point we expect at least one term vector
if tfr.vectors == nil {
tfr.vectors = make([]*TermVector, 0)
}
tv.pos, bytesRead = binary.Uvarint(value[currOffset:])
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, vector contains no position")
}
currOffset += bytesRead
tv.start, bytesRead = binary.Uvarint(value[currOffset:])
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, vector contains no start")
}
currOffset += bytesRead
tv.end, bytesRead = binary.Uvarint(value[currOffset:])
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, vector contains no end")
}
currOffset += bytesRead
var arrayPositionsLen uint64 = 0
arrayPositionsLen, bytesRead = binary.Uvarint(value[currOffset:])
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, vector contains no arrayPositionLen")
}
currOffset += bytesRead
if arrayPositionsLen > 0 {
tv.arrayPositions = make([]uint64, arrayPositionsLen)
for i := 0; uint64(i) < arrayPositionsLen; i++ {
tv.arrayPositions[i], bytesRead = binary.Uvarint(value[currOffset:])
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, vector contains no arrayPosition of index %d", i)
}
currOffset += bytesRead
}
}
tfr.vectors = append(tfr.vectors, &tv)
// try to read next record (may not exist)
field, bytesRead = binary.Uvarint(value[currOffset:])
}
if len(value[currOffset:]) > 0 && bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, vector field invalid")
}
return nil
}
func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) {
rv, err := NewTermFrequencyRowK(key)
if err != nil {
return nil, err
}
err = rv.parseV(value, true)
if err != nil {
return nil, err
}
return rv, nil
}
type BackIndexRow struct {
doc []byte
termsEntries []*BackIndexTermsEntry
storedEntries []*BackIndexStoreEntry
}
func (br *BackIndexRow) AllTermKeys() [][]byte {
if br == nil {
return nil
}
rv := make([][]byte, 0, len(br.termsEntries)) // FIXME this underestimates severely
for _, termsEntry := range br.termsEntries {
for i := range termsEntry.Terms {
termRow := NewTermFrequencyRow([]byte(termsEntry.Terms[i]), uint16(termsEntry.GetField()), br.doc, 0, 0)
rv = append(rv, termRow.Key())
}
}
return rv
}
func (br *BackIndexRow) AllStoredKeys() [][]byte {
if br == nil {
return nil
}
rv := make([][]byte, len(br.storedEntries))
for i, storedEntry := range br.storedEntries {
storedRow := NewStoredRow(br.doc, uint16(storedEntry.GetField()), storedEntry.GetArrayPositions(), 'x', []byte{})
rv[i] = storedRow.Key()
}
return rv
}
func (br *BackIndexRow) Key() []byte {
buf := make([]byte, br.KeySize())
size, _ := br.KeyTo(buf)
return buf[:size]
}
func (br *BackIndexRow) KeySize() int {
return len(br.doc) + 1
}
func (br *BackIndexRow) KeyTo(buf []byte) (int, error) {
buf[0] = 'b'
used := copy(buf[1:], br.doc)
return used + 1, nil
}
func (br *BackIndexRow) Value() []byte {
buf := make([]byte, br.ValueSize())
size, _ := br.ValueTo(buf)
return buf[:size]
}
func (br *BackIndexRow) ValueSize() int {
birv := &BackIndexRowValue{
TermsEntries: br.termsEntries,
StoredEntries: br.storedEntries,
}
return birv.Size()
}
func (br *BackIndexRow) ValueTo(buf []byte) (int, error) {
birv := &BackIndexRowValue{
TermsEntries: br.termsEntries,
StoredEntries: br.storedEntries,
}
return birv.MarshalTo(buf)
}
func (br *BackIndexRow) String() string {
return fmt.Sprintf("Backindex DocId: `%s` Terms Entries: %v, Stored Entries: %v", string(br.doc), br.termsEntries, br.storedEntries)
}
func NewBackIndexRow(docID []byte, entries []*BackIndexTermsEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow {
return &BackIndexRow{
doc: docID,
termsEntries: entries,
storedEntries: storedFields,
}
}
func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) {
rv := BackIndexRow{}
buf := bytes.NewBuffer(key)
_, err := buf.ReadByte() // type
if err != nil {
return nil, err
}
rv.doc, err = buf.ReadBytes(ByteSeparator)
if err == io.EOF && len(rv.doc) < 1 {
err = fmt.Errorf("invalid doc length 0 - % x", key)
}
if err != nil && err != io.EOF {
return nil, err
} else if err == nil {
rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte
}
var birv BackIndexRowValue
err = proto.Unmarshal(value, &birv)
if err != nil {
return nil, err
}
rv.termsEntries = birv.TermsEntries
rv.storedEntries = birv.StoredEntries
return &rv, nil
}
// STORED
type StoredRow struct {
doc []byte
field uint16
arrayPositions []uint64
typ byte
value []byte
}
func (s *StoredRow) Key() []byte {
buf := make([]byte, s.KeySize())
size, _ := s.KeyTo(buf)
return buf[0:size]
}
func (s *StoredRow) KeySize() int {
return 1 + len(s.doc) + 1 + 2 + (binary.MaxVarintLen64 * len(s.arrayPositions))
}
func (s *StoredRow) KeyTo(buf []byte) (int, error) {
docLen := len(s.doc)
buf[0] = 's'
copy(buf[1:], s.doc)
buf[1+docLen] = ByteSeparator
binary.LittleEndian.PutUint16(buf[1+docLen+1:], s.field)
bytesUsed := 1 + docLen + 1 + 2
for _, arrayPosition := range s.arrayPositions {
varbytes := binary.PutUvarint(buf[bytesUsed:], arrayPosition)
bytesUsed += varbytes
}
return bytesUsed, nil
}
func (s *StoredRow) Value() []byte {
buf := make([]byte, s.ValueSize())
size, _ := s.ValueTo(buf)
return buf[:size]
}
func (s *StoredRow) ValueSize() int {
return len(s.value) + 1
}
func (s *StoredRow) ValueTo(buf []byte) (int, error) {
buf[0] = s.typ
used := copy(buf[1:], s.value)
return used + 1, nil
}
func (s *StoredRow) String() string {
return fmt.Sprintf("Document: %s Field %d, Array Positions: %v, Type: %s Value: %s", s.doc, s.field, s.arrayPositions, string(s.typ), s.value)
}
func (s *StoredRow) ScanPrefixForDoc() []byte {
docLen := len(s.doc)
buf := make([]byte, 1+docLen+1)
buf[0] = 's'
copy(buf[1:], s.doc)
buf[1+docLen] = ByteSeparator
return buf
}
func NewStoredRow(docID []byte, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow {
return &StoredRow{
doc: docID,
field: field,
arrayPositions: arrayPositions,
typ: typ,
value: value,
}
}
func NewStoredRowK(key []byte) (*StoredRow, error) {
rv := StoredRow{}
buf := bytes.NewBuffer(key)
_, err := buf.ReadByte() // type
if err != nil {
return nil, err
}
rv.doc, err = buf.ReadBytes(ByteSeparator)
if len(rv.doc) < 2 { // 1 for min doc id length, 1 for separator
err = fmt.Errorf("invalid doc length 0")
return nil, err
}
rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte
err = binary.Read(buf, binary.LittleEndian, &rv.field)
if err != nil {
return nil, err
}
rv.arrayPositions = make([]uint64, 0)
nextArrayPos, err := binary.ReadUvarint(buf)
for err == nil {
rv.arrayPositions = append(rv.arrayPositions, nextArrayPos)
nextArrayPos, err = binary.ReadUvarint(buf)
}
return &rv, nil
}
func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
rv, err := NewStoredRowK(key)
if err != nil {
return nil, err
}
rv.typ = value[0]
rv.value = value[1:]
return rv, nil
}
type backIndexFieldTermVisitor func(field uint32, term []byte)
// visitBackIndexRow is designed to process a protobuf encoded
// value, without creating unnecessary garbage. Instead values are passed
// to a callback, inspected first, and only copied if necessary.
// Due to the fact that this borrows from generated code, it must be marnually
// updated if the protobuf definition changes.
//
// This code originates from:
// func (m *BackIndexRowValue) Unmarshal(data []byte) error
// the sections which create garbage or parse unintersting sections
// have been commented out. This was done by design to allow for easier
// merging in the future if that original function is regenerated
func visitBackIndexRow(data []byte, callback backIndexFieldTermVisitor) error {
l := len(data)
iNdEx := 0
for iNdEx < l {
var wire uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
switch fieldNum {
case 1:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
msglen |= (int(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + msglen
if msglen < 0 {
return ErrInvalidLengthUpsidedown
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
// dont parse term entries
// m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
// if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
// return err
// }
// instead, inspect them
if err := visitBackIndexRowFieldTerms(data[iNdEx:postIndex], callback); err != nil {
return err
}
iNdEx = postIndex
case 2:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
msglen |= (int(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + msglen
if msglen < 0 {
return ErrInvalidLengthUpsidedown
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
// don't parse stored entries
// m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
// if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
// return err
// }
iNdEx = postIndex
default:
var sizeOfWire int
for {
sizeOfWire++
wire >>= 7
if wire == 0 {
break
}
}
iNdEx -= sizeOfWire
skippy, err := skipUpsidedown(data[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthUpsidedown
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
// don't track unrecognized data
//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
return nil
}
// visitBackIndexRowFieldTerms is designed to process a protobuf encoded
// sub-value within the BackIndexRowValue, without creating unnecessary garbage.
// Instead values are passed to a callback, inspected first, and only copied if
// necessary. Due to the fact that this borrows from generated code, it must
// be marnually updated if the protobuf definition changes.
//
// This code originates from:
// func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
// the sections which create garbage or parse uninteresting sections
// have been commented out. This was done by design to allow for easier
// merging in the future if that original function is regenerated
func visitBackIndexRowFieldTerms(data []byte, callback backIndexFieldTermVisitor) error {
var theField uint32
var hasFields [1]uint64
l := len(data)
iNdEx := 0
for iNdEx < l {
var wire uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
switch fieldNum {
case 1:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
}
var v uint32
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
v |= (uint32(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
// m.Field = &v
theField = v
hasFields[0] |= uint64(0x00000001)
case 2:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
}
var stringLen uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
stringLen |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + int(stringLen)
if postIndex > l {
return io.ErrUnexpectedEOF
}
//m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
callback(theField, data[iNdEx:postIndex])
iNdEx = postIndex
default:
var sizeOfWire int
for {
sizeOfWire++
wire >>= 7
if wire == 0 {
break
}
}
iNdEx -= sizeOfWire
skippy, err := skipUpsidedown(data[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthUpsidedown
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
// if hasFields[0]&uint64(0x00000001) == 0 {
// return new(github_com_golang_protobuf_proto.RequiredNotSetError)
// }
return nil
}