Merge branch 'master' into avoid-app-herder-hot-lock
This commit is contained in:
commit
f1c26e29f0
|
@ -209,9 +209,7 @@ var docvalueCmd = &cobra.Command{
|
|||
for i := 0; i < int(numDocs); i++ {
|
||||
curChunkHeader[i].DocNum, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(nread)
|
||||
curChunkHeader[i].DocDvLoc, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(nread)
|
||||
curChunkHeader[i].DocDvLen, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
curChunkHeader[i].DocDvOffset, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(nread)
|
||||
}
|
||||
|
||||
|
@ -255,7 +253,7 @@ func getDocValueLocs(docNum uint64, metaHeader []zap.MetaData) (uint64, uint64)
|
|||
return metaHeader[i].DocNum >= docNum
|
||||
})
|
||||
if i < len(metaHeader) && metaHeader[i].DocNum == docNum {
|
||||
return metaHeader[i].DocDvLoc, metaHeader[i].DocDvLen
|
||||
return zap.ReadDocValueBoundary(i, metaHeader)
|
||||
}
|
||||
return math.MaxUint64, math.MaxUint64
|
||||
}
|
||||
|
|
|
@ -94,8 +94,6 @@ type IndexReader interface {
|
|||
DumpFields() chan interface{}
|
||||
|
||||
Close() error
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
// FieldTerms contains the terms used by a document, keyed by field
|
||||
|
|
|
@ -22,7 +22,7 @@ import (
|
|||
"github.com/Smerity/govarint"
|
||||
)
|
||||
|
||||
const version uint32 = 4
|
||||
const version uint32 = 6
|
||||
|
||||
const fieldNotUninverted = math.MaxUint64
|
||||
|
||||
|
|
|
@ -47,9 +47,8 @@ type chunkedContentCoder struct {
|
|||
// MetaData represents the data information inside a
|
||||
// chunk.
|
||||
type MetaData struct {
|
||||
DocNum uint64 // docNum of the data inside the chunk
|
||||
DocDvLoc uint64 // starting offset for a given docid
|
||||
DocDvLen uint64 // length of data inside the chunk for the given docid
|
||||
DocNum uint64 // docNum of the data inside the chunk
|
||||
DocDvOffset uint64 // offset of data inside the chunk for the given docid
|
||||
}
|
||||
|
||||
// newChunkedContentCoder returns a new chunk content coder which
|
||||
|
@ -96,7 +95,7 @@ func (c *chunkedContentCoder) flushContents() error {
|
|||
|
||||
// write out the metaData slice
|
||||
for _, meta := range c.chunkMeta {
|
||||
_, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvLoc, meta.DocDvLen)
|
||||
_, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvOffset)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -130,7 +129,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
|
|||
c.currChunk = chunk
|
||||
}
|
||||
|
||||
// mark the starting offset for this doc
|
||||
// get the starting offset for this doc
|
||||
dvOffset := c.chunkBuf.Len()
|
||||
dvSize, err := c.chunkBuf.Write(vals)
|
||||
if err != nil {
|
||||
|
@ -138,9 +137,8 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
|
|||
}
|
||||
|
||||
c.chunkMeta = append(c.chunkMeta, MetaData{
|
||||
DocNum: docNum,
|
||||
DocDvLoc: uint64(dvOffset),
|
||||
DocDvLen: uint64(dvSize),
|
||||
DocNum: docNum,
|
||||
DocDvOffset: uint64(dvOffset + dvSize),
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
@ -156,9 +154,11 @@ func (c *chunkedContentCoder) Write(w io.Writer) (int, error) {
|
|||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
// write out the chunk lens
|
||||
for _, chunkLen := range c.chunkLens {
|
||||
n := binary.PutUvarint(buf, uint64(chunkLen))
|
||||
|
||||
chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens)
|
||||
// write out the chunk offsets
|
||||
for _, chunkOffset := range chunkOffsets {
|
||||
n := binary.PutUvarint(buf, chunkOffset)
|
||||
nw, err = w.Write(buf[:n])
|
||||
tw += nw
|
||||
if err != nil {
|
||||
|
@ -173,3 +173,13 @@ func (c *chunkedContentCoder) Write(w io.Writer) (int, error) {
|
|||
}
|
||||
return tw, nil
|
||||
}
|
||||
|
||||
// ReadDocValueBoundary elicits the start, end offsets from a
|
||||
// metaData header slice
|
||||
func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64) {
|
||||
var start uint64
|
||||
if chunk > 0 {
|
||||
start = metaHeaders[chunk-1].DocDvOffset
|
||||
}
|
||||
return start, metaHeaders[chunk].DocDvOffset
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ func TestChunkContentCoder(t *testing.T) {
|
|||
docNums: []uint64{0},
|
||||
vals: [][]byte{[]byte("bleve")},
|
||||
// 1 chunk, chunk-0 length 11(b), value
|
||||
expected: string([]byte{0x1, 0xb, 0x1, 0x0, 0x0, 0x05, 0x05, 0x10, 0x62, 0x6c, 0x65, 0x76, 0x65}),
|
||||
expected: string([]byte{0x1, 0xa, 0x1, 0x0, 0x05, 0x05, 0x10, 0x62, 0x6c, 0x65, 0x76, 0x65}),
|
||||
},
|
||||
{
|
||||
maxDocNum: 1,
|
||||
|
@ -46,8 +46,8 @@ func TestChunkContentCoder(t *testing.T) {
|
|||
[]byte("scorch"),
|
||||
},
|
||||
|
||||
expected: string([]byte{0x02, 0x0c, 0x0c, 0x01, 0x00, 0x00, 0x06, 0x06, 0x14,
|
||||
0x75, 0x70, 0x73, 0x69, 0x64, 0x65, 0x01, 0x01, 0x00, 0x06, 0x06,
|
||||
expected: string([]byte{0x02, 0x0b, 0x16, 0x01, 0x00, 0x06, 0x06, 0x14,
|
||||
0x75, 0x70, 0x73, 0x69, 0x64, 0x65, 0x01, 0x01, 0x06, 0x06,
|
||||
0x14, 0x73, 0x63, 0x6f, 0x72, 0x63, 0x68}),
|
||||
},
|
||||
}
|
||||
|
@ -69,7 +69,7 @@ func TestChunkContentCoder(t *testing.T) {
|
|||
}
|
||||
|
||||
if !reflect.DeepEqual(test.expected, string(actual.Bytes())) {
|
||||
t.Errorf("got % s, expected % s", string(actual.Bytes()), test.expected)
|
||||
t.Errorf("got:%s, expected:%s", string(actual.Bytes()), test.expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,7 +38,7 @@ type docValueIterator struct {
|
|||
field string
|
||||
curChunkNum uint64
|
||||
numChunks uint64
|
||||
chunkLens []uint64
|
||||
chunkOffsets []uint64
|
||||
dvDataLoc uint64
|
||||
curChunkHeader []MetaData
|
||||
curChunkData []byte // compressed data cache
|
||||
|
@ -47,7 +47,7 @@ type docValueIterator struct {
|
|||
func (di *docValueIterator) size() int {
|
||||
return reflectStaticSizedocValueIterator + size.SizeOfPtr +
|
||||
len(di.field) +
|
||||
len(di.chunkLens)*size.SizeOfUint64 +
|
||||
len(di.chunkOffsets)*size.SizeOfUint64 +
|
||||
len(di.curChunkHeader)*reflectStaticSizeMetaData +
|
||||
len(di.curChunkData)
|
||||
}
|
||||
|
@ -69,7 +69,7 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string,
|
|||
}
|
||||
|
||||
// read the number of chunks, chunk lengths
|
||||
var offset, clen uint64
|
||||
var offset, loc uint64
|
||||
numChunks, read := binary.Uvarint(s.mem[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64])
|
||||
if read <= 0 {
|
||||
return nil, fmt.Errorf("failed to read the field "+
|
||||
|
@ -78,16 +78,16 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string,
|
|||
offset += uint64(read)
|
||||
|
||||
fdvIter := &docValueIterator{
|
||||
curChunkNum: math.MaxUint64,
|
||||
field: field,
|
||||
chunkLens: make([]uint64, int(numChunks)),
|
||||
curChunkNum: math.MaxUint64,
|
||||
field: field,
|
||||
chunkOffsets: make([]uint64, int(numChunks)),
|
||||
}
|
||||
for i := 0; i < int(numChunks); i++ {
|
||||
clen, read = binary.Uvarint(s.mem[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64])
|
||||
loc, read = binary.Uvarint(s.mem[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64])
|
||||
if read <= 0 {
|
||||
return nil, fmt.Errorf("corrupted chunk length during segment load")
|
||||
return nil, fmt.Errorf("corrupted chunk offset during segment load")
|
||||
}
|
||||
fdvIter.chunkLens[i] = clen
|
||||
fdvIter.chunkOffsets[i] = loc
|
||||
offset += uint64(read)
|
||||
}
|
||||
|
||||
|
@ -99,12 +99,11 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
|
|||
localDocNum uint64, s *SegmentBase) error {
|
||||
// advance to the chunk where the docValues
|
||||
// reside for the given docNum
|
||||
destChunkDataLoc := di.dvDataLoc
|
||||
for i := 0; i < int(chunkNumber); i++ {
|
||||
destChunkDataLoc += di.chunkLens[i]
|
||||
}
|
||||
destChunkDataLoc, curChunkEnd := di.dvDataLoc, di.dvDataLoc
|
||||
start, end := readChunkBoundary(int(chunkNumber), di.chunkOffsets)
|
||||
destChunkDataLoc += start
|
||||
curChunkEnd += end
|
||||
|
||||
curChunkSize := di.chunkLens[chunkNumber]
|
||||
// read the number of docs reside in the chunk
|
||||
numDocs, read := binary.Uvarint(s.mem[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64])
|
||||
if read <= 0 {
|
||||
|
@ -117,14 +116,12 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
|
|||
for i := 0; i < int(numDocs); i++ {
|
||||
di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(read)
|
||||
di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(read)
|
||||
di.curChunkHeader[i].DocDvLen, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
di.curChunkHeader[i].DocDvOffset, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(read)
|
||||
}
|
||||
|
||||
compressedDataLoc := chunkMetaLoc + offset
|
||||
dataLength := destChunkDataLoc + curChunkSize - compressedDataLoc
|
||||
dataLength := curChunkEnd - compressedDataLoc
|
||||
di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength]
|
||||
di.curChunkNum = chunkNumber
|
||||
return nil
|
||||
|
@ -133,8 +130,8 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
|
|||
func (di *docValueIterator) visitDocValues(docNum uint64,
|
||||
visitor index.DocumentFieldTermVisitor) error {
|
||||
// binary search the term locations for the docNum
|
||||
start, length := di.getDocValueLocs(docNum)
|
||||
if start == math.MaxUint64 || length == math.MaxUint64 {
|
||||
start, end := di.getDocValueLocs(docNum)
|
||||
if start == math.MaxUint64 || end == math.MaxUint64 {
|
||||
return nil
|
||||
}
|
||||
// uncompress the already loaded data
|
||||
|
@ -144,7 +141,7 @@ func (di *docValueIterator) visitDocValues(docNum uint64,
|
|||
}
|
||||
|
||||
// pick the terms for the given docNum
|
||||
uncompressed = uncompressed[start : start+length]
|
||||
uncompressed = uncompressed[start:end]
|
||||
for {
|
||||
i := bytes.Index(uncompressed, termSeparatorSplitSlice)
|
||||
if i < 0 {
|
||||
|
@ -163,7 +160,7 @@ func (di *docValueIterator) getDocValueLocs(docNum uint64) (uint64, uint64) {
|
|||
return di.curChunkHeader[i].DocNum >= docNum
|
||||
})
|
||||
if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum {
|
||||
return di.curChunkHeader[i].DocDvLoc, di.curChunkHeader[i].DocDvLen
|
||||
return ReadDocValueBoundary(i, di.curChunkHeader)
|
||||
}
|
||||
return math.MaxUint64, math.MaxUint64
|
||||
}
|
||||
|
|
|
@ -111,10 +111,13 @@ func (c *chunkedIntCoder) Write(w io.Writer) (int, error) {
|
|||
}
|
||||
buf := c.buf
|
||||
|
||||
// write out the number of chunks & each chunkLen
|
||||
n := binary.PutUvarint(buf, uint64(len(c.chunkLens)))
|
||||
for _, chunkLen := range c.chunkLens {
|
||||
n += binary.PutUvarint(buf[n:], uint64(chunkLen))
|
||||
// convert the chunk lengths into chunk offsets
|
||||
chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens)
|
||||
|
||||
// write out the number of chunks & each chunk offsets
|
||||
n := binary.PutUvarint(buf, uint64(len(chunkOffsets)))
|
||||
for _, chunkOffset := range chunkOffsets {
|
||||
n += binary.PutUvarint(buf[n:], chunkOffset)
|
||||
}
|
||||
|
||||
tw, err := w.Write(buf[:n])
|
||||
|
@ -134,3 +137,36 @@ func (c *chunkedIntCoder) Write(w io.Writer) (int, error) {
|
|||
func (c *chunkedIntCoder) FinalSize() int {
|
||||
return len(c.final)
|
||||
}
|
||||
|
||||
// modifyLengthsToEndOffsets converts the chunk length array
// to a chunk end offset array, i.e. the running total of the lengths.
//
// The conversion happens in place: the given slice is overwritten and
// that same slice is returned, so after the call it no longer holds
// lengths. A nil or empty slice is returned unchanged.
//
// The readChunkBoundary will figure out the start and end of every
// chunk from these offsets. Starting offset of i'th index is stored
// in i-1'th position except for 0'th index and ending offset
// is stored at i'th index position.
// For 0'th element, starting position is always zero.
// eg:
// Lens -> 5 5 5 5 => 5 10 15 20
// Lens -> 0 5 0 5 => 0 5 5 10
// Lens -> 0 0 0 5 => 0 0 0 5
// Lens -> 5 0 0 0 => 5 5 5 5
// Lens -> 0 5 0 0 => 0 5 5 5
// Lens -> 0 0 5 0 => 0 0 5 5
func modifyLengthsToEndOffsets(lengths []uint64) []uint64 {
	var runningOffset uint64
	for i, length := range lengths {
		// length is read before lengths[i] is overwritten with the total
		runningOffset += length
		lengths[i] = runningOffset
	}
	return lengths
}
|
||||
|
||||
// readChunkBoundary returns the start and end offsets of the given chunk
// from a slice of chunk end offsets.
//
// The end offset of a chunk is stored at its own index. Its start offset is
// the end offset stored at the previous index, and chunk 0 always starts at
// zero. A chunk number outside the slice panics.
func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) {
	if chunk <= 0 {
		// the first chunk has no predecessor, so it starts at offset zero
		return 0, offsets[chunk]
	}
	return offsets[chunk-1], offsets[chunk]
}
|
||||
|
|
|
@ -46,8 +46,8 @@ func TestChunkIntCoder(t *testing.T) {
|
|||
[]uint64{3},
|
||||
[]uint64{7},
|
||||
},
|
||||
// 2 chunks, chunk-0 length 1, chunk-1 length 1, value 3, value 7
|
||||
expected: []byte{0x2, 0x1, 0x1, 0x3, 0x7},
|
||||
// 2 chunks, chunk-0 offset 1, chunk-1 offset 2, value 3, value 7
|
||||
expected: []byte{0x2, 0x1, 0x2, 0x3, 0x7},
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -71,3 +71,199 @@ func TestChunkIntCoder(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestChunkLengthToOffsets(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
lengths []uint64
|
||||
expectedOffsets []uint64
|
||||
}{
|
||||
{
|
||||
lengths: []uint64{5, 5, 5, 5, 5},
|
||||
expectedOffsets: []uint64{5, 10, 15, 20, 25},
|
||||
},
|
||||
{
|
||||
lengths: []uint64{0, 5, 0, 5, 0},
|
||||
expectedOffsets: []uint64{0, 5, 5, 10, 10},
|
||||
},
|
||||
{
|
||||
lengths: []uint64{0, 0, 0, 0, 5},
|
||||
expectedOffsets: []uint64{0, 0, 0, 0, 5},
|
||||
},
|
||||
{
|
||||
lengths: []uint64{5, 0, 0, 0, 0},
|
||||
expectedOffsets: []uint64{5, 5, 5, 5, 5},
|
||||
},
|
||||
{
|
||||
lengths: []uint64{0, 5, 0, 0, 0},
|
||||
expectedOffsets: []uint64{0, 5, 5, 5, 5},
|
||||
},
|
||||
{
|
||||
lengths: []uint64{0, 0, 0, 5, 0},
|
||||
expectedOffsets: []uint64{0, 0, 0, 5, 5},
|
||||
},
|
||||
{
|
||||
lengths: []uint64{0, 0, 0, 5, 5},
|
||||
expectedOffsets: []uint64{0, 0, 0, 5, 10},
|
||||
},
|
||||
{
|
||||
lengths: []uint64{5, 5, 5, 0, 0},
|
||||
expectedOffsets: []uint64{5, 10, 15, 15, 15},
|
||||
},
|
||||
{
|
||||
lengths: []uint64{5},
|
||||
expectedOffsets: []uint64{5},
|
||||
},
|
||||
{
|
||||
lengths: []uint64{5, 5},
|
||||
expectedOffsets: []uint64{5, 10},
|
||||
},
|
||||
}
|
||||
|
||||
for i, test := range tests {
|
||||
modifyLengthsToEndOffsets(test.lengths)
|
||||
if !reflect.DeepEqual(test.expectedOffsets, test.lengths) {
|
||||
t.Errorf("Test: %d failed, got %+v, expected %+v", i, test.lengths, test.expectedOffsets)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestChunkReadBoundaryFromOffsets(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
chunkNumber int
|
||||
offsets []uint64
|
||||
expectedStart uint64
|
||||
expectedEnd uint64
|
||||
}{
|
||||
{
|
||||
offsets: []uint64{5, 10, 15, 20, 25},
|
||||
chunkNumber: 4,
|
||||
expectedStart: 20,
|
||||
expectedEnd: 25,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{5, 10, 15, 20, 25},
|
||||
chunkNumber: 0,
|
||||
expectedStart: 0,
|
||||
expectedEnd: 5,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{5, 10, 15, 20, 25},
|
||||
chunkNumber: 2,
|
||||
expectedStart: 10,
|
||||
expectedEnd: 15,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 5, 5, 10, 10},
|
||||
chunkNumber: 4,
|
||||
expectedStart: 10,
|
||||
expectedEnd: 10,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 5, 5, 10, 10},
|
||||
chunkNumber: 1,
|
||||
expectedStart: 0,
|
||||
expectedEnd: 5,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{5, 5, 5, 5, 5},
|
||||
chunkNumber: 0,
|
||||
expectedStart: 0,
|
||||
expectedEnd: 5,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{5, 5, 5, 5, 5},
|
||||
chunkNumber: 4,
|
||||
expectedStart: 5,
|
||||
expectedEnd: 5,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{5, 5, 5, 5, 5},
|
||||
chunkNumber: 1,
|
||||
expectedStart: 5,
|
||||
expectedEnd: 5,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 5, 5, 5, 5},
|
||||
chunkNumber: 1,
|
||||
expectedStart: 0,
|
||||
expectedEnd: 5,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 5, 5, 5, 5},
|
||||
chunkNumber: 0,
|
||||
expectedStart: 0,
|
||||
expectedEnd: 0,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 0, 0, 5, 5},
|
||||
chunkNumber: 2,
|
||||
expectedStart: 0,
|
||||
expectedEnd: 0,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 0, 0, 5, 5},
|
||||
chunkNumber: 1,
|
||||
expectedStart: 0,
|
||||
expectedEnd: 0,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 0, 0, 0, 5},
|
||||
chunkNumber: 4,
|
||||
expectedStart: 0,
|
||||
expectedEnd: 5,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 0, 0, 0, 5},
|
||||
chunkNumber: 2,
|
||||
expectedStart: 0,
|
||||
expectedEnd: 0,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{5, 10, 15, 15, 15},
|
||||
chunkNumber: 0,
|
||||
expectedStart: 0,
|
||||
expectedEnd: 5,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{5, 10, 15, 15, 15},
|
||||
chunkNumber: 1,
|
||||
expectedStart: 5,
|
||||
expectedEnd: 10,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{5, 10, 15, 15, 15},
|
||||
chunkNumber: 2,
|
||||
expectedStart: 10,
|
||||
expectedEnd: 15,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{5, 10, 15, 15, 15},
|
||||
chunkNumber: 3,
|
||||
expectedStart: 15,
|
||||
expectedEnd: 15,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{5, 10, 15, 15, 15},
|
||||
chunkNumber: 4,
|
||||
expectedStart: 15,
|
||||
expectedEnd: 15,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{5},
|
||||
chunkNumber: 0,
|
||||
expectedStart: 0,
|
||||
expectedEnd: 5,
|
||||
},
|
||||
}
|
||||
|
||||
for i, test := range tests {
|
||||
s, e := readChunkBoundary(test.chunkNumber, test.offsets)
|
||||
if test.expectedStart != s || test.expectedEnd != e {
|
||||
t.Errorf("Test: %d failed for chunkNumber: %d got start: %d end: %d,"+
|
||||
" expected start: %d end: %d", i, test.chunkNumber, s, e,
|
||||
test.expectedStart, test.expectedEnd)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -189,9 +189,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
|
|||
var numFreqChunks uint64
|
||||
numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
rv.freqChunkLens = make([]uint64, int(numFreqChunks))
|
||||
rv.freqChunkOffsets = make([]uint64, int(numFreqChunks))
|
||||
for i := 0; i < int(numFreqChunks); i++ {
|
||||
rv.freqChunkLens[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
|
||||
rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
}
|
||||
rv.freqChunkStart = p.freqOffset + n
|
||||
|
@ -201,9 +201,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
|
|||
var numLocChunks uint64
|
||||
numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
rv.locChunkLens = make([]uint64, int(numLocChunks))
|
||||
rv.locChunkOffsets = make([]uint64, int(numLocChunks))
|
||||
for i := 0; i < int(numLocChunks); i++ {
|
||||
rv.locChunkLens[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
|
||||
rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
}
|
||||
rv.locChunkStart = p.locOffset + n
|
||||
|
@ -316,11 +316,11 @@ type PostingsIterator struct {
|
|||
locDecoder *govarint.Base128Decoder
|
||||
locReader *bytes.Reader
|
||||
|
||||
freqChunkLens []uint64
|
||||
freqChunkStart uint64
|
||||
freqChunkOffsets []uint64
|
||||
freqChunkStart uint64
|
||||
|
||||
locChunkLens []uint64
|
||||
locChunkStart uint64
|
||||
locChunkOffsets []uint64
|
||||
locChunkStart uint64
|
||||
|
||||
locBitmap *roaring.Bitmap
|
||||
|
||||
|
@ -337,8 +337,8 @@ func (i *PostingsIterator) Size() int {
|
|||
sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr +
|
||||
len(i.currChunkFreqNorm) +
|
||||
len(i.currChunkLoc) +
|
||||
len(i.freqChunkLens)*size.SizeOfUint64 +
|
||||
len(i.locChunkLens)*size.SizeOfUint64 +
|
||||
len(i.freqChunkOffsets)*size.SizeOfUint64 +
|
||||
len(i.locChunkOffsets)*size.SizeOfUint64 +
|
||||
i.next.Size()
|
||||
|
||||
if i.locBitmap != nil {
|
||||
|
@ -353,16 +353,14 @@ func (i *PostingsIterator) Size() int {
|
|||
}
|
||||
|
||||
func (i *PostingsIterator) loadChunk(chunk int) error {
|
||||
if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) {
|
||||
return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens))
|
||||
if chunk >= len(i.freqChunkOffsets) || chunk >= len(i.locChunkOffsets) {
|
||||
return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkOffsets), len(i.locChunkOffsets))
|
||||
}
|
||||
|
||||
// load freq chunk bytes
|
||||
start := i.freqChunkStart
|
||||
for j := 0; j < chunk; j++ {
|
||||
start += i.freqChunkLens[j]
|
||||
}
|
||||
end := start + i.freqChunkLens[chunk]
|
||||
end, start := i.freqChunkStart, i.freqChunkStart
|
||||
s, e := readChunkBoundary(chunk, i.freqChunkOffsets)
|
||||
start += s
|
||||
end += e
|
||||
i.currChunkFreqNorm = i.postings.sb.mem[start:end]
|
||||
if i.freqNormReader == nil {
|
||||
i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm)
|
||||
|
@ -371,12 +369,10 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
|
|||
i.freqNormReader.Reset(i.currChunkFreqNorm)
|
||||
}
|
||||
|
||||
// load loc chunk bytes
|
||||
start = i.locChunkStart
|
||||
for j := 0; j < chunk; j++ {
|
||||
start += i.locChunkLens[j]
|
||||
}
|
||||
end = start + i.locChunkLens[chunk]
|
||||
end, start = i.locChunkStart, i.locChunkStart
|
||||
s, e = readChunkBoundary(chunk, i.locChunkOffsets)
|
||||
start += s
|
||||
end += e
|
||||
i.currChunkLoc = i.postings.sb.mem[start:end]
|
||||
if i.locReader == nil {
|
||||
i.locReader = bytes.NewReader(i.currChunkLoc)
|
||||
|
|
|
@ -28,7 +28,6 @@ import (
|
|||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
type asynchSegmentResult struct {
|
||||
|
@ -99,12 +98,6 @@ func (i *IndexSnapshot) Close() error {
|
|||
return i.DecRef()
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Size() int {
|
||||
// Just return the size of the pointer for estimating the overhead
|
||||
// during Search, a reference of the IndexSnapshot serves as the reader.
|
||||
return size.SizeOfPtr
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) SizeFull() int {
|
||||
return int(i.size)
|
||||
}
|
||||
|
|
|
@ -20,7 +20,6 @@ import (
|
|||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexReader int
|
||||
|
@ -36,10 +35,6 @@ type IndexReader struct {
|
|||
docCount uint64
|
||||
}
|
||||
|
||||
func (i *IndexReader) Size() int {
|
||||
return reflectStaticSizeIndexReader + size.SizeOfPtr
|
||||
}
|
||||
|
||||
func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
||||
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
|
||||
if fieldExists {
|
||||
|
|
|
@ -203,7 +203,7 @@ type UpsideDownCouchDocIDReader struct {
|
|||
|
||||
func (r *UpsideDownCouchDocIDReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader +
|
||||
r.indexReader.Size()
|
||||
reflectStaticSizeIndexReader + size.SizeOfPtr
|
||||
|
||||
for _, entry := range r.only {
|
||||
sizeInBytes += size.SizeOfString + len(entry)
|
||||
|
|
|
@ -66,8 +66,7 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
|
|||
}
|
||||
|
||||
func (fb *FacetsBuilder) Size() int {
|
||||
sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr +
|
||||
fb.indexReader.Size()
|
||||
sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr
|
||||
|
||||
for k, v := range fb.facets {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
|
|
|
@ -62,8 +62,7 @@ func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searc
|
|||
}
|
||||
|
||||
func (s *BooleanSearcher) Size() int {
|
||||
sizeInBytes := reflectStaticSizeBooleanSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size()
|
||||
sizeInBytes := reflectStaticSizeBooleanSearcher + size.SizeOfPtr
|
||||
|
||||
if s.mustSearcher != nil {
|
||||
sizeInBytes += s.mustSearcher.Size()
|
||||
|
|
|
@ -101,7 +101,6 @@ func newDisjunctionSearcher(indexReader index.IndexReader,
|
|||
|
||||
func (s *DisjunctionSearcher) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDisjunctionSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size() +
|
||||
s.scorer.Size()
|
||||
|
||||
for _, entry := range s.searchers {
|
||||
|
|
|
@ -58,7 +58,6 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options s
|
|||
|
||||
func (s *MatchAllSearcher) Size() int {
|
||||
return reflectStaticSizeMatchAllSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size() +
|
||||
s.reader.Size() +
|
||||
s.scorer.Size()
|
||||
}
|
||||
|
|
|
@ -40,8 +40,7 @@ func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, er
|
|||
}
|
||||
|
||||
func (s *MatchNoneSearcher) Size() int {
|
||||
return reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size()
|
||||
return reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr
|
||||
}
|
||||
|
||||
func (s *MatchNoneSearcher) Count() uint64 {
|
||||
|
|
|
@ -42,8 +42,7 @@ type PhraseSearcher struct {
|
|||
}
|
||||
|
||||
func (s *PhraseSearcher) Size() int {
|
||||
sizeInBytes := reflectStaticSizePhraseSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size()
|
||||
sizeInBytes := reflectStaticSizePhraseSearcher + size.SizeOfPtr
|
||||
|
||||
if s.mustSearcher != nil {
|
||||
sizeInBytes += s.mustSearcher.Size()
|
||||
|
|
|
@ -75,7 +75,6 @@ func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field stri
|
|||
|
||||
func (s *TermSearcher) Size() int {
|
||||
return reflectStaticSizeTermSearcher + size.SizeOfPtr +
|
||||
s.indexReader.Size() +
|
||||
s.reader.Size() +
|
||||
s.tfd.Size() +
|
||||
s.scorer.Size()
|
||||
|
|
Loading…
Reference in New Issue