
minor optimisation to loadChunk method

Sreekanth Sivasankaran 2018-03-09 16:01:37 +05:30
parent b38a61d4cf
commit d6522e7e17
5 changed files with 294 additions and 38 deletions
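
In outline, the optimisation replaces a linear walk over per-chunk lengths with two lookups into a precomputed offsets array: the coders now convert chunk lengths into cumulative starting offsets once at write time, so loadChunk (and loadDvChunk) can locate any chunk's byte range in constant time instead of summing all preceding lengths. A minimal sketch of the before/after boundary computation, with illustrative function names (the real code operates on iterator fields, as shown in the diffs below):

// Before: O(chunk). Sum the lengths of all preceding chunks.
func boundaryFromLengths(chunk int, lens []uint64) (start, end uint64) {
	for i := 0; i < chunk; i++ {
		start += lens[i]
	}
	return start, start + lens[chunk]
}

// After: O(1). offsets[i] holds the start of chunk i, and offsets[0]
// doubles as the length of the last chunk, since chunk 0 always starts at 0.
func boundaryFromOffsets(chunk int, offsets []uint64) (start, end uint64) {
	if chunk > 0 {
		start = offsets[chunk]
	}
	if chunk < len(offsets)-1 {
		end = offsets[chunk+1]
	} else {
		end = start + offsets[0]
	}
	return start, end
}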

View File

@@ -156,7 +156,12 @@ func (c *chunkedContentCoder) Write(w io.Writer) (int, error) {
if err != nil {
return tw, err
}
// write out the chunk lens
if len(c.chunkLens) > 1 {
chunkLengthsToOffsets(c.chunkLens)
}
// write out the chunk starting offsets
for _, chunkLen := range c.chunkLens {
n := binary.PutUvarint(buf, uint64(chunkLen))
nw, err = w.Write(buf[:n])

View File

@@ -38,7 +38,7 @@ type docValueIterator struct {
field string
curChunkNum uint64
numChunks uint64
chunkLens []uint64
chunkOffsets []uint64
dvDataLoc uint64
curChunkHeader []MetaData
curChunkData []byte // compressed data cache
@@ -47,7 +47,7 @@ type docValueIterator struct {
func (di *docValueIterator) size() int {
return reflectStaticSizedocValueIterator + size.SizeOfPtr +
len(di.field) +
len(di.chunkLens)*size.SizeOfUint64 +
len(di.chunkOffsets)*size.SizeOfUint64 +
len(di.curChunkHeader)*reflectStaticSizeMetaData +
len(di.curChunkData)
}
@@ -78,16 +78,16 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string,
offset += uint64(read)
fdvIter := &docValueIterator{
curChunkNum: math.MaxUint64,
field: field,
chunkLens: make([]uint64, int(numChunks)),
curChunkNum: math.MaxUint64,
field: field,
chunkOffsets: make([]uint64, int(numChunks)),
}
for i := 0; i < int(numChunks); i++ {
clen, read = binary.Uvarint(s.mem[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64])
if read <= 0 {
return nil, fmt.Errorf("corrupted chunk length during segment load")
}
fdvIter.chunkLens[i] = clen
fdvIter.chunkOffsets[i] = clen
offset += uint64(read)
}
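
Each offset is persisted as an unsigned varint and decoded one entry at a time, as in the loop above. A standalone round-trip sketch using encoding/binary (buffer handling simplified relative to the segment code):

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	offsets := []uint64{5, 5, 10, 15}

	// encode: one uvarint per offset, as the coders' Write methods do
	var out []byte
	buf := make([]byte, binary.MaxVarintLen64)
	for _, off := range offsets {
		n := binary.PutUvarint(buf, off)
		out = append(out, buf[:n]...)
	}

	// decode: mirrors the loop in loadFieldDocValueIterator
	var decoded []uint64
	for pos := 0; pos < len(out); {
		v, read := binary.Uvarint(out[pos:])
		if read <= 0 {
			panic("corrupted offset during decode")
		}
		decoded = append(decoded, v)
		pos += read
	}
	fmt.Println(decoded) // [5 5 10 15]
}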
@@ -99,12 +99,11 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
localDocNum uint64, s *SegmentBase) error {
// advance to the chunk where the docValues
// reside for the given docNum
destChunkDataLoc := di.dvDataLoc
for i := 0; i < int(chunkNumber); i++ {
destChunkDataLoc += di.chunkLens[i]
}
destChunkDataLoc, curChunkEnd := di.dvDataLoc, di.dvDataLoc
start, end := readChunkBoundary(int(chunkNumber), di.chunkOffsets)
destChunkDataLoc += start
curChunkEnd += end
curChunkSize := di.chunkLens[chunkNumber]
// read the number of docs residing in the chunk
numDocs, read := binary.Uvarint(s.mem[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64])
if read <= 0 {
@@ -124,7 +123,7 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
}
compressedDataLoc := chunkMetaLoc + offset
dataLength := destChunkDataLoc + curChunkSize - compressedDataLoc
dataLength := curChunkEnd - compressedDataLoc
di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength]
di.curChunkNum = chunkNumber
return nil

View File

@@ -111,7 +111,12 @@ func (c *chunkedIntCoder) Write(w io.Writer) (int, error) {
}
buf := c.buf
// write out the number of chunks & each chunkLen
// convert the chunk lengths into starting chunk offsets
if len(c.chunkLens) > 1 {
chunkLengthsToOffsets(c.chunkLens)
}
// write out the number of chunks & each chunk's starting offset
n := binary.PutUvarint(buf, uint64(len(c.chunkLens)))
for _, chunkLen := range c.chunkLens {
n += binary.PutUvarint(buf[n:], uint64(chunkLen))
@@ -134,3 +139,42 @@ func (c *chunkedIntCoder) Write(w io.Writer) (int, error) {
func (c *chunkedIntCoder) FinalSize() int {
return len(c.final)
}
// chunkLengthsToOffsets converts the chunk length array
// into a chunk starting offset array, in place.
// readChunkBoundary then derives the start and end of any
// chunk from these offsets. Since the starting offset of
// the first chunk is always zero, that slot is reused to
// store the length of the current last item in the array
// at any given point.
// For example:
// Lens -> 5 5 5 5 => 5 5 10 15
// Lens -> 0 5 0 5 => 5 0 5 5
// Lens -> 0 0 0 5 => 5 0 0 0
// Lens -> 5 0 0 0 => 0 5 5 5
// Lens -> 0 5 0 0 => 0 0 5 5
// Lens -> 0 0 5 0 => 0 0 0 5
func chunkLengthsToOffsets(lengths []uint64) {
	lengths[1], lengths[0] = lengths[0], lengths[1]
	for i := 2; i < len(lengths); i++ {
		cur := lengths[i]
		lengths[i] = lengths[i-1] + lengths[0]
		lengths[0] = cur
	}
}
// readChunkBoundary returns the start and end byte positions of the
// given chunk, as encoded in the offsets built by chunkLengthsToOffsets.
func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) {
	var start, end uint64
	if chunk > 0 {
		start = offsets[chunk]
	}
	// single element case
	if chunk == 0 && len(offsets) == 1 {
		end = offsets[chunk]
	} else if chunk < len(offsets)-1 {
		end = offsets[chunk+1]
	} else { // for last element
		end = start + offsets[0]
	}
	return start, end
}
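
A short usage sketch of the two helpers together, with values chosen to exercise a zero-length chunk (assumes both functions as defined above, plus fmt):

	lens := []uint64{5, 0, 3}   // per-chunk byte lengths
	chunkLengthsToOffsets(lens) // lens becomes {3, 5, 5}: starts, with lens[0] holding the last length
	for chunk := 0; chunk < 3; chunk++ {
		start, end := readChunkBoundary(chunk, lens)
		fmt.Printf("chunk %d: [%d, %d)\n", chunk, start, end)
	}
	// chunk 0: [0, 5)   chunk 1: [5, 5)   chunk 2: [5, 8)

Note that the coders only apply the conversion when there is more than one chunk; a single-element array keeps its raw length, which the chunk == 0 && len(offsets) == 1 branch of readChunkBoundary handles directly.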

View File

@@ -71,3 +71,215 @@ func TestChunkIntCoder(t *testing.T) {
}
}
}
func TestChunkLengthToOffsets(t *testing.T) {
tests := []struct {
lengths []uint64
expectedOffsets []uint64
}{
{
lengths: []uint64{5, 5, 5, 5, 5},
expectedOffsets: []uint64{5, 5, 10, 15, 20},
},
{
lengths: []uint64{0, 5, 0, 5, 0},
expectedOffsets: []uint64{0, 0, 5, 5, 10},
},
{
lengths: []uint64{0, 0, 0, 0, 5},
expectedOffsets: []uint64{5, 0, 0, 0, 0},
},
{
lengths: []uint64{5, 0, 0, 0, 0},
expectedOffsets: []uint64{0, 5, 5, 5, 5},
},
{
lengths: []uint64{0, 5, 0, 0, 0},
expectedOffsets: []uint64{0, 0, 5, 5, 5},
},
{
lengths: []uint64{0, 0, 0, 5, 0},
expectedOffsets: []uint64{0, 0, 0, 0, 5},
},
{
lengths: []uint64{0, 0, 0, 5, 5},
expectedOffsets: []uint64{5, 0, 0, 0, 5},
},
{
lengths: []uint64{5, 5, 5, 0, 0},
expectedOffsets: []uint64{0, 5, 10, 15, 15},
},
}
for i, test := range tests {
chunkLengthsToOffsets(test.lengths)
if !reflect.DeepEqual(test.expectedOffsets, test.lengths) {
t.Errorf("Test: %d failed, got %+v, expected %+v", i, test.lengths, test.expectedOffsets)
}
}
}
func TestChunkReadBoundaryFromOffsets(t *testing.T) {
tests := []struct {
chunkNumber int
offsets []uint64
expectedStart uint64
expectedEnd uint64
}{
{
offsets: []uint64{5, 5, 10, 15, 20},
chunkNumber: 4,
expectedStart: 20,
expectedEnd: 25,
},
{
offsets: []uint64{5, 5, 10, 15, 20},
chunkNumber: 0,
expectedStart: 0,
expectedEnd: 5,
},
{
offsets: []uint64{5, 5, 10, 15, 20},
chunkNumber: 2,
expectedStart: 10,
expectedEnd: 15,
},
{
offsets: []uint64{0, 0, 5, 5, 10},
chunkNumber: 4,
expectedStart: 10,
expectedEnd: 10,
},
{
offsets: []uint64{0, 0, 5, 5, 10},
chunkNumber: 1,
expectedStart: 0,
expectedEnd: 5,
},
{
offsets: []uint64{5, 0, 0, 0, 0},
chunkNumber: 0,
expectedStart: 0,
expectedEnd: 0,
},
{
offsets: []uint64{5, 0, 0, 0, 0},
chunkNumber: 4,
expectedStart: 0,
expectedEnd: 5,
},
{
offsets: []uint64{5, 0, 0, 0, 0},
chunkNumber: 1,
expectedStart: 0,
expectedEnd: 0,
},
{
offsets: []uint64{0, 5, 5, 5, 5},
chunkNumber: 1,
expectedStart: 5,
expectedEnd: 5,
},
{
offsets: []uint64{0, 5, 5, 5, 5},
chunkNumber: 0,
expectedStart: 0,
expectedEnd: 5,
},
{
offsets: []uint64{0, 0, 5, 5, 5},
chunkNumber: 2,
expectedStart: 5,
expectedEnd: 5,
},
{
offsets: []uint64{0, 0, 5, 5, 5},
chunkNumber: 1,
expectedStart: 0,
expectedEnd: 5,
},
{
offsets: []uint64{0, 0, 0, 0, 5},
chunkNumber: 4,
expectedStart: 5,
expectedEnd: 5,
},
{
offsets: []uint64{0, 0, 0, 0, 5},
chunkNumber: 3,
expectedStart: 0,
expectedEnd: 5,
},
{
offsets: []uint64{0, 0, 0, 0, 5},
chunkNumber: 2,
expectedStart: 0,
expectedEnd: 0,
},
{
offsets: []uint64{5, 0, 0, 0, 5},
chunkNumber: 0,
expectedStart: 0,
expectedEnd: 0,
},
{
offsets: []uint64{5, 0, 0, 0, 5},
chunkNumber: 1,
expectedStart: 0,
expectedEnd: 0,
},
{
offsets: []uint64{5, 0, 0, 0, 5},
chunkNumber: 3,
expectedStart: 0,
expectedEnd: 5,
},
{
offsets: []uint64{5, 0, 0, 0, 5},
chunkNumber: 4,
expectedStart: 5,
expectedEnd: 10,
},
{
offsets: []uint64{0, 5, 10, 15, 15},
chunkNumber: 0,
expectedStart: 0,
expectedEnd: 5,
},
{
offsets: []uint64{0, 5, 10, 15, 15},
chunkNumber: 1,
expectedStart: 5,
expectedEnd: 10,
},
{
offsets: []uint64{0, 5, 10, 15, 15},
chunkNumber: 2,
expectedStart: 10,
expectedEnd: 15,
},
{
offsets: []uint64{0, 5, 10, 15, 15},
chunkNumber: 3,
expectedStart: 15,
expectedEnd: 15,
},
{
offsets: []uint64{0, 5, 10, 15, 15},
chunkNumber: 4,
expectedStart: 15,
expectedEnd: 15,
},
}
for i, test := range tests {
s, e := readChunkBoundary(test.chunkNumber, test.offsets)
if test.expectedStart != s || test.expectedEnd != e {
t.Errorf("Test: %d failed for chunkNumber: %d got start: %d end: %d,"+
" expected start: %d end: %d", i, test.chunkNumber, s, e,
test.expectedStart, test.expectedEnd)
}
}
}
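
Both table-driven tests above can be run in isolation from the package directory with Go's standard test runner, e.g.:

	go test -run 'TestChunkLengthToOffsets|TestChunkReadBoundaryFromOffsets' -v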

View File

@@ -163,9 +163,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
var numFreqChunks uint64
numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
n += uint64(read)
rv.freqChunkLens = make([]uint64, int(numFreqChunks))
rv.freqChunkOffsets = make([]uint64, int(numFreqChunks))
for i := 0; i < int(numFreqChunks); i++ {
rv.freqChunkLens[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
n += uint64(read)
}
rv.freqChunkStart = p.freqOffset + n
@@ -175,9 +175,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
var numLocChunks uint64
numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
n += uint64(read)
rv.locChunkLens = make([]uint64, int(numLocChunks))
rv.locChunkOffsets = make([]uint64, int(numLocChunks))
for i := 0; i < int(numLocChunks); i++ {
rv.locChunkLens[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
n += uint64(read)
}
rv.locChunkStart = p.locOffset + n
@@ -297,11 +297,11 @@ type PostingsIterator struct {
locDecoder *govarint.Base128Decoder
locReader *bytes.Reader
freqChunkLens []uint64
freqChunkStart uint64
freqChunkOffsets []uint64
freqChunkStart uint64
locChunkLens []uint64
locChunkStart uint64
locChunkOffsets []uint64
locChunkStart uint64
locBitmap *roaring.Bitmap
@@ -317,8 +317,8 @@ func (i *PostingsIterator) Size() int {
sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr +
len(i.currChunkFreqNorm) +
len(i.currChunkLoc) +
len(i.freqChunkLens)*size.SizeOfUint64 +
len(i.locChunkLens)*size.SizeOfUint64 +
len(i.freqChunkOffsets)*size.SizeOfUint64 +
len(i.locChunkOffsets)*size.SizeOfUint64 +
i.next.Size()
if i.locBitmap != nil {
@@ -333,16 +333,14 @@ func (i *PostingsIterator) Size() int {
}
func (i *PostingsIterator) loadChunk(chunk int) error {
if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) {
return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens))
if chunk >= len(i.freqChunkOffsets) || chunk >= len(i.locChunkOffsets) {
return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkOffsets), len(i.locChunkOffsets))
}
// load freq chunk bytes
start := i.freqChunkStart
for j := 0; j < chunk; j++ {
start += i.freqChunkLens[j]
}
end := start + i.freqChunkLens[chunk]
end, start := i.freqChunkStart, i.freqChunkStart
s, e := readChunkBoundary(chunk, i.freqChunkOffsets)
start += s
end += e
i.currChunkFreqNorm = i.postings.sb.mem[start:end]
if i.freqNormReader == nil {
i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm)
@@ -351,12 +349,10 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
i.freqNormReader.Reset(i.currChunkFreqNorm)
}
// load loc chunk bytes
start = i.locChunkStart
for j := 0; j < chunk; j++ {
start += i.locChunkLens[j]
}
end = start + i.locChunkLens[chunk]
end, start = i.locChunkStart, i.locChunkStart
s, e = readChunkBoundary(chunk, i.locChunkOffsets)
start += s
end += e
i.currChunkLoc = i.postings.sb.mem[start:end]
if i.locReader == nil {
i.locReader = bytes.NewReader(i.currChunkLoc)
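
As a sanity check on the encoding, boundaries derived from the offsets array can be compared against the old length-summing approach; a small property-style sketch (assumes the two helpers shown earlier are in scope, plus fmt):

	lens := []uint64{4, 0, 7, 1, 3}
	offsets := append([]uint64(nil), lens...)
	chunkLengthsToOffsets(offsets)
	var start uint64
	for chunk, l := range lens {
		s, e := readChunkBoundary(chunk, offsets)
		if s != start || e != start+l {
			fmt.Printf("mismatch at chunk %d\n", chunk)
		}
		start += l
	}
	// no mismatches: both encodings agree on every [start, end) range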