
zap version bump, changed the offset slice format, UTs
Sreekanth Sivasankaran 2018-03-13 12:13:48 +05:30
parent 5271b582bb
commit d1155c223a
7 changed files with 110 additions and 137 deletions
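
The change replaces the per-chunk length slices written into zap segments with cumulative end-offset slices, which is why the format version is bumped from 4 to 5 below. As a minimal standalone sketch (helper names invented here), mirroring the modifyLengthsToEndOffsets and readChunkBoundary functions introduced in this diff:

package main

import "fmt"

// lengthsToEndOffsets converts chunk lengths into cumulative end offsets in
// place: offsets[i] is the end of chunk i, so chunk i starts at offsets[i-1]
// (or 0 for the first chunk).
func lengthsToEndOffsets(lengths []uint64) []uint64 {
	var running uint64
	for i := range lengths {
		running += lengths[i]
		lengths[i] = running
	}
	return lengths
}

// chunkBoundary returns the [start, end) range of one chunk, matching the
// new readChunkBoundary logic.
func chunkBoundary(chunk int, offsets []uint64) (uint64, uint64) {
	var start uint64
	if chunk > 0 {
		start = offsets[chunk-1]
	}
	return start, offsets[chunk]
}

func main() {
	offsets := lengthsToEndOffsets([]uint64{5, 0, 3}) // -> [5 5 8]
	start, end := chunkBoundary(2, offsets)
	fmt.Println(start, end) // 5 8: the third chunk occupies bytes [5, 8)
}

The benefit visible in the reader code below is that a chunk boundary is read directly from two adjacent offsets, with no summation and no special case for the first or last chunk.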

View File

@@ -22,7 +22,7 @@ import (
 	"github.com/Smerity/govarint"
 )
 
-const version uint32 = 4
+const version uint32 = 5
 
 const fieldNotUninverted = math.MaxUint64

View File

@@ -157,13 +157,10 @@ func (c *chunkedContentCoder) Write(w io.Writer) (int, error) {
 		return tw, err
 	}
-	if len(c.chunkLens) > 1 {
-		chunkLengthsToOffsets(c.chunkLens)
-	}
-	// write out the chunk starting offsets
-	for _, chunkLen := range c.chunkLens {
-		n := binary.PutUvarint(buf, uint64(chunkLen))
+	chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens)
+	// write out the chunk offsets
+	for _, chunkOffset := range chunkOffsets {
+		n := binary.PutUvarint(buf, chunkOffset)
 		nw, err = w.Write(buf[:n])
 		tw += nw
 		if err != nil {

View File

@@ -46,7 +46,7 @@ func TestChunkContentCoder(t *testing.T) {
 				[]byte("scorch"),
 			},
-			expected: string([]byte{0x02, 0x0c, 0x0c, 0x01, 0x00, 0x00, 0x06, 0x06, 0x14,
+			expected: string([]byte{0x02, 0x0c, 0x18, 0x01, 0x00, 0x00, 0x06, 0x06, 0x14,
 				0x75, 0x70, 0x73, 0x69, 0x64, 0x65, 0x01, 0x01, 0x00, 0x06, 0x06,
 				0x14, 0x73, 0x63, 0x6f, 0x72, 0x63, 0x68}),
 		},
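
Reading the new expected output: the leading 0x02 is the chunk count, and the next two uvarints are now cumulative end offsets, 0x0c (12) and 0x18 (24), where the old format stored the two per-chunk lengths 0x0c, 0x0c. The remaining 24 bytes are the two 12-byte chunks themselves, each of which appears to be a short run of per-document metadata followed by the literal term bytes for "upside" and "scorch".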

View File

@@ -69,7 +69,7 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string,
 	}
 	// read the number of chunks, chunk lengths
-	var offset, clen uint64
+	var offset, loc uint64
 	numChunks, read := binary.Uvarint(s.mem[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64])
 	if read <= 0 {
 		return nil, fmt.Errorf("failed to read the field "+
@@ -83,11 +83,11 @@
 		chunkOffsets: make([]uint64, int(numChunks)),
 	}
 	for i := 0; i < int(numChunks); i++ {
-		clen, read = binary.Uvarint(s.mem[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64])
+		loc, read = binary.Uvarint(s.mem[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64])
 		if read <= 0 {
-			return nil, fmt.Errorf("corrupted chunk length during segment load")
+			return nil, fmt.Errorf("corrupted chunk offset during segment load")
 		}
-		fdvIter.chunkOffsets[i] = clen
+		fdvIter.chunkOffsets[i] = loc
 		offset += uint64(read)
 	}
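
The loader walks the uvarint-encoded chunk count and offsets straight out of the mmapped segment. A small standalone sketch of just that decode step (decodeChunkOffsets is an invented name; the real loop above also tracks the absolute field location and returns an error when a read fails):

package main

import (
	"encoding/binary"
	"fmt"
)

// decodeChunkOffsets reads a uvarint chunk count followed by that many
// uvarint cumulative end offsets, mirroring the loop above. Error handling
// (read <= 0 on corrupt input) is omitted for brevity.
func decodeChunkOffsets(buf []byte) []uint64 {
	numChunks, n := binary.Uvarint(buf)
	offsets := make([]uint64, numChunks)
	for i := range offsets {
		v, read := binary.Uvarint(buf[n:])
		offsets[i] = v
		n += read
	}
	return offsets
}

func main() {
	// Two chunks with end offsets 12 and 24, as in the contentcoder test above.
	fmt.Println(decodeChunkOffsets([]byte{0x02, 0x0c, 0x18})) // [12 24]
}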

View File

@@ -111,15 +111,13 @@ func (c *chunkedIntCoder) Write(w io.Writer) (int, error) {
 	}
 	buf := c.buf
-	// convert the chunk lengths into starting chunk offsets
-	if len(c.chunkLens) > 1 {
-		chunkLengthsToOffsets(c.chunkLens)
-	}
-	// write out the number of chunks & each chunk starting offsets
-	n := binary.PutUvarint(buf, uint64(len(c.chunkLens)))
-	for _, chunkLen := range c.chunkLens {
-		n += binary.PutUvarint(buf[n:], uint64(chunkLen))
+	// convert the chunk lengths into chunk offsets
+	chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens)
+	// write out the number of chunks & each chunk offsets
+	n := binary.PutUvarint(buf, uint64(len(chunkOffsets)))
+	for _, chunkOffset := range chunkOffsets {
+		n += binary.PutUvarint(buf[n:], chunkOffset)
 	}
 	tw, err := w.Write(buf[:n])
@@ -140,41 +138,35 @@ func (c *chunkedIntCoder) FinalSize() int {
 	return len(c.final)
 }
 
-// chunkLengthsToOffsets converts the chunk length array
-// to a chunk starting offset array. The readChunkBoundary
+// modifyLengthsToEndOffsets converts the chunk length array
+// to a chunk offset array. The readChunkBoundary
 // will figure out the start and end of every chunk from
-// these offsets. The starting offset of the first/single
-// array element will always be zero and this position is
-// used for storing the size of the current last item in
-// the array at any given point.
-// For eg:
-// Lens -> 5 5 5 5 => 5 5 10 15
-// Lens -> 0 5 0 5 => 5 0 5 5
-// Lens -> 0 0 0 5 => 5 0 0 0
-// Lens -> 5 0 0 0 => 0 5 5 5
-// Lens -> 0 5 0 0 => 0 0 5 5
-// Lens -> 0 0 5 0 => 0 0 0 5
-func chunkLengthsToOffsets(lengths []uint64) {
-	lengths[1], lengths[0] = lengths[0], lengths[1]
-	for i := 2; i < len(lengths); i++ {
-		cur := lengths[i]
-		lengths[i] = lengths[i-1] + lengths[0]
-		lengths[0] = cur
+// these offsets. Starting offset of i'th index is stored
+// in i-1'th position except for 0'th index and ending offset
+// is stored at i'th index position.
+// For 0'th element, starting position is always zero.
+// eg:
+// Lens -> 5 5 5 5 => 5 10 15 20
+// Lens -> 0 5 0 5 => 0 5 5 10
+// Lens -> 0 0 0 5 => 0 0 0 5
+// Lens -> 5 0 0 0 => 5 5 5 5
+// Lens -> 0 5 0 0 => 0 5 5 5
+// Lens -> 0 0 5 0 => 0 0 5 5
+func modifyLengthsToEndOffsets(lengths []uint64) []uint64 {
+	var runningOffset uint64
+	var index, i int
+	for i = 1; i <= len(lengths); i++ {
+		runningOffset += lengths[i-1]
+		lengths[index] = runningOffset
+		index++
 	}
+	return lengths
 }
 
 func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) {
-	var start, end uint64
+	var start uint64
 	if chunk > 0 {
-		start = offsets[chunk]
+		start = offsets[chunk-1]
 	}
-	// single element case
-	if chunk == 0 && len(offsets) == 1 {
-		end = offsets[chunk]
-	} else if chunk < len(offsets)-1 {
-		end = offsets[chunk+1]
-	} else { // for last element
-		end = start + offsets[0]
-	}
-	return start, end
+	return start, offsets[chunk]
 }

View File

@@ -46,8 +46,8 @@ func TestChunkIntCoder(t *testing.T) {
 				[]uint64{3},
 				[]uint64{7},
 			},
-			// 2 chunks, chunk-0 length 1, chunk-1 length 1, value 3, value 7
-			expected: []byte{0x2, 0x1, 0x1, 0x3, 0x7},
+			// 2 chunks, chunk-0 offset 1, chunk-1 offset 2, value 3, value 7
+			expected: []byte{0x2, 0x1, 0x2, 0x3, 0x7},
 		},
 	}
@@ -80,40 +80,48 @@ func TestChunkLengthToOffsets(t *testing.T) {
 	}{
 		{
 			lengths: []uint64{5, 5, 5, 5, 5},
-			expectedOffsets: []uint64{5, 5, 10, 15, 20},
+			expectedOffsets: []uint64{5, 10, 15, 20, 25},
 		},
 		{
 			lengths: []uint64{0, 5, 0, 5, 0},
-			expectedOffsets: []uint64{0, 0, 5, 5, 10},
+			expectedOffsets: []uint64{0, 5, 5, 10, 10},
 		},
 		{
 			lengths: []uint64{0, 0, 0, 0, 5},
-			expectedOffsets: []uint64{5, 0, 0, 0, 0},
-		},
-		{
-			lengths: []uint64{5, 0, 0, 0, 0},
-			expectedOffsets: []uint64{0, 5, 5, 5, 5},
-		},
-		{
-			lengths: []uint64{0, 5, 0, 0, 0},
-			expectedOffsets: []uint64{0, 0, 5, 5, 5},
-		},
-		{
-			lengths: []uint64{0, 0, 0, 5, 0},
 			expectedOffsets: []uint64{0, 0, 0, 0, 5},
 		},
+		{
+			lengths: []uint64{5, 0, 0, 0, 0},
+			expectedOffsets: []uint64{5, 5, 5, 5, 5},
+		},
+		{
+			lengths: []uint64{0, 5, 0, 0, 0},
+			expectedOffsets: []uint64{0, 5, 5, 5, 5},
+		},
+		{
+			lengths: []uint64{0, 0, 0, 5, 0},
+			expectedOffsets: []uint64{0, 0, 0, 5, 5},
+		},
 		{
 			lengths: []uint64{0, 0, 0, 5, 5},
-			expectedOffsets: []uint64{5, 0, 0, 0, 5},
+			expectedOffsets: []uint64{0, 0, 0, 5, 10},
 		},
 		{
 			lengths: []uint64{5, 5, 5, 0, 0},
-			expectedOffsets: []uint64{0, 5, 10, 15, 15},
+			expectedOffsets: []uint64{5, 10, 15, 15, 15},
+		},
+		{
+			lengths: []uint64{5},
+			expectedOffsets: []uint64{5},
+		},
+		{
+			lengths: []uint64{5, 5},
+			expectedOffsets: []uint64{5, 10},
 		},
 	}
 	for i, test := range tests {
-		chunkLengthsToOffsets(test.lengths)
+		modifyLengthsToEndOffsets(test.lengths)
 		if !reflect.DeepEqual(test.expectedOffsets, test.lengths) {
 			t.Errorf("Test: %d failed, got %+v, expected %+v", i, test.lengths, test.expectedOffsets)
 		}
@@ -129,86 +137,80 @@ func TestChunkReadBoundaryFromOffsets(t *testing.T) {
 		expectedEnd uint64
 	}{
 		{
-			offsets: []uint64{5, 5, 10, 15, 20},
+			offsets: []uint64{5, 10, 15, 20, 25},
 			chunkNumber: 4,
 			expectedStart: 20,
 			expectedEnd: 25,
 		},
 		{
-			offsets: []uint64{5, 5, 10, 15, 20},
+			offsets: []uint64{5, 10, 15, 20, 25},
 			chunkNumber: 0,
 			expectedStart: 0,
 			expectedEnd: 5,
 		},
 		{
-			offsets: []uint64{5, 5, 10, 15, 20},
+			offsets: []uint64{5, 10, 15, 20, 25},
 			chunkNumber: 2,
 			expectedStart: 10,
 			expectedEnd: 15,
 		},
 		{
-			offsets: []uint64{0, 0, 5, 5, 10},
+			offsets: []uint64{0, 5, 5, 10, 10},
 			chunkNumber: 4,
 			expectedStart: 10,
 			expectedEnd: 10,
 		},
 		{
-			offsets: []uint64{0, 0, 5, 5, 10},
+			offsets: []uint64{0, 5, 5, 10, 10},
 			chunkNumber: 1,
 			expectedStart: 0,
 			expectedEnd: 5,
 		},
 		{
-			offsets: []uint64{5, 0, 0, 0, 0},
+			offsets: []uint64{5, 5, 5, 5, 5},
 			chunkNumber: 0,
 			expectedStart: 0,
-			expectedEnd: 0,
-		},
-		{
-			offsets: []uint64{5, 0, 0, 0, 0},
-			chunkNumber: 4,
-			expectedStart: 0,
 			expectedEnd: 5,
 		},
 		{
-			offsets: []uint64{5, 0, 0, 0, 0},
-			chunkNumber: 1,
-			expectedStart: 0,
-			expectedEnd: 0,
+			offsets: []uint64{5, 5, 5, 5, 5},
+			chunkNumber: 4,
+			expectedStart: 5,
+			expectedEnd: 5,
 		},
 		{
-			offsets: []uint64{0, 5, 5, 5, 5},
+			offsets: []uint64{5, 5, 5, 5, 5},
 			chunkNumber: 1,
 			expectedStart: 5,
 			expectedEnd: 5,
 		},
 		{
 			offsets: []uint64{0, 5, 5, 5, 5},
-			chunkNumber: 0,
-			expectedStart: 0,
-			expectedEnd: 5,
-		},
-		{
-			offsets: []uint64{0, 0, 5, 5, 5},
-			chunkNumber: 2,
-			expectedStart: 5,
-			expectedEnd: 5,
-		},
-		{
-			offsets: []uint64{0, 0, 5, 5, 5},
 			chunkNumber: 1,
 			expectedStart: 0,
 			expectedEnd: 5,
 		},
 		{
-			offsets: []uint64{0, 0, 0, 0, 5},
-			chunkNumber: 4,
-			expectedStart: 5,
-			expectedEnd: 5,
+			offsets: []uint64{0, 5, 5, 5, 5},
+			chunkNumber: 0,
+			expectedStart: 0,
+			expectedEnd: 0,
+		},
+		{
+			offsets: []uint64{0, 0, 0, 5, 5},
+			chunkNumber: 2,
+			expectedStart: 0,
+			expectedEnd: 0,
+		},
+		{
+			offsets: []uint64{0, 0, 0, 5, 5},
+			chunkNumber: 1,
+			expectedStart: 0,
+			expectedEnd: 0,
 		},
 		{
 			offsets: []uint64{0, 0, 0, 0, 5},
-			chunkNumber: 3,
+			chunkNumber: 4,
 			expectedStart: 0,
 			expectedEnd: 5,
 		},
@@ -219,59 +221,41 @@ func TestChunkReadBoundaryFromOffsets(t *testing.T) {
 			expectedEnd: 0,
 		},
 		{
-			offsets: []uint64{5, 0, 0, 0, 5},
-			chunkNumber: 0,
-			expectedStart: 0,
-			expectedEnd: 0,
-		},
-		{
-			offsets: []uint64{5, 0, 0, 0, 5},
-			chunkNumber: 1,
-			expectedStart: 0,
-			expectedEnd: 0,
-		},
-		{
-			offsets: []uint64{5, 0, 0, 0, 5},
-			chunkNumber: 3,
-			expectedStart: 0,
-			expectedEnd: 5,
-		},
-		{
-			offsets: []uint64{5, 0, 0, 0, 5},
-			chunkNumber: 4,
-			expectedStart: 5,
-			expectedEnd: 10,
-		},
-		{
-			offsets: []uint64{0, 5, 10, 15, 15},
+			offsets: []uint64{5, 10, 15, 15, 15},
 			chunkNumber: 0,
 			expectedStart: 0,
 			expectedEnd: 5,
 		},
 		{
-			offsets: []uint64{0, 5, 10, 15, 15},
+			offsets: []uint64{5, 10, 15, 15, 15},
 			chunkNumber: 1,
 			expectedStart: 5,
 			expectedEnd: 10,
 		},
 		{
-			offsets: []uint64{0, 5, 10, 15, 15},
+			offsets: []uint64{5, 10, 15, 15, 15},
 			chunkNumber: 2,
 			expectedStart: 10,
 			expectedEnd: 15,
 		},
 		{
-			offsets: []uint64{0, 5, 10, 15, 15},
+			offsets: []uint64{5, 10, 15, 15, 15},
 			chunkNumber: 3,
 			expectedStart: 15,
 			expectedEnd: 15,
 		},
 		{
-			offsets: []uint64{0, 5, 10, 15, 15},
+			offsets: []uint64{5, 10, 15, 15, 15},
 			chunkNumber: 4,
 			expectedStart: 15,
 			expectedEnd: 15,
 		},
+		{
+			offsets: []uint64{5},
+			chunkNumber: 0,
+			expectedStart: 0,
+			expectedEnd: 5,
+		},
 	}
 	for i, test := range tests {

View File

@@ -189,9 +189,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
 	var numFreqChunks uint64
 	numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
 	n += uint64(read)
-	rv.freqChunkLens = make([]uint64, int(numFreqChunks))
+	rv.freqChunkOffsets = make([]uint64, int(numFreqChunks))
 	for i := 0; i < int(numFreqChunks); i++ {
-		rv.freqChunkLens[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
+		rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
 		n += uint64(read)
 	}
 	rv.freqChunkStart = p.freqOffset + n
@@ -201,9 +201,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
 	var numLocChunks uint64
 	numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
 	n += uint64(read)
-	rv.locChunkLens = make([]uint64, int(numLocChunks))
+	rv.locChunkOffsets = make([]uint64, int(numLocChunks))
 	for i := 0; i < int(numLocChunks); i++ {
-		rv.locChunkLens[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
+		rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
 		n += uint64(read)
 	}
 	rv.locChunkStart = p.locOffset + n
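
With the lens slices replaced by cumulative end offsets, the iterator no longer needs to sum lengths to seek to a chunk: chunk i simply spans [offsets[i-1], offsets[i]) relative to freqChunkStart or locChunkStart. A hypothetical sketch of that lookup (chunkBytes is an invented helper, not part of this diff):

package main

import "fmt"

// chunkBytes slices one chunk out of a segment buffer, given the absolute
// start of the chunk data region (e.g. freqChunkStart) and the cumulative
// end-offset slice loaded above.
func chunkBytes(mem []byte, chunkStart uint64, offsets []uint64, chunk int) []byte {
	var start uint64
	if chunk > 0 {
		start = offsets[chunk-1] // previous chunk's end is this chunk's start
	}
	return mem[chunkStart+start : chunkStart+offsets[chunk]]
}

func main() {
	mem := []byte("xxAABBB")  // pretend the chunk data region begins at byte 2
	offsets := []uint64{2, 5} // chunk 0 = "AA", chunk 1 = "BBB"
	fmt.Printf("%s %s\n", chunkBytes(mem, 2, offsets, 0), chunkBytes(mem, 2, offsets, 1))
}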