Merge pull request #821 from blevesearch/minor_docvalue_space_savings
docValue space savings
This commit is contained in:
commit
53bf29763b
|
@ -209,9 +209,7 @@ var docvalueCmd = &cobra.Command{
|
|||
for i := 0; i < int(numDocs); i++ {
|
||||
curChunkHeader[i].DocNum, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(nread)
|
||||
curChunkHeader[i].DocDvLoc, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(nread)
|
||||
curChunkHeader[i].DocDvLen, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
curChunkHeader[i].DocDvOffset, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(nread)
|
||||
}
|
||||
|
||||
|
@ -255,7 +253,7 @@ func getDocValueLocs(docNum uint64, metaHeader []zap.MetaData) (uint64, uint64)
|
|||
return metaHeader[i].DocNum >= docNum
|
||||
})
|
||||
if i < len(metaHeader) && metaHeader[i].DocNum == docNum {
|
||||
return metaHeader[i].DocDvLoc, metaHeader[i].DocDvLen
|
||||
return zap.ReadDocValueBoundary(i, metaHeader)
|
||||
}
|
||||
return math.MaxUint64, math.MaxUint64
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ import (
|
|||
"github.com/Smerity/govarint"
|
||||
)
|
||||
|
||||
const version uint32 = 5
|
||||
const version uint32 = 6
|
||||
|
||||
const fieldNotUninverted = math.MaxUint64
|
||||
|
||||
|
|
|
@ -47,9 +47,8 @@ type chunkedContentCoder struct {
|
|||
// MetaData represents the data information inside a
|
||||
// chunk.
|
||||
type MetaData struct {
|
||||
DocNum uint64 // docNum of the data inside the chunk
|
||||
DocDvLoc uint64 // starting offset for a given docid
|
||||
DocDvLen uint64 // length of data inside the chunk for the given docid
|
||||
DocNum uint64 // docNum of the data inside the chunk
|
||||
DocDvOffset uint64 // offset of data inside the chunk for the given docid
|
||||
}
|
||||
|
||||
// newChunkedContentCoder returns a new chunk content coder which
|
||||
|
@ -96,7 +95,7 @@ func (c *chunkedContentCoder) flushContents() error {
|
|||
|
||||
// write out the metaData slice
|
||||
for _, meta := range c.chunkMeta {
|
||||
_, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvLoc, meta.DocDvLen)
|
||||
_, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvOffset)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -130,7 +129,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
|
|||
c.currChunk = chunk
|
||||
}
|
||||
|
||||
// mark the starting offset for this doc
|
||||
// get the starting offset for this doc
|
||||
dvOffset := c.chunkBuf.Len()
|
||||
dvSize, err := c.chunkBuf.Write(vals)
|
||||
if err != nil {
|
||||
|
@ -138,9 +137,8 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
|
|||
}
|
||||
|
||||
c.chunkMeta = append(c.chunkMeta, MetaData{
|
||||
DocNum: docNum,
|
||||
DocDvLoc: uint64(dvOffset),
|
||||
DocDvLen: uint64(dvSize),
|
||||
DocNum: docNum,
|
||||
DocDvOffset: uint64(dvOffset + dvSize),
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
@ -175,3 +173,13 @@ func (c *chunkedContentCoder) Write(w io.Writer) (int, error) {
|
|||
}
|
||||
return tw, nil
|
||||
}
|
||||
|
||||
// ReadDocValueBoundary elicits the start, end offsets from a
|
||||
// metaData header slice
|
||||
func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64) {
|
||||
var start uint64
|
||||
if chunk > 0 {
|
||||
start = metaHeaders[chunk-1].DocDvOffset
|
||||
}
|
||||
return start, metaHeaders[chunk].DocDvOffset
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ func TestChunkContentCoder(t *testing.T) {
|
|||
docNums: []uint64{0},
|
||||
vals: [][]byte{[]byte("bleve")},
|
||||
// 1 chunk, chunk-0 length 10(a), value
|
||||
expected: string([]byte{0x1, 0xb, 0x1, 0x0, 0x0, 0x05, 0x05, 0x10, 0x62, 0x6c, 0x65, 0x76, 0x65}),
|
||||
expected: string([]byte{0x1, 0xa, 0x1, 0x0, 0x05, 0x05, 0x10, 0x62, 0x6c, 0x65, 0x76, 0x65}),
|
||||
},
|
||||
{
|
||||
maxDocNum: 1,
|
||||
|
@ -46,8 +46,8 @@ func TestChunkContentCoder(t *testing.T) {
|
|||
[]byte("scorch"),
|
||||
},
|
||||
|
||||
expected: string([]byte{0x02, 0x0c, 0x18, 0x01, 0x00, 0x00, 0x06, 0x06, 0x14,
|
||||
0x75, 0x70, 0x73, 0x69, 0x64, 0x65, 0x01, 0x01, 0x00, 0x06, 0x06,
|
||||
expected: string([]byte{0x02, 0x0b, 0x16, 0x01, 0x00, 0x06, 0x06, 0x14,
|
||||
0x75, 0x70, 0x73, 0x69, 0x64, 0x65, 0x01, 0x01, 0x06, 0x06,
|
||||
0x14, 0x73, 0x63, 0x6f, 0x72, 0x63, 0x68}),
|
||||
},
|
||||
}
|
||||
|
@ -69,7 +69,7 @@ func TestChunkContentCoder(t *testing.T) {
|
|||
}
|
||||
|
||||
if !reflect.DeepEqual(test.expected, string(actual.Bytes())) {
|
||||
t.Errorf("got % s, expected % s", string(actual.Bytes()), test.expected)
|
||||
t.Errorf("got:%s, expected:%s", string(actual.Bytes()), test.expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -116,9 +116,7 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
|
|||
for i := 0; i < int(numDocs); i++ {
|
||||
di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(read)
|
||||
di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(read)
|
||||
di.curChunkHeader[i].DocDvLen, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
di.curChunkHeader[i].DocDvOffset, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(read)
|
||||
}
|
||||
|
||||
|
@ -132,8 +130,8 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
|
|||
func (di *docValueIterator) visitDocValues(docNum uint64,
|
||||
visitor index.DocumentFieldTermVisitor) error {
|
||||
// binary search the term locations for the docNum
|
||||
start, length := di.getDocValueLocs(docNum)
|
||||
if start == math.MaxUint64 || length == math.MaxUint64 {
|
||||
start, end := di.getDocValueLocs(docNum)
|
||||
if start == math.MaxUint64 || end == math.MaxUint64 {
|
||||
return nil
|
||||
}
|
||||
// uncompress the already loaded data
|
||||
|
@ -143,7 +141,7 @@ func (di *docValueIterator) visitDocValues(docNum uint64,
|
|||
}
|
||||
|
||||
// pick the terms for the given docNum
|
||||
uncompressed = uncompressed[start : start+length]
|
||||
uncompressed = uncompressed[start:end]
|
||||
for {
|
||||
i := bytes.Index(uncompressed, termSeparatorSplitSlice)
|
||||
if i < 0 {
|
||||
|
@ -162,7 +160,7 @@ func (di *docValueIterator) getDocValueLocs(docNum uint64) (uint64, uint64) {
|
|||
return di.curChunkHeader[i].DocNum >= docNum
|
||||
})
|
||||
if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum {
|
||||
return di.curChunkHeader[i].DocDvLoc, di.curChunkHeader[i].DocDvLen
|
||||
return ReadDocValueBoundary(i, di.curChunkHeader)
|
||||
}
|
||||
return math.MaxUint64, math.MaxUint64
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue