scorch zap chunkedContentCoder reuses chunk metadata slice memory
Also renamed the chunk MetaData.DocID field to DocNum for naming correctness; much of this commit is the mechanical effect of that rename.
This commit is contained in:
parent
3da191852d
commit
c09e2a08ca
|
@ -165,7 +165,7 @@ var docvalueCmd = &cobra.Command{
|
||||||
/*
|
/*
|
||||||
TODO => dump all chunk headers??
|
TODO => dump all chunk headers??
|
||||||
if len(args) == 3 && args[2] == ">" {
|
if len(args) == 3 && args[2] == ">" {
|
||||||
dumpChunkDocIDs(data, )
|
dumpChunkDocNums(data, )
|
||||||
|
|
||||||
}*/
|
}*/
|
||||||
}
|
}
|
||||||
|
@ -187,7 +187,7 @@ var docvalueCmd = &cobra.Command{
|
||||||
docInChunk := uint64(localDocNum) / uint64(segment.ChunkFactor())
|
docInChunk := uint64(localDocNum) / uint64(segment.ChunkFactor())
|
||||||
|
|
||||||
if numChunks < docInChunk {
|
if numChunks < docInChunk {
|
||||||
return fmt.Errorf("no chunk exists for chunk number: %d for docID: %d", docInChunk, localDocNum)
|
return fmt.Errorf("no chunk exists for chunk number: %d for localDocNum: %d", docInChunk, localDocNum)
|
||||||
}
|
}
|
||||||
|
|
||||||
destChunkDataLoc := fieldDvLoc + offset
|
destChunkDataLoc := fieldDvLoc + offset
|
||||||
|
@ -207,7 +207,7 @@ var docvalueCmd = &cobra.Command{
|
||||||
offset = uint64(0)
|
offset = uint64(0)
|
||||||
curChunkHeader := make([]zap.MetaData, int(numDocs))
|
curChunkHeader := make([]zap.MetaData, int(numDocs))
|
||||||
for i := 0; i < int(numDocs); i++ {
|
for i := 0; i < int(numDocs); i++ {
|
||||||
curChunkHeader[i].DocID, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
curChunkHeader[i].DocNum, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||||
offset += uint64(nread)
|
offset += uint64(nread)
|
||||||
curChunkHeader[i].DocDvLoc, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
curChunkHeader[i].DocDvLoc, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||||
offset += uint64(nread)
|
offset += uint64(nread)
|
||||||
|
@ -221,8 +221,8 @@ var docvalueCmd = &cobra.Command{
|
||||||
|
|
||||||
start, length := getDocValueLocs(uint64(localDocNum), curChunkHeader)
|
start, length := getDocValueLocs(uint64(localDocNum), curChunkHeader)
|
||||||
if start == math.MaxUint64 || length == math.MaxUint64 {
|
if start == math.MaxUint64 || length == math.MaxUint64 {
|
||||||
fmt.Printf("no field values found for docID %d\n", localDocNum)
|
fmt.Printf("no field values found for localDocNum: %d\n", localDocNum)
|
||||||
fmt.Printf("Try docIDs present in chunk: %s\n", assortDocID(curChunkHeader))
|
fmt.Printf("Try docNums present in chunk: %s\n", metaDataDocNums(curChunkHeader))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// uncompress the already loaded data
|
// uncompress the already loaded data
|
||||||
|
@ -234,7 +234,7 @@ var docvalueCmd = &cobra.Command{
|
||||||
|
|
||||||
var termSeparator byte = 0xff
|
var termSeparator byte = 0xff
|
||||||
var termSeparatorSplitSlice = []byte{termSeparator}
|
var termSeparatorSplitSlice = []byte{termSeparator}
|
||||||
// pick the terms for the given docID
|
// pick the terms for the given docNum
|
||||||
uncompressed = uncompressed[start : start+length]
|
uncompressed = uncompressed[start : start+length]
|
||||||
for {
|
for {
|
||||||
i := bytes.Index(uncompressed, termSeparatorSplitSlice)
|
i := bytes.Index(uncompressed, termSeparatorSplitSlice)
|
||||||
|
@ -250,23 +250,22 @@ var docvalueCmd = &cobra.Command{
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDocValueLocs(docID uint64, metaHeader []zap.MetaData) (uint64, uint64) {
|
func getDocValueLocs(docNum uint64, metaHeader []zap.MetaData) (uint64, uint64) {
|
||||||
i := sort.Search(len(metaHeader), func(i int) bool {
|
i := sort.Search(len(metaHeader), func(i int) bool {
|
||||||
return metaHeader[i].DocID >= docID
|
return metaHeader[i].DocNum >= docNum
|
||||||
})
|
})
|
||||||
if i < len(metaHeader) && metaHeader[i].DocID == docID {
|
if i < len(metaHeader) && metaHeader[i].DocNum == docNum {
|
||||||
return metaHeader[i].DocDvLoc, metaHeader[i].DocDvLen
|
return metaHeader[i].DocDvLoc, metaHeader[i].DocDvLen
|
||||||
}
|
}
|
||||||
return math.MaxUint64, math.MaxUint64
|
return math.MaxUint64, math.MaxUint64
|
||||||
}
|
}
|
||||||
|
|
||||||
func assortDocID(metaHeader []zap.MetaData) string {
|
func metaDataDocNums(metaHeader []zap.MetaData) string {
|
||||||
docIDs := ""
|
docNums := ""
|
||||||
for _, meta := range metaHeader {
|
for _, meta := range metaHeader {
|
||||||
id := fmt.Sprintf("%d", meta.DocID)
|
docNums += fmt.Sprintf("%d", meta.DocNum) + ", "
|
||||||
docIDs += id + ", "
|
|
||||||
}
|
}
|
||||||
return docIDs
|
return docNums
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
|
|
|
@ -588,7 +588,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
// resetting encoder for the next field
|
// resetting encoder for the next field
|
||||||
fdvEncoder.Reset()
|
fdvEncoder.Reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ type chunkedContentCoder struct {
|
||||||
// MetaData represents the data information inside a
|
// MetaData represents the data information inside a
|
||||||
// chunk.
|
// chunk.
|
||||||
type MetaData struct {
|
type MetaData struct {
|
||||||
DocID uint64 // docid of the data inside the chunk
|
DocNum uint64 // docNum of the data inside the chunk
|
||||||
DocDvLoc uint64 // starting offset for a given docid
|
DocDvLoc uint64 // starting offset for a given docNum
|
||||||
DocDvLen uint64 // length of data inside the chunk for the given docid
|
DocDvLen uint64 // length of data inside the chunk for the given docNum
|
||||||
}
|
}
|
||||||
|
@ -52,7 +52,7 @@ func newChunkedContentCoder(chunkSize uint64,
|
||||||
rv := &chunkedContentCoder{
|
rv := &chunkedContentCoder{
|
||||||
chunkSize: chunkSize,
|
chunkSize: chunkSize,
|
||||||
chunkLens: make([]uint64, total),
|
chunkLens: make([]uint64, total),
|
||||||
chunkMeta: []MetaData{},
|
chunkMeta: make([]MetaData, 0, total),
|
||||||
}
|
}
|
||||||
|
|
||||||
return rv
|
return rv
|
||||||
|
@ -68,7 +68,7 @@ func (c *chunkedContentCoder) Reset() {
|
||||||
for i := range c.chunkLens {
|
for i := range c.chunkLens {
|
||||||
c.chunkLens[i] = 0
|
c.chunkLens[i] = 0
|
||||||
}
|
}
|
||||||
c.chunkMeta = []MetaData{}
|
c.chunkMeta = c.chunkMeta[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close indicates you are done calling Add() this allows
|
// Close indicates you are done calling Add() this allows
|
||||||
|
@ -88,7 +88,7 @@ func (c *chunkedContentCoder) flushContents() error {
|
||||||
|
|
||||||
// write out the metaData slice
|
// write out the metaData slice
|
||||||
for _, meta := range c.chunkMeta {
|
for _, meta := range c.chunkMeta {
|
||||||
_, err := writeUvarints(&c.chunkMetaBuf, meta.DocID, meta.DocDvLoc, meta.DocDvLen)
|
_, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvLoc, meta.DocDvLen)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -118,7 +118,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
|
||||||
// clearing the chunk specific meta for next chunk
|
// clearing the chunk specific meta for next chunk
|
||||||
c.chunkBuf.Reset()
|
c.chunkBuf.Reset()
|
||||||
c.chunkMetaBuf.Reset()
|
c.chunkMetaBuf.Reset()
|
||||||
c.chunkMeta = []MetaData{}
|
c.chunkMeta = c.chunkMeta[:0]
|
||||||
c.currChunk = chunk
|
c.currChunk = chunk
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -130,7 +130,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
c.chunkMeta = append(c.chunkMeta, MetaData{
|
c.chunkMeta = append(c.chunkMeta, MetaData{
|
||||||
DocID: docNum,
|
DocNum: docNum,
|
||||||
DocDvLoc: uint64(dvOffset),
|
DocDvLoc: uint64(dvOffset),
|
||||||
DocDvLen: uint64(dvSize),
|
DocDvLen: uint64(dvSize),
|
||||||
})
|
})
|
||||||
|
|
|
@ -99,7 +99,7 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string,
|
||||||
func (di *docValueIterator) loadDvChunk(chunkNumber,
|
func (di *docValueIterator) loadDvChunk(chunkNumber,
|
||||||
localDocNum uint64, s *SegmentBase) error {
|
localDocNum uint64, s *SegmentBase) error {
|
||||||
// advance to the chunk where the docValues
|
// advance to the chunk where the docValues
|
||||||
// reside for the given docID
|
// reside for the given docNum
|
||||||
destChunkDataLoc := di.dvDataLoc
|
destChunkDataLoc := di.dvDataLoc
|
||||||
for i := 0; i < int(chunkNumber); i++ {
|
for i := 0; i < int(chunkNumber); i++ {
|
||||||
destChunkDataLoc += di.chunkLens[i]
|
destChunkDataLoc += di.chunkLens[i]
|
||||||
|
@ -116,7 +116,7 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
|
||||||
offset := uint64(0)
|
offset := uint64(0)
|
||||||
di.curChunkHeader = make([]MetaData, int(numDocs))
|
di.curChunkHeader = make([]MetaData, int(numDocs))
|
||||||
for i := 0; i < int(numDocs); i++ {
|
for i := 0; i < int(numDocs); i++ {
|
||||||
di.curChunkHeader[i].DocID, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||||
offset += uint64(read)
|
offset += uint64(read)
|
||||||
di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||||
offset += uint64(read)
|
offset += uint64(read)
|
||||||
|
@ -131,10 +131,10 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (di *docValueIterator) visitDocValues(docID uint64,
|
func (di *docValueIterator) visitDocValues(docNum uint64,
|
||||||
visitor index.DocumentFieldTermVisitor) error {
|
visitor index.DocumentFieldTermVisitor) error {
|
||||||
// binary search the term locations for the docID
|
// binary search the term locations for the docNum
|
||||||
start, length := di.getDocValueLocs(docID)
|
start, length := di.getDocValueLocs(docNum)
|
||||||
if start == math.MaxUint64 || length == math.MaxUint64 {
|
if start == math.MaxUint64 || length == math.MaxUint64 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -144,7 +144,7 @@ func (di *docValueIterator) visitDocValues(docID uint64,
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// pick the terms for the given docID
|
// pick the terms for the given docNum
|
||||||
uncompressed = uncompressed[start : start+length]
|
uncompressed = uncompressed[start : start+length]
|
||||||
for {
|
for {
|
||||||
i := bytes.Index(uncompressed, termSeparatorSplitSlice)
|
i := bytes.Index(uncompressed, termSeparatorSplitSlice)
|
||||||
|
@ -159,11 +159,11 @@ func (di *docValueIterator) visitDocValues(docID uint64,
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (di *docValueIterator) getDocValueLocs(docID uint64) (uint64, uint64) {
|
func (di *docValueIterator) getDocValueLocs(docNum uint64) (uint64, uint64) {
|
||||||
i := sort.Search(len(di.curChunkHeader), func(i int) bool {
|
i := sort.Search(len(di.curChunkHeader), func(i int) bool {
|
||||||
return di.curChunkHeader[i].DocID >= docID
|
return di.curChunkHeader[i].DocNum >= docNum
|
||||||
})
|
})
|
||||||
if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocID == docID {
|
if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum {
|
||||||
return di.curChunkHeader[i].DocDvLoc, di.curChunkHeader[i].DocDvLen
|
return di.curChunkHeader[i].DocDvLoc, di.curChunkHeader[i].DocDvLen
|
||||||
}
|
}
|
||||||
return math.MaxUint64, math.MaxUint64
|
return math.MaxUint64, math.MaxUint64
|
||||||
|
|
Loading…
Reference in New Issue