
scorch zap chunkedContentCoder reuses chunk metadata slice memory

Also renamed the chunk MetaData.DocID field to DocNum for naming
correctness; much of this commit is the mechanical effect of that
rename.
Steve Yen 2018-02-03 10:51:24 -08:00
parent 3da191852d
commit c09e2a08ca
4 changed files with 29 additions and 30 deletions
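
The slice-memory reuse named in the title is the standard Go idiom of truncating with s[:0] instead of allocating a fresh slice: the backing array survives the reset, so appends for the next chunk stay within the old capacity. A minimal standalone sketch of that pattern (names and sizes here are illustrative, not taken from the patch):

package main

import "fmt"

type metaData struct {
	docNum, docDvLoc, docDvLen uint64
}

func main() {
	// Allocate once with a capacity hint, as the patch does with
	// make([]MetaData, 0, total) in newChunkedContentCoder.
	meta := make([]metaData, 0, 8)

	for chunk := 0; chunk < 3; chunk++ {
		// Truncate rather than reallocate: len drops to 0 but the
		// backing array (and its capacity) is retained across chunks.
		meta = meta[:0]
		for d := uint64(0); d < 8; d++ {
			meta = append(meta, metaData{docNum: d})
		}
		fmt.Printf("chunk %d: len=%d cap=%d\n", chunk, len(meta), cap(meta))
	}
}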

View File

@@ -165,7 +165,7 @@ var docvalueCmd = &cobra.Command{
/*
TODO => dump all chunk headers??
if len(args) == 3 && args[2] == ">" {
-dumpChunkDocIDs(data, )
+dumpChunkDocNums(data, )
}*/
}
@@ -187,7 +187,7 @@ var docvalueCmd = &cobra.Command{
docInChunk := uint64(localDocNum) / uint64(segment.ChunkFactor())
if numChunks < docInChunk {
-return fmt.Errorf("no chunk exists for chunk number: %d for docID: %d", docInChunk, localDocNum)
+return fmt.Errorf("no chunk exists for chunk number: %d for localDocNum: %d", docInChunk, localDocNum)
}
destChunkDataLoc := fieldDvLoc + offset
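
To make the chunk arithmetic above concrete, here is a tiny sketch; the chunk factor value is hypothetical, as the real one comes from segment.ChunkFactor():

package main

import "fmt"

func main() {
	// Hypothetical chunk factor: docNums 0..1023 live in chunk 0,
	// 1024..2047 in chunk 1, and so on.
	chunkFactor := uint64(1024)
	localDocNum := uint64(2500)
	fmt.Println(localDocNum / chunkFactor) // 2: chunk 2 covers docNums 2048..3071
}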
@@ -207,7 +207,7 @@ var docvalueCmd = &cobra.Command{
offset = uint64(0)
curChunkHeader := make([]zap.MetaData, int(numDocs))
for i := 0; i < int(numDocs); i++ {
-curChunkHeader[i].DocID, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
+curChunkHeader[i].DocNum, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
offset += uint64(nread)
curChunkHeader[i].DocDvLoc, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
offset += uint64(nread)
@@ -221,8 +221,8 @@ var docvalueCmd = &cobra.Command{
start, length := getDocValueLocs(uint64(localDocNum), curChunkHeader)
if start == math.MaxUint64 || length == math.MaxUint64 {
fmt.Printf("no field values found for docID %d\n", localDocNum)
fmt.Printf("Try docIDs present in chunk: %s\n", assortDocID(curChunkHeader))
fmt.Printf("no field values found for localDocNum: %d\n", localDocNum)
fmt.Printf("Try docNums present in chunk: %s\n", metaDataDocNums(curChunkHeader))
return nil
}
// uncompress the already loaded data
@@ -234,7 +234,7 @@ var docvalueCmd = &cobra.Command{
var termSeparator byte = 0xff
var termSeparatorSplitSlice = []byte{termSeparator}
-// pick the terms for the given docID
+// pick the terms for the given docNum
uncompressed = uncompressed[start : start+length]
for {
i := bytes.Index(uncompressed, termSeparatorSplitSlice)
@@ -250,23 +250,22 @@ var docvalueCmd = &cobra.Command{
},
}
-func getDocValueLocs(docID uint64, metaHeader []zap.MetaData) (uint64, uint64) {
+func getDocValueLocs(docNum uint64, metaHeader []zap.MetaData) (uint64, uint64) {
i := sort.Search(len(metaHeader), func(i int) bool {
-return metaHeader[i].DocID >= docID
+return metaHeader[i].DocNum >= docNum
})
if i < len(metaHeader) && metaHeader[i].DocID == docID {
if i < len(metaHeader) && metaHeader[i].DocNum == docNum {
return metaHeader[i].DocDvLoc, metaHeader[i].DocDvLen
}
return math.MaxUint64, math.MaxUint64
}
-func assortDocID(metaHeader []zap.MetaData) string {
-docIDs := ""
+func metaDataDocNums(metaHeader []zap.MetaData) string {
+docNums := ""
for _, meta := range metaHeader {
-id := fmt.Sprintf("%d", meta.DocID)
-docIDs += id + ", "
+docNums += fmt.Sprintf("%d", meta.DocNum) + ", "
}
-return docIDs
+return docNums
}
func init() {
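
As an aside on metaDataDocNums: the += loop above leaves a trailing ", ". A sketch of an equivalent helper built on strings.Join that avoids it (this is only an alternative, not what the patch does):

package main

import (
	"fmt"
	"strings"
)

type metaData struct{ docNum uint64 }

// metaDataDocNums formats the docNums of a chunk header, joined
// with ", " and without a trailing separator.
func metaDataDocNums(header []metaData) string {
	nums := make([]string, 0, len(header))
	for _, meta := range header {
		nums = append(nums, fmt.Sprintf("%d", meta.docNum))
	}
	return strings.Join(nums, ", ")
}

func main() {
	fmt.Println(metaDataDocNums([]metaData{{1}, {4}, {9}})) // 1, 4, 9
}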

View File

@@ -588,7 +588,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
if err != nil {
return nil, err
}
-// resetting encoder for the next field
+// reseting encoder for the next field
fdvEncoder.Reset()
}

View File

@@ -39,7 +39,7 @@ type chunkedContentCoder struct {
// MetaData represents the data information inside a
// chunk.
type MetaData struct {
-DocID uint64 // docid of the data inside the chunk
+DocNum uint64 // docNum of the data inside the chunk
DocDvLoc uint64 // starting offset for a given docid
DocDvLen uint64 // length of data inside the chunk for the given docid
}
@@ -52,7 +52,7 @@ func newChunkedContentCoder(chunkSize uint64,
rv := &chunkedContentCoder{
chunkSize: chunkSize,
chunkLens: make([]uint64, total),
-chunkMeta: []MetaData{},
+chunkMeta: make([]MetaData, 0, total),
}
return rv
@@ -68,7 +68,7 @@ func (c *chunkedContentCoder) Reset() {
for i := range c.chunkLens {
c.chunkLens[i] = 0
}
-c.chunkMeta = []MetaData{}
+c.chunkMeta = c.chunkMeta[:0]
}
// Close indicates you are done calling Add() this allows
@@ -88,7 +88,7 @@ func (c *chunkedContentCoder) flushContents() error {
// write out the metaData slice
for _, meta := range c.chunkMeta {
-_, err := writeUvarints(&c.chunkMetaBuf, meta.DocID, meta.DocDvLoc, meta.DocDvLen)
+_, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvLoc, meta.DocDvLen)
if err != nil {
return err
}
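
Both sides of this commit treat the chunk metadata as three unsigned varints per document: the docNum, then the value's offset and length within the chunk. A small round-trip sketch of that wire format; writeUvarints is internal to the package, so this uses encoding/binary directly, and the three values are made up:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// Encode one (docNum, docDvLoc, docDvLen) triple as uvarints,
	// matching what writeUvarints emits per MetaData entry.
	buf := make([]byte, 0, 3*binary.MaxVarintLen64)
	var tmp [binary.MaxVarintLen64]byte
	for _, v := range []uint64{42, 1000, 57} {
		n := binary.PutUvarint(tmp[:], v)
		buf = append(buf, tmp[:n]...)
	}

	// Decode it back the way loadDvChunk does, advancing an offset
	// by the number of bytes each Uvarint consumed.
	offset := 0
	for i := 0; i < 3; i++ {
		v, read := binary.Uvarint(buf[offset:])
		offset += read
		fmt.Println(v)
	}
}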
@@ -118,7 +118,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
// clearing the chunk specific meta for next chunk
c.chunkBuf.Reset()
c.chunkMetaBuf.Reset()
-c.chunkMeta = []MetaData{}
+c.chunkMeta = c.chunkMeta[:0]
c.currChunk = chunk
}
@@ -130,7 +130,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
}
c.chunkMeta = append(c.chunkMeta, MetaData{
-DocID: docNum,
+DocNum: docNum,
DocDvLoc: uint64(dvOffset),
DocDvLen: uint64(dvSize),
})

View File

@@ -99,7 +99,7 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string,
func (di *docValueIterator) loadDvChunk(chunkNumber,
localDocNum uint64, s *SegmentBase) error {
// advance to the chunk where the docValues
-// reside for the given docID
+// reside for the given docNum
destChunkDataLoc := di.dvDataLoc
for i := 0; i < int(chunkNumber); i++ {
destChunkDataLoc += di.chunkLens[i]
@@ -116,7 +116,7 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
offset := uint64(0)
di.curChunkHeader = make([]MetaData, int(numDocs))
for i := 0; i < int(numDocs); i++ {
-di.curChunkHeader[i].DocID, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
+di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
offset += uint64(read)
di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
offset += uint64(read)
@@ -131,10 +131,10 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
return nil
}
-func (di *docValueIterator) visitDocValues(docID uint64,
+func (di *docValueIterator) visitDocValues(docNum uint64,
visitor index.DocumentFieldTermVisitor) error {
-// binary search the term locations for the docID
-start, length := di.getDocValueLocs(docID)
+// binary search the term locations for the docNum
+start, length := di.getDocValueLocs(docNum)
if start == math.MaxUint64 || length == math.MaxUint64 {
return nil
}
@@ -144,7 +144,7 @@ func (di *docValueIterator) visitDocValues(docID uint64,
return err
}
-// pick the terms for the given docID
+// pick the terms for the given docNum
uncompressed = uncompressed[start : start+length]
for {
i := bytes.Index(uncompressed, termSeparatorSplitSlice)
@@ -159,11 +159,11 @@ func (di *docValueIterator) visitDocValues(docID uint64,
return nil
}
-func (di *docValueIterator) getDocValueLocs(docID uint64) (uint64, uint64) {
+func (di *docValueIterator) getDocValueLocs(docNum uint64) (uint64, uint64) {
i := sort.Search(len(di.curChunkHeader), func(i int) bool {
-return di.curChunkHeader[i].DocID >= docID
+return di.curChunkHeader[i].DocNum >= docNum
})
if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocID == docID {
if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum {
return di.curChunkHeader[i].DocDvLoc, di.curChunkHeader[i].DocDvLen
}
return math.MaxUint64, math.MaxUint64
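
The lookup depends on curChunkHeader being sorted by DocNum: sort.Search returns the first entry with DocNum >= the target, and the math.MaxUint64 pair serves as a "not found in this chunk" sentinel. A standalone sketch of the same lookup, with an illustrative struct and made-up values:

package main

import (
	"fmt"
	"math"
	"sort"
)

type metaData struct {
	docNum, docDvLoc, docDvLen uint64
}

func docValueLocs(docNum uint64, header []metaData) (uint64, uint64) {
	// Binary search for the first entry whose docNum is >= the target.
	i := sort.Search(len(header), func(i int) bool {
		return header[i].docNum >= docNum
	})
	if i < len(header) && header[i].docNum == docNum {
		return header[i].docDvLoc, header[i].docDvLen
	}
	// Sentinel meaning "no doc values for this doc in this chunk".
	return math.MaxUint64, math.MaxUint64
}

func main() {
	header := []metaData{{2, 0, 7}, {5, 7, 3}, {9, 10, 12}} // sorted by docNum
	fmt.Println(docValueLocs(5, header)) // 7 3
	fmt.Println(docValueLocs(6, header)) // sentinel: not in this chunk
}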