diff --git a/cmd/bleve/cmd/zap/docvalue.go b/cmd/bleve/cmd/zap/docvalue.go
index 165829fd..74397495 100644
--- a/cmd/bleve/cmd/zap/docvalue.go
+++ b/cmd/bleve/cmd/zap/docvalue.go
@@ -165,7 +165,7 @@ var docvalueCmd = &cobra.Command{
 
 		/*
 			TODO => dump all chunk headers??
 			if len(args) == 3 && args[2] == ">" {
-				dumpChunkDocIDs(data, )
+				dumpChunkDocNums(data, )
 			}*/
 	}
@@ -187,7 +187,7 @@ var docvalueCmd = &cobra.Command{
 
 		docInChunk := uint64(localDocNum) / uint64(segment.ChunkFactor())
 		if numChunks < docInChunk {
-			return fmt.Errorf("no chunk exists for chunk number: %d for docID: %d", docInChunk, localDocNum)
+			return fmt.Errorf("no chunk exists for chunk number: %d for localDocNum: %d", docInChunk, localDocNum)
 		}
 
 		destChunkDataLoc := fieldDvLoc + offset
@@ -207,7 +207,7 @@ var docvalueCmd = &cobra.Command{
 		offset = uint64(0)
 		curChunkHeader := make([]zap.MetaData, int(numDocs))
 		for i := 0; i < int(numDocs); i++ {
-			curChunkHeader[i].DocID, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
+			curChunkHeader[i].DocNum, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
 			offset += uint64(nread)
 			curChunkHeader[i].DocDvLoc, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
 			offset += uint64(nread)
@@ -221,8 +221,8 @@ var docvalueCmd = &cobra.Command{
 
 		start, length := getDocValueLocs(uint64(localDocNum), curChunkHeader)
 		if start == math.MaxUint64 || length == math.MaxUint64 {
-			fmt.Printf("no field values found for docID %d\n", localDocNum)
-			fmt.Printf("Try docIDs present in chunk: %s\n", assortDocID(curChunkHeader))
+			fmt.Printf("no field values found for localDocNum: %d\n", localDocNum)
+			fmt.Printf("Try docNums present in chunk: %s\n", metaDataDocNums(curChunkHeader))
 			return nil
 		}
 		// uncompress the already loaded data
@@ -234,7 +234,7 @@ var docvalueCmd = &cobra.Command{
 		var termSeparator byte = 0xff
 		var termSeparatorSplitSlice = []byte{termSeparator}
 
-		// pick the terms for the given docID
+		// pick the terms for the given docNum
 		uncompressed = uncompressed[start : start+length]
 		for {
 			i := bytes.Index(uncompressed, termSeparatorSplitSlice)
@@ -250,23 +250,22 @@ var docvalueCmd = &cobra.Command{
 	},
 }
 
-func getDocValueLocs(docID uint64, metaHeader []zap.MetaData) (uint64, uint64) {
+func getDocValueLocs(docNum uint64, metaHeader []zap.MetaData) (uint64, uint64) {
 	i := sort.Search(len(metaHeader), func(i int) bool {
-		return metaHeader[i].DocID >= docID
+		return metaHeader[i].DocNum >= docNum
 	})
-	if i < len(metaHeader) && metaHeader[i].DocID == docID {
+	if i < len(metaHeader) && metaHeader[i].DocNum == docNum {
 		return metaHeader[i].DocDvLoc, metaHeader[i].DocDvLen
 	}
 	return math.MaxUint64, math.MaxUint64
 }
 
-func assortDocID(metaHeader []zap.MetaData) string {
-	docIDs := ""
+func metaDataDocNums(metaHeader []zap.MetaData) string {
+	docNums := ""
 	for _, meta := range metaHeader {
-		id := fmt.Sprintf("%d", meta.DocID)
-		docIDs += id + ", "
+		docNums += fmt.Sprintf("%d", meta.DocNum) + ", "
 	}
-	return docIDs
+	return docNums
 }
 
 func init() {
diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go
index e6625528..60d168e6 100644
--- a/index/scorch/segment/zap/build.go
+++ b/index/scorch/segment/zap/build.go
@@ -588,7 +588,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
 		if err != nil {
 			return nil, err
 		}
-		// reseting encoder for the next field
+		// resetting encoder for the next field
 		fdvEncoder.Reset()
 	}
 
diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go
index b0394049..83457146 100644
--- a/index/scorch/segment/zap/contentcoder.go
+++ b/index/scorch/segment/zap/contentcoder.go
@@ -39,7 +39,7 @@ type chunkedContentCoder struct {
 // MetaData represents the data information inside a
 // chunk.
 type MetaData struct {
-	DocID    uint64 // docid of the data inside the chunk
+	DocNum   uint64 // docNum of the data inside the chunk
 	DocDvLoc uint64 // starting offset for a given docid
 	DocDvLen uint64 // length of data inside the chunk for the given docid
 }
@@ -52,7 +52,7 @@ func newChunkedContentCoder(chunkSize uint64,
 	rv := &chunkedContentCoder{
 		chunkSize: chunkSize,
 		chunkLens: make([]uint64, total),
-		chunkMeta: []MetaData{},
+		chunkMeta: make([]MetaData, 0, total),
 	}
 
 	return rv
@@ -68,7 +68,7 @@ func (c *chunkedContentCoder) Reset() {
 	for i := range c.chunkLens {
 		c.chunkLens[i] = 0
 	}
-	c.chunkMeta = []MetaData{}
+	c.chunkMeta = c.chunkMeta[:0]
 }
 
 // Close indicates you are done calling Add() this allows
@@ -88,7 +88,7 @@ func (c *chunkedContentCoder) flushContents() error {
 
 	// write out the metaData slice
 	for _, meta := range c.chunkMeta {
-		_, err := writeUvarints(&c.chunkMetaBuf, meta.DocID, meta.DocDvLoc, meta.DocDvLen)
+		_, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvLoc, meta.DocDvLen)
 		if err != nil {
 			return err
 		}
@@ -118,7 +118,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
 		// clearing the chunk specific meta for next chunk
 		c.chunkBuf.Reset()
 		c.chunkMetaBuf.Reset()
-		c.chunkMeta = []MetaData{}
+		c.chunkMeta = c.chunkMeta[:0]
 		c.currChunk = chunk
 	}
 
@@ -130,7 +130,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
 	}
 
 	c.chunkMeta = append(c.chunkMeta, MetaData{
-		DocID:    docNum,
+		DocNum:   docNum,
 		DocDvLoc: uint64(dvOffset),
 		DocDvLen: uint64(dvSize),
 	})
diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go
index fb5b348a..0514bd30 100644
--- a/index/scorch/segment/zap/docvalues.go
+++ b/index/scorch/segment/zap/docvalues.go
@@ -99,7 +99,7 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string,
 func (di *docValueIterator) loadDvChunk(chunkNumber,
 	localDocNum uint64, s *SegmentBase) error {
 	// advance to the chunk where the docValues
-	// reside for the given docID
+	// reside for the given docNum
 	destChunkDataLoc := di.dvDataLoc
 	for i := 0; i < int(chunkNumber); i++ {
 		destChunkDataLoc += di.chunkLens[i]
@@ -116,7 +116,7 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
 	offset := uint64(0)
 	di.curChunkHeader = make([]MetaData, int(numDocs))
 	for i := 0; i < int(numDocs); i++ {
-		di.curChunkHeader[i].DocID, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
+		di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
 		offset += uint64(read)
 		di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
 		offset += uint64(read)
@@ -131,10 +131,10 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
 	return nil
 }
 
-func (di *docValueIterator) visitDocValues(docID uint64,
+func (di *docValueIterator) visitDocValues(docNum uint64,
 	visitor index.DocumentFieldTermVisitor) error {
-	// binary search the term locations for the docID
-	start, length := di.getDocValueLocs(docID)
+	// binary search the term locations for the docNum
+	start, length := di.getDocValueLocs(docNum)
 	if start == math.MaxUint64 || length == math.MaxUint64 {
 		return nil
 	}
@@ -144,7 +144,7 @@ func (di *docValueIterator) visitDocValues(docID uint64,
 		return err
 	}
 
-	// pick the terms for the given docID
+	// pick the terms for the given docNum
 	uncompressed = uncompressed[start : start+length]
 	for {
 		i := bytes.Index(uncompressed, termSeparatorSplitSlice)
@@ -159,11 +159,11 @@ func (di *docValueIterator) visitDocValues(docID uint64,
 	return nil
 }
 
-func (di *docValueIterator) getDocValueLocs(docID uint64) (uint64, uint64) {
+func (di *docValueIterator) getDocValueLocs(docNum uint64) (uint64, uint64) {
 	i := sort.Search(len(di.curChunkHeader), func(i int) bool {
-		return di.curChunkHeader[i].DocID >= docID
+		return di.curChunkHeader[i].DocNum >= docNum
 	})
-	if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocID == docID {
+	if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum {
 		return di.curChunkHeader[i].DocDvLoc, di.curChunkHeader[i].DocDvLen
 	}
 	return math.MaxUint64, math.MaxUint64
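
Note on the lookup this patch renames: both `getDocValueLocs` implementations rely on the per-chunk `MetaData` slice being sorted by `DocNum`, so `sort.Search` can binary-search it, with `math.MaxUint64` serving as the "not found" sentinel. A minimal standalone sketch of that pattern follows; the struct and function mirror the patch, while the `main` harness and its sample values are illustrative only:

```go
package main

import (
	"fmt"
	"math"
	"sort"
)

// MetaData mirrors the renamed zap chunk-header entry: one entry per
// document in the chunk, kept in ascending DocNum order.
type MetaData struct {
	DocNum   uint64 // docNum of the data inside the chunk
	DocDvLoc uint64 // starting offset of the doc's values within the chunk
	DocDvLen uint64 // length of the doc's values within the chunk
}

// getDocValueLocs binary-searches the sorted metadata for docNum and
// returns its (offset, length); (MaxUint64, MaxUint64) means "absent".
func getDocValueLocs(docNum uint64, metaHeader []MetaData) (uint64, uint64) {
	i := sort.Search(len(metaHeader), func(i int) bool {
		return metaHeader[i].DocNum >= docNum
	})
	if i < len(metaHeader) && metaHeader[i].DocNum == docNum {
		return metaHeader[i].DocDvLoc, metaHeader[i].DocDvLen
	}
	return math.MaxUint64, math.MaxUint64
}

func main() {
	// Hypothetical chunk header with two documents.
	header := []MetaData{
		{DocNum: 2, DocDvLoc: 0, DocDvLen: 5},
		{DocNum: 7, DocDvLoc: 5, DocDvLen: 3},
	}

	start, length := getDocValueLocs(7, header)
	fmt.Println(start, length) // 5 3

	start, length = getDocValueLocs(4, header)
	fmt.Println(start == math.MaxUint64, length == math.MaxUint64) // true true
}
```

The related `chunkMeta: make([]MetaData, 0, total)` and `c.chunkMeta = c.chunkMeta[:0]` changes keep the backing array alive across `Reset`/`Add` calls instead of allocating a fresh slice per chunk, which trims per-chunk garbage without changing behavior.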