improve command-line tool for zap
Correctly handle/print the additional loc bitmap address; this fixes the bitmap length that is output. Instantiate the roaring bitmap and print it out. Removed some unnecessary debug logging. Updated the dict command to print 1-hit encoded vals; this makes the dict command usable for seeing which doc ids are in a segment and their corresponding doc numbers.
This commit is contained in:
parent
cf8e0d63bb
commit
e9b228bcdd
|
@ -17,7 +17,9 @@ package zap
|
|||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
||||
"github.com/couchbase/vellum"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
@ -54,7 +56,14 @@ var dictCmd = &cobra.Command{
|
|||
itr, err := fst.Iterator(nil, nil)
|
||||
for err == nil {
|
||||
currTerm, currVal := itr.Current()
|
||||
fmt.Printf("%s - %d (%x)\n", currTerm, currVal, currVal)
|
||||
extra := ""
|
||||
if currVal&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit {
|
||||
docNum, normBits := zap.FSTValDecode1Hit(currVal)
|
||||
norm := math.Float32frombits(uint32(normBits))
|
||||
extra = fmt.Sprintf("-- docNum: %d, norm: %f", docNum, norm)
|
||||
}
|
||||
|
||||
fmt.Printf("%s - %d (%x) %s\n", currTerm, currVal, currVal, extra)
|
||||
err = itr.Next()
|
||||
}
|
||||
if err != nil && err != vellum.ErrIteratorDone {
|
||||
|
|
|
@ -17,9 +17,9 @@ package zap
|
|||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
||||
"github.com/couchbase/vellum"
|
||||
"github.com/spf13/cobra"
|
||||
|
@ -59,7 +59,7 @@ var exploreCmd = &cobra.Command{
|
|||
return fmt.Errorf("error looking for term : %v", err)
|
||||
}
|
||||
if exists {
|
||||
fmt.Printf("fst val is %d (%x)\n", postingsAddr, postingsAddr)
|
||||
fmt.Printf("FST val is %d (%x)\n", postingsAddr, postingsAddr)
|
||||
|
||||
if postingsAddr&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit {
|
||||
docNum, normBits := zap.FSTValDecode1Hit(postingsAddr)
|
||||
|
@ -81,10 +81,21 @@ var exploreCmd = &cobra.Command{
|
|||
locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
|
||||
var locBitmapAddr uint64
|
||||
locBitmapAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
|
||||
var postingListLen uint64
|
||||
postingListLen, _ = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
||||
postingListLen, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
|
||||
fmt.Printf("Posting List Length: %d\n", postingListLen)
|
||||
bitmap := roaring.New()
|
||||
_, err = bitmap.FromBuffer(data[postingsAddr+n : postingsAddr+n+postingListLen])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Printf("Posting List: %v\n", bitmap)
|
||||
|
||||
fmt.Printf("Freq details at: %d (%x)\n", freqAddr, freqAddr)
|
||||
numChunks, r2 := binary.Uvarint(data[freqAddr : freqAddr+binary.MaxVarintLen64])
|
||||
|
@ -109,11 +120,8 @@ var exploreCmd = &cobra.Command{
|
|||
|
||||
var locOffsets []uint64
|
||||
for j := uint64(0); j < numLChunks; j++ {
|
||||
log.Printf("reading from %d(%x)\n", locAddr+n, locAddr+n)
|
||||
log.Printf("data i see here: % x\n", data[locAddr+n:locAddr+n+binary.MaxVarintLen64])
|
||||
lchunkLen, r4 := binary.Uvarint(data[locAddr+n : locAddr+n+binary.MaxVarintLen64])
|
||||
n += uint64(r4)
|
||||
log.Printf("see chunk len %d(%x)\n", lchunkLen, lchunkLen)
|
||||
locOffsets = append(locOffsets, lchunkLen)
|
||||
}
|
||||
|
||||
|
@ -123,6 +131,8 @@ var exploreCmd = &cobra.Command{
|
|||
running2 += offset
|
||||
}
|
||||
|
||||
fmt.Printf("Loc Bitmap at: %d (%x)\n", locBitmapAddr, locBitmapAddr)
|
||||
|
||||
} else {
|
||||
fmt.Printf("dictionary does not contain term '%s'\n", args[2])
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue