0
0
Fork 0

improve command-line tool for zap

correctly handle/print additional loc bitmap address
this fixes bitmap length that is output
instantiate roaring bitmap and print it out
removed some unnecessary debug logging

updated dict command to print 1-hit encoded vals
this makes dict command usable for seeing which
doc ids are in a segment and their corresponding doc number
This commit is contained in:
Marty Schoch 2018-03-19 14:14:59 -04:00
parent cf8e0d63bb
commit e9b228bcdd
2 changed files with 26 additions and 7 deletions

View File

@ -17,7 +17,9 @@ package zap
import (
"encoding/binary"
"fmt"
"math"
"github.com/blevesearch/bleve/index/scorch/segment/zap"
"github.com/couchbase/vellum"
"github.com/spf13/cobra"
)
@ -54,7 +56,14 @@ var dictCmd = &cobra.Command{
itr, err := fst.Iterator(nil, nil)
for err == nil {
currTerm, currVal := itr.Current()
fmt.Printf("%s - %d (%x)\n", currTerm, currVal, currVal)
extra := ""
if currVal&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit {
docNum, normBits := zap.FSTValDecode1Hit(currVal)
norm := math.Float32frombits(uint32(normBits))
extra = fmt.Sprintf("-- docNum: %d, norm: %f", docNum, norm)
}
fmt.Printf("%s - %d (%x) %s\n", currTerm, currVal, currVal, extra)
err = itr.Next()
}
if err != nil && err != vellum.ErrIteratorDone {

View File

@ -17,9 +17,9 @@ package zap
import (
"encoding/binary"
"fmt"
"log"
"math"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment/zap"
"github.com/couchbase/vellum"
"github.com/spf13/cobra"
@ -59,7 +59,7 @@ var exploreCmd = &cobra.Command{
return fmt.Errorf("error looking for term : %v", err)
}
if exists {
fmt.Printf("fst val is %d (%x)\n", postingsAddr, postingsAddr)
fmt.Printf("FST val is %d (%x)\n", postingsAddr, postingsAddr)
if postingsAddr&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit {
docNum, normBits := zap.FSTValDecode1Hit(postingsAddr)
@ -81,10 +81,21 @@ var exploreCmd = &cobra.Command{
locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
n += uint64(read)
var locBitmapAddr uint64
locBitmapAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
n += uint64(read)
var postingListLen uint64
postingListLen, _ = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
postingListLen, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
n += uint64(read)
fmt.Printf("Posting List Length: %d\n", postingListLen)
bitmap := roaring.New()
_, err = bitmap.FromBuffer(data[postingsAddr+n : postingsAddr+n+postingListLen])
if err != nil {
return err
}
fmt.Printf("Posting List: %v\n", bitmap)
fmt.Printf("Freq details at: %d (%x)\n", freqAddr, freqAddr)
numChunks, r2 := binary.Uvarint(data[freqAddr : freqAddr+binary.MaxVarintLen64])
@ -109,11 +120,8 @@ var exploreCmd = &cobra.Command{
var locOffsets []uint64
for j := uint64(0); j < numLChunks; j++ {
log.Printf("reading from %d(%x)\n", locAddr+n, locAddr+n)
log.Printf("data i see here: % x\n", data[locAddr+n:locAddr+n+binary.MaxVarintLen64])
lchunkLen, r4 := binary.Uvarint(data[locAddr+n : locAddr+n+binary.MaxVarintLen64])
n += uint64(r4)
log.Printf("see chunk len %d(%x)\n", lchunkLen, lchunkLen)
locOffsets = append(locOffsets, lchunkLen)
}
@ -123,6 +131,8 @@ var exploreCmd = &cobra.Command{
running2 += offset
}
fmt.Printf("Loc Bitmap at: %d (%x)\n", locBitmapAddr, locBitmapAddr)
} else {
fmt.Printf("dictionary does not contain term '%s'\n", args[2])
}