improve command-line tool for zap
Correctly handle/print the additional loc bitmap address; this fixes the bitmap length that is output. Instantiate the roaring bitmap and print it out. Removed some unnecessary debug logging. Updated the dict command to print 1-hit encoded vals; this makes the dict command usable for seeing which doc IDs are in a segment and their corresponding doc numbers.
This commit is contained in:
parent
cf8e0d63bb
commit
e9b228bcdd
|
@ -17,7 +17,9 @@ package zap
|
||||||
import (
|
import (
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
||||||
"github.com/couchbase/vellum"
|
"github.com/couchbase/vellum"
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
)
|
)
|
||||||
|
@ -54,7 +56,14 @@ var dictCmd = &cobra.Command{
|
||||||
itr, err := fst.Iterator(nil, nil)
|
itr, err := fst.Iterator(nil, nil)
|
||||||
for err == nil {
|
for err == nil {
|
||||||
currTerm, currVal := itr.Current()
|
currTerm, currVal := itr.Current()
|
||||||
fmt.Printf("%s - %d (%x)\n", currTerm, currVal, currVal)
|
extra := ""
|
||||||
|
if currVal&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit {
|
||||||
|
docNum, normBits := zap.FSTValDecode1Hit(currVal)
|
||||||
|
norm := math.Float32frombits(uint32(normBits))
|
||||||
|
extra = fmt.Sprintf("-- docNum: %d, norm: %f", docNum, norm)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("%s - %d (%x) %s\n", currTerm, currVal, currVal, extra)
|
||||||
err = itr.Next()
|
err = itr.Next()
|
||||||
}
|
}
|
||||||
if err != nil && err != vellum.ErrIteratorDone {
|
if err != nil && err != vellum.ErrIteratorDone {
|
||||||
|
|
|
@ -17,9 +17,9 @@ package zap
|
||||||
import (
|
import (
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
|
||||||
"math"
|
"math"
|
||||||
|
|
||||||
|
"github.com/RoaringBitmap/roaring"
|
||||||
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
||||||
"github.com/couchbase/vellum"
|
"github.com/couchbase/vellum"
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
|
@ -59,7 +59,7 @@ var exploreCmd = &cobra.Command{
|
||||||
return fmt.Errorf("error looking for term : %v", err)
|
return fmt.Errorf("error looking for term : %v", err)
|
||||||
}
|
}
|
||||||
if exists {
|
if exists {
|
||||||
fmt.Printf("fst val is %d (%x)\n", postingsAddr, postingsAddr)
|
fmt.Printf("FST val is %d (%x)\n", postingsAddr, postingsAddr)
|
||||||
|
|
||||||
if postingsAddr&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit {
|
if postingsAddr&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit {
|
||||||
docNum, normBits := zap.FSTValDecode1Hit(postingsAddr)
|
docNum, normBits := zap.FSTValDecode1Hit(postingsAddr)
|
||||||
|
@ -81,10 +81,21 @@ var exploreCmd = &cobra.Command{
|
||||||
locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
||||||
n += uint64(read)
|
n += uint64(read)
|
||||||
|
|
||||||
|
var locBitmapAddr uint64
|
||||||
|
locBitmapAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
||||||
|
n += uint64(read)
|
||||||
|
|
||||||
var postingListLen uint64
|
var postingListLen uint64
|
||||||
postingListLen, _ = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
postingListLen, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
|
||||||
|
n += uint64(read)
|
||||||
|
|
||||||
fmt.Printf("Posting List Length: %d\n", postingListLen)
|
fmt.Printf("Posting List Length: %d\n", postingListLen)
|
||||||
|
bitmap := roaring.New()
|
||||||
|
_, err = bitmap.FromBuffer(data[postingsAddr+n : postingsAddr+n+postingListLen])
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
fmt.Printf("Posting List: %v\n", bitmap)
|
||||||
|
|
||||||
fmt.Printf("Freq details at: %d (%x)\n", freqAddr, freqAddr)
|
fmt.Printf("Freq details at: %d (%x)\n", freqAddr, freqAddr)
|
||||||
numChunks, r2 := binary.Uvarint(data[freqAddr : freqAddr+binary.MaxVarintLen64])
|
numChunks, r2 := binary.Uvarint(data[freqAddr : freqAddr+binary.MaxVarintLen64])
|
||||||
|
@ -109,11 +120,8 @@ var exploreCmd = &cobra.Command{
|
||||||
|
|
||||||
var locOffsets []uint64
|
var locOffsets []uint64
|
||||||
for j := uint64(0); j < numLChunks; j++ {
|
for j := uint64(0); j < numLChunks; j++ {
|
||||||
log.Printf("reading from %d(%x)\n", locAddr+n, locAddr+n)
|
|
||||||
log.Printf("data i see here: % x\n", data[locAddr+n:locAddr+n+binary.MaxVarintLen64])
|
|
||||||
lchunkLen, r4 := binary.Uvarint(data[locAddr+n : locAddr+n+binary.MaxVarintLen64])
|
lchunkLen, r4 := binary.Uvarint(data[locAddr+n : locAddr+n+binary.MaxVarintLen64])
|
||||||
n += uint64(r4)
|
n += uint64(r4)
|
||||||
log.Printf("see chunk len %d(%x)\n", lchunkLen, lchunkLen)
|
|
||||||
locOffsets = append(locOffsets, lchunkLen)
|
locOffsets = append(locOffsets, lchunkLen)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,6 +131,8 @@ var exploreCmd = &cobra.Command{
|
||||||
running2 += offset
|
running2 += offset
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Loc Bitmap at: %d (%x)\n", locBitmapAddr, locBitmapAddr)
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
fmt.Printf("dictionary does not contain term '%s'\n", args[2])
|
fmt.Printf("dictionary does not contain term '%s'\n", args[2])
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue