From e9b228bcdd09472170b27840539436e17da034c0 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 19 Mar 2018 14:14:59 -0400 Subject: [PATCH] improve command-line tool for zap correctly handle/print additional loc bitmap address this fixes bitmap length that is output instantiate roaring bitmap and print it out removed some unnecessary debug logging updated dict command to print 1-hit encoded vals this makes dict command usable for seeing which doc ids are in a segment and their corresponding doc number --- cmd/bleve/cmd/zap/dict.go | 11 ++++++++++- cmd/bleve/cmd/zap/explore.go | 22 ++++++++++++++++------ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/cmd/bleve/cmd/zap/dict.go b/cmd/bleve/cmd/zap/dict.go index 3e272719..e80be360 100644 --- a/cmd/bleve/cmd/zap/dict.go +++ b/cmd/bleve/cmd/zap/dict.go @@ -17,7 +17,9 @@ package zap import ( "encoding/binary" "fmt" + "math" + "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/couchbase/vellum" "github.com/spf13/cobra" ) @@ -54,7 +56,14 @@ var dictCmd = &cobra.Command{ itr, err := fst.Iterator(nil, nil) for err == nil { currTerm, currVal := itr.Current() - fmt.Printf("%s - %d (%x)\n", currTerm, currVal, currVal) + extra := "" + if currVal&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit { + docNum, normBits := zap.FSTValDecode1Hit(currVal) + norm := math.Float32frombits(uint32(normBits)) + extra = fmt.Sprintf("-- docNum: %d, norm: %f", docNum, norm) + } + + fmt.Printf("%s - %d (%x) %s\n", currTerm, currVal, currVal, extra) err = itr.Next() } if err != nil && err != vellum.ErrIteratorDone { diff --git a/cmd/bleve/cmd/zap/explore.go b/cmd/bleve/cmd/zap/explore.go index 543b572f..225b7373 100644 --- a/cmd/bleve/cmd/zap/explore.go +++ b/cmd/bleve/cmd/zap/explore.go @@ -17,9 +17,9 @@ package zap import ( "encoding/binary" "fmt" - "log" "math" + "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/couchbase/vellum" "github.com/spf13/cobra" @@ -59,7 +59,7 @@ var exploreCmd = &cobra.Command{ return fmt.Errorf("error looking for term : %v", err) } if exists { - fmt.Printf("fst val is %d (%x)\n", postingsAddr, postingsAddr) + fmt.Printf("FST val is %d (%x)\n", postingsAddr, postingsAddr) if postingsAddr&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit { docNum, normBits := zap.FSTValDecode1Hit(postingsAddr) @@ -81,10 +81,21 @@ var exploreCmd = &cobra.Command{ locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) n += uint64(read) + var locBitmapAddr uint64 + locBitmapAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) + n += uint64(read) + var postingListLen uint64 - postingListLen, _ = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) + postingListLen, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) + n += uint64(read) fmt.Printf("Posting List Length: %d\n", postingListLen) + bitmap := roaring.New() + _, err = bitmap.FromBuffer(data[postingsAddr+n : postingsAddr+n+postingListLen]) + if err != nil { + return err + } + fmt.Printf("Posting List: %v\n", bitmap) fmt.Printf("Freq details at: %d (%x)\n", freqAddr, freqAddr) numChunks, r2 := binary.Uvarint(data[freqAddr : freqAddr+binary.MaxVarintLen64]) @@ -109,11 +120,8 @@ var exploreCmd = &cobra.Command{ var locOffsets []uint64 for j := uint64(0); j < numLChunks; j++ { - log.Printf("reading from %d(%x)\n", locAddr+n, locAddr+n) - log.Printf("data i see here: % x\n", data[locAddr+n:locAddr+n+binary.MaxVarintLen64]) lchunkLen, r4 := binary.Uvarint(data[locAddr+n : locAddr+n+binary.MaxVarintLen64]) n += uint64(r4) - log.Printf("see chunk len %d(%x)\n", lchunkLen, lchunkLen) locOffsets = append(locOffsets, lchunkLen) } @@ -123,6 +131,8 @@ var exploreCmd = &cobra.Command{ running2 += offset } + fmt.Printf("Loc Bitmap at: %d (%x)\n", locBitmapAddr, locBitmapAddr) + } else { fmt.Printf("dictionary does not contain term '%s'\n", args[2]) }