Switch to binary search for finding the segment that contains a global doc num.
Added unit tests specifically for segmentIndexAndLocalDocNumFromGlobal.
This commit is contained in:
parent
c2047dcdf9
commit
7c964de8bf
|
@ -517,3 +517,107 @@ func TestIndexDocIdOnlyReader(t *testing.T) {
|
|||
// }
|
||||
|
||||
}
|
||||
|
||||
func TestSegmentIndexAndLocalDocNumFromGlobal(t *testing.T) {
|
||||
tests := []struct {
|
||||
offsets []uint64
|
||||
globalDocNum uint64
|
||||
segmentIndex int
|
||||
localDocNum uint64
|
||||
}{
|
||||
// just 1 segment
|
||||
{
|
||||
offsets: []uint64{0},
|
||||
globalDocNum: 0,
|
||||
segmentIndex: 0,
|
||||
localDocNum: 0,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0},
|
||||
globalDocNum: 1,
|
||||
segmentIndex: 0,
|
||||
localDocNum: 1,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0},
|
||||
globalDocNum: 25,
|
||||
segmentIndex: 0,
|
||||
localDocNum: 25,
|
||||
},
|
||||
// now 2 segments, 30 docs in first
|
||||
{
|
||||
offsets: []uint64{0, 30},
|
||||
globalDocNum: 0,
|
||||
segmentIndex: 0,
|
||||
localDocNum: 0,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 30},
|
||||
globalDocNum: 1,
|
||||
segmentIndex: 0,
|
||||
localDocNum: 1,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 30},
|
||||
globalDocNum: 25,
|
||||
segmentIndex: 0,
|
||||
localDocNum: 25,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 30},
|
||||
globalDocNum: 30,
|
||||
segmentIndex: 1,
|
||||
localDocNum: 0,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 30},
|
||||
globalDocNum: 35,
|
||||
segmentIndex: 1,
|
||||
localDocNum: 5,
|
||||
},
|
||||
// lots of segments
|
||||
{
|
||||
offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
|
||||
globalDocNum: 0,
|
||||
segmentIndex: 0,
|
||||
localDocNum: 0,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
|
||||
globalDocNum: 25,
|
||||
segmentIndex: 0,
|
||||
localDocNum: 25,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
|
||||
globalDocNum: 35,
|
||||
segmentIndex: 1,
|
||||
localDocNum: 5,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
|
||||
globalDocNum: 100,
|
||||
segmentIndex: 4,
|
||||
localDocNum: 1,
|
||||
},
|
||||
{
|
||||
offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
|
||||
globalDocNum: 825,
|
||||
segmentIndex: 6,
|
||||
localDocNum: 25,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
i := &IndexSnapshot{
|
||||
offsets: test.offsets,
|
||||
}
|
||||
gotSegmentIndex, gotLocalDocNum := i.segmentIndexAndLocalDocNumFromGlobal(test.globalDocNum)
|
||||
if gotSegmentIndex != test.segmentIndex {
|
||||
t.Errorf("got segment index %d expected %d for offsets %v globalDocNum %d", gotSegmentIndex, test.segmentIndex, test.offsets, test.globalDocNum)
|
||||
}
|
||||
if gotLocalDocNum != test.localDocNum {
|
||||
t.Errorf("got localDocNum %d expected %d for offsets %v globalDocNum %d", gotLocalDocNum, test.localDocNum, test.offsets, test.globalDocNum)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"container/heap"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/document"
|
||||
|
@ -191,14 +192,10 @@ func (i *IndexSnapshot) Document(id string) (rv *document.Document, err error) {
|
|||
}
|
||||
|
||||
func (i *IndexSnapshot) segmentIndexAndLocalDocNumFromGlobal(docNum uint64) (int, uint64) {
|
||||
var segmentIndex uint64
|
||||
for j := 1; j < len(i.offsets); j++ {
|
||||
if docNum >= i.offsets[j] {
|
||||
segmentIndex = uint64(j)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
segmentIndex := sort.Search(len(i.offsets),
|
||||
func(x int) bool {
|
||||
return i.offsets[x] > docNum
|
||||
}) - 1
|
||||
|
||||
localDocNum := docNum - i.offsets[segmentIndex]
|
||||
return int(segmentIndex), localDocNum
|
||||
|
|
Loading…
Reference in New Issue