ed4826b189
The optimization to byte-copy all the storedDocs for a given segment during merging kicks in when the fields are the same across all segments and when there are no deletions for that given segment. This can happen, for example, during data loading or insert-only scenarios. As part of this commit, the Segment.copyStoredDocs() method was added, which uses a single Write() call to copy all the stored docs bytes of a segment to a writer in one shot. And, getDocStoredMetaAndCompressed() was refactored into a related helper function, getDocStoredOffsets(), which provides the storedDocs metadata (offsets & lengths) for a doc.
785 lines
21 KiB
Go
785 lines
21 KiB
Go
// Copyright (c) 2017 Couchbase, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package zap
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"reflect"
|
|
"sort"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/RoaringBitmap/roaring"
|
|
"github.com/blevesearch/bleve/analysis"
|
|
"github.com/blevesearch/bleve/document"
|
|
"github.com/blevesearch/bleve/index"
|
|
"github.com/blevesearch/bleve/index/scorch/segment/mem"
|
|
)
|
|
|
|
func TestMerge(t *testing.T) {
|
|
_ = os.RemoveAll("/tmp/scorch.zap")
|
|
_ = os.RemoveAll("/tmp/scorch2.zap")
|
|
_ = os.RemoveAll("/tmp/scorch3.zap")
|
|
|
|
memSegment := buildMemSegmentMulti()
|
|
err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
memSegment2 := buildMemSegmentMulti2()
|
|
err = PersistSegment(memSegment2, "/tmp/scorch2.zap", 1024)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
segment, err := Open("/tmp/scorch.zap")
|
|
if err != nil {
|
|
t.Fatalf("error opening segment: %v", err)
|
|
}
|
|
defer func() {
|
|
cerr := segment.Close()
|
|
if cerr != nil {
|
|
t.Fatalf("error closing segment: %v", err)
|
|
}
|
|
}()
|
|
|
|
segment2, err := Open("/tmp/scorch2.zap")
|
|
if err != nil {
|
|
t.Fatalf("error opening segment: %v", err)
|
|
}
|
|
defer func() {
|
|
cerr := segment2.Close()
|
|
if cerr != nil {
|
|
t.Fatalf("error closing segment: %v", err)
|
|
}
|
|
}()
|
|
|
|
segsToMerge := make([]*Segment, 2)
|
|
segsToMerge[0] = segment.(*Segment)
|
|
segsToMerge[1] = segment2.(*Segment)
|
|
|
|
_, err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
segm, err := Open("/tmp/scorch3.zap")
|
|
if err != nil {
|
|
t.Fatalf("error opening merged segment: %v", err)
|
|
}
|
|
seg3 := segm.(*Segment)
|
|
defer func() {
|
|
cerr := seg3.Close()
|
|
if cerr != nil {
|
|
t.Fatalf("error closing segment: %v", err)
|
|
}
|
|
}()
|
|
|
|
if seg3.Path() != "/tmp/scorch3.zap" {
|
|
t.Fatalf("wrong path")
|
|
}
|
|
if seg3.Count() != 4 {
|
|
t.Fatalf("wrong count")
|
|
}
|
|
if len(seg3.Fields()) != 5 {
|
|
t.Fatalf("wrong # fields: %#v\n", seg3.Fields())
|
|
}
|
|
|
|
testMergeWithSelf(t, seg3, 4)
|
|
}
|
|
|
|
// TestMergeWithEmptySegment merges one populated segment followed by a
// single empty segment.
func TestMergeWithEmptySegment(t *testing.T) {
	testMergeWithEmptySegments(t, true, 1)
}
|
|
|
|
// TestMergeWithEmptySegments merges one populated segment followed by
// five empty segments.
func TestMergeWithEmptySegments(t *testing.T) {
	testMergeWithEmptySegments(t, true, 5)
}
|
|
|
|
// TestMergeWithEmptySegmentFirst merges a single empty segment followed
// by one populated segment.
func TestMergeWithEmptySegmentFirst(t *testing.T) {
	testMergeWithEmptySegments(t, false, 1)
}
|
|
|
|
// TestMergeWithEmptySegmentsFirst merges five empty segments followed
// by one populated segment.
func TestMergeWithEmptySegmentsFirst(t *testing.T) {
	testMergeWithEmptySegments(t, false, 5)
}
|
|
|
|
func testMergeWithEmptySegments(t *testing.T, before bool, numEmptySegments int) {
|
|
_ = os.RemoveAll("/tmp/scorch.zap")
|
|
|
|
memSegment := buildMemSegmentMulti()
|
|
err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
segment, err := Open("/tmp/scorch.zap")
|
|
if err != nil {
|
|
t.Fatalf("error opening segment: %v", err)
|
|
}
|
|
defer func() {
|
|
cerr := segment.Close()
|
|
if cerr != nil {
|
|
t.Fatalf("error closing segment: %v", err)
|
|
}
|
|
}()
|
|
|
|
var segsToMerge []*Segment
|
|
|
|
if before {
|
|
segsToMerge = append(segsToMerge, segment.(*Segment))
|
|
}
|
|
|
|
for i := 0; i < numEmptySegments; i++ {
|
|
fname := fmt.Sprintf("scorch-empty-%d.zap", i)
|
|
|
|
_ = os.RemoveAll("/tmp/" + fname)
|
|
|
|
emptySegment := mem.NewFromAnalyzedDocs([]*index.AnalysisResult{})
|
|
err = PersistSegment(emptySegment, "/tmp/"+fname, 1024)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
emptyFileSegment, err := Open("/tmp/" + fname)
|
|
if err != nil {
|
|
t.Fatalf("error opening segment: %v", err)
|
|
}
|
|
defer func(emptyFileSegment *Segment) {
|
|
cerr := emptyFileSegment.Close()
|
|
if cerr != nil {
|
|
t.Fatalf("error closing segment: %v", err)
|
|
}
|
|
}(emptyFileSegment.(*Segment))
|
|
|
|
segsToMerge = append(segsToMerge, emptyFileSegment.(*Segment))
|
|
}
|
|
|
|
if !before {
|
|
segsToMerge = append(segsToMerge, segment.(*Segment))
|
|
}
|
|
|
|
_ = os.RemoveAll("/tmp/scorch3.zap")
|
|
|
|
drops := make([]*roaring.Bitmap, len(segsToMerge))
|
|
|
|
_, err = Merge(segsToMerge, drops, "/tmp/scorch3.zap", 1024)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
segm, err := Open("/tmp/scorch3.zap")
|
|
if err != nil {
|
|
t.Fatalf("error opening merged segment: %v", err)
|
|
}
|
|
segCur := segm.(*Segment)
|
|
defer func() {
|
|
cerr := segCur.Close()
|
|
if cerr != nil {
|
|
t.Fatalf("error closing segment: %v", err)
|
|
}
|
|
}()
|
|
|
|
if segCur.Path() != "/tmp/scorch3.zap" {
|
|
t.Fatalf("wrong path")
|
|
}
|
|
if segCur.Count() != 2 {
|
|
t.Fatalf("wrong count, numEmptySegments: %d, got count: %d", numEmptySegments, segCur.Count())
|
|
}
|
|
if len(segCur.Fields()) != 5 {
|
|
t.Fatalf("wrong # fields: %#v\n", segCur.Fields())
|
|
}
|
|
|
|
testMergeWithSelf(t, segCur, 2)
|
|
}
|
|
|
|
func testMergeWithSelf(t *testing.T, segCur *Segment, expectedCount uint64) {
|
|
// trying merging the segment with itself for a few rounds
|
|
var diffs []string
|
|
|
|
for i := 0; i < 10; i++ {
|
|
fname := fmt.Sprintf("scorch-self-%d.zap", i)
|
|
|
|
_ = os.RemoveAll("/tmp/" + fname)
|
|
|
|
segsToMerge := make([]*Segment, 1)
|
|
segsToMerge[0] = segCur
|
|
|
|
_, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/"+fname, 1024)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
segm, err := Open("/tmp/" + fname)
|
|
if err != nil {
|
|
t.Fatalf("error opening merged segment: %v", err)
|
|
}
|
|
segNew := segm.(*Segment)
|
|
defer func(s *Segment) {
|
|
cerr := s.Close()
|
|
if cerr != nil {
|
|
t.Fatalf("error closing segment: %v", err)
|
|
}
|
|
}(segNew)
|
|
|
|
if segNew.Count() != expectedCount {
|
|
t.Fatalf("wrong count")
|
|
}
|
|
if len(segNew.Fields()) != 5 {
|
|
t.Fatalf("wrong # fields: %#v\n", segNew.Fields())
|
|
}
|
|
|
|
diff := compareSegments(segCur, segNew)
|
|
if diff != "" {
|
|
diffs = append(diffs, fname+" is different than previous:\n"+diff)
|
|
}
|
|
|
|
segCur = segNew
|
|
}
|
|
|
|
if len(diffs) > 0 {
|
|
t.Errorf("mismatches after repeated self-merging: %v", strings.Join(diffs, "\n"))
|
|
}
|
|
}
|
|
|
|
func compareSegments(a, b *Segment) string {
|
|
var rv []string
|
|
|
|
if a.Count() != b.Count() {
|
|
return "counts"
|
|
}
|
|
|
|
afields := append([]string(nil), a.Fields()...)
|
|
bfields := append([]string(nil), b.Fields()...)
|
|
sort.Strings(afields)
|
|
sort.Strings(bfields)
|
|
if !reflect.DeepEqual(afields, bfields) {
|
|
return "fields"
|
|
}
|
|
|
|
for _, fieldName := range afields {
|
|
adict, err := a.Dictionary(fieldName)
|
|
if err != nil {
|
|
return fmt.Sprintf("adict err: %v", err)
|
|
}
|
|
bdict, err := b.Dictionary(fieldName)
|
|
if err != nil {
|
|
return fmt.Sprintf("bdict err: %v", err)
|
|
}
|
|
|
|
if adict.(*Dictionary).fst.Len() != bdict.(*Dictionary).fst.Len() {
|
|
rv = append(rv, fmt.Sprintf("field %s, dict fst Len()'s different: %v %v",
|
|
fieldName, adict.(*Dictionary).fst.Len(), bdict.(*Dictionary).fst.Len()))
|
|
}
|
|
|
|
aitr := adict.Iterator()
|
|
bitr := bdict.Iterator()
|
|
for {
|
|
anext, aerr := aitr.Next()
|
|
bnext, berr := bitr.Next()
|
|
if aerr != berr {
|
|
rv = append(rv, fmt.Sprintf("field %s, dict iterator Next() errors different: %v %v",
|
|
fieldName, aerr, berr))
|
|
break
|
|
}
|
|
if !reflect.DeepEqual(anext, bnext) {
|
|
rv = append(rv, fmt.Sprintf("field %s, dict iterator Next() results different: %#v %#v",
|
|
fieldName, anext, bnext))
|
|
// keep going to try to see more diff details at the postingsList level
|
|
}
|
|
if aerr != nil || anext == nil ||
|
|
berr != nil || bnext == nil {
|
|
break
|
|
}
|
|
|
|
for _, next := range []*index.DictEntry{anext, bnext} {
|
|
if next == nil {
|
|
continue
|
|
}
|
|
|
|
aplist, aerr := adict.(*Dictionary).postingsList([]byte(next.Term), nil, nil)
|
|
bplist, berr := bdict.(*Dictionary).postingsList([]byte(next.Term), nil, nil)
|
|
if aerr != berr {
|
|
rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList() errors different: %v %v",
|
|
fieldName, next.Term, aerr, berr))
|
|
}
|
|
|
|
if (aplist != nil) != (bplist != nil) {
|
|
rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList() results different: %v %v",
|
|
fieldName, next.Term, aplist, bplist))
|
|
break
|
|
}
|
|
|
|
if aerr != nil || aplist == nil ||
|
|
berr != nil || bplist == nil {
|
|
break
|
|
}
|
|
|
|
if aplist.Count() != bplist.Count() {
|
|
rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList().Count()'s different: %v %v",
|
|
fieldName, next.Term, aplist.Count(), bplist.Count()))
|
|
}
|
|
|
|
apitr := aplist.Iterator()
|
|
bpitr := bplist.Iterator()
|
|
if (apitr != nil) != (bpitr != nil) {
|
|
rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList.Iterator() results different: %v %v",
|
|
fieldName, next.Term, apitr, bpitr))
|
|
break
|
|
}
|
|
|
|
for {
|
|
apitrn, aerr := apitr.Next()
|
|
bpitrn, aerr := bpitr.Next()
|
|
if aerr != berr {
|
|
rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() errors different: %v %v",
|
|
fieldName, next.Term, aerr, berr))
|
|
}
|
|
|
|
if (apitrn != nil) != (bpitrn != nil) {
|
|
rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() results different: %v %v",
|
|
fieldName, next.Term, apitrn, bpitrn))
|
|
break
|
|
}
|
|
|
|
if aerr != nil || apitrn == nil ||
|
|
berr != nil || bpitrn == nil {
|
|
break
|
|
}
|
|
|
|
if apitrn.Number() != bpitrn.Number() {
|
|
rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() Number()'s different: %v %v",
|
|
fieldName, next.Term, apitrn.Number(), bpitrn.Number()))
|
|
}
|
|
|
|
if apitrn.Frequency() != bpitrn.Frequency() {
|
|
rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() Frequency()'s different: %v %v",
|
|
fieldName, next.Term, apitrn.Frequency(), bpitrn.Frequency()))
|
|
}
|
|
|
|
if apitrn.Norm() != bpitrn.Norm() {
|
|
rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() Norm()'s different: %v %v",
|
|
fieldName, next.Term, apitrn.Norm(), bpitrn.Norm()))
|
|
}
|
|
|
|
if len(apitrn.Locations()) != len(bpitrn.Locations()) {
|
|
rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() Locations() len's different: %v %v",
|
|
fieldName, next.Term, len(apitrn.Locations()), len(bpitrn.Locations())))
|
|
}
|
|
|
|
for loci, aloc := range apitrn.Locations() {
|
|
bloc := bpitrn.Locations()[loci]
|
|
|
|
if (aloc != nil) != (bloc != nil) {
|
|
rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() loc different: %v %v",
|
|
fieldName, next.Term, aloc, bloc))
|
|
break
|
|
}
|
|
|
|
if aloc.Field() != bloc.Field() ||
|
|
aloc.Start() != bloc.Start() ||
|
|
aloc.End() != bloc.End() ||
|
|
aloc.Pos() != bloc.Pos() ||
|
|
!reflect.DeepEqual(aloc.ArrayPositions(), bloc.ArrayPositions()) {
|
|
rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() loc details different: %v %v",
|
|
fieldName, next.Term, aloc, bloc))
|
|
}
|
|
}
|
|
|
|
if fieldName == "_id" {
|
|
docId := next.Term
|
|
docNumA := apitrn.Number()
|
|
docNumB := bpitrn.Number()
|
|
afields := map[string]interface{}{}
|
|
err = a.VisitDocument(apitrn.Number(),
|
|
func(field string, typ byte, value []byte, pos []uint64) bool {
|
|
afields[field+"-typ"] = typ
|
|
afields[field+"-value"] = value
|
|
afields[field+"-pos"] = pos
|
|
return true
|
|
})
|
|
if err != nil {
|
|
rv = append(rv, fmt.Sprintf("a.VisitDocument err: %v", err))
|
|
}
|
|
bfields := map[string]interface{}{}
|
|
err = b.VisitDocument(bpitrn.Number(),
|
|
func(field string, typ byte, value []byte, pos []uint64) bool {
|
|
bfields[field+"-typ"] = typ
|
|
bfields[field+"-value"] = value
|
|
bfields[field+"-pos"] = pos
|
|
return true
|
|
})
|
|
if err != nil {
|
|
rv = append(rv, fmt.Sprintf("b.VisitDocument err: %v", err))
|
|
}
|
|
if !reflect.DeepEqual(afields, bfields) {
|
|
rv = append(rv, fmt.Sprintf("afields != bfields,"+
|
|
" id: %s, docNumA: %d, docNumB: %d,"+
|
|
" afields: %#v, bfields: %#v",
|
|
docId, docNumA, docNumB, afields, bfields))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return strings.Join(rv, "\n")
|
|
}
|
|
|
|
func TestMergeAndDrop(t *testing.T) {
|
|
docsToDrop := make([]*roaring.Bitmap, 2)
|
|
docsToDrop[0] = roaring.NewBitmap()
|
|
docsToDrop[0].AddInt(1)
|
|
docsToDrop[1] = roaring.NewBitmap()
|
|
docsToDrop[1].AddInt(1)
|
|
testMergeAndDrop(t, docsToDrop)
|
|
}
|
|
|
|
func TestMergeAndDropAllFromOneSegment(t *testing.T) {
|
|
docsToDrop := make([]*roaring.Bitmap, 2)
|
|
docsToDrop[0] = roaring.NewBitmap()
|
|
docsToDrop[0].AddInt(0)
|
|
docsToDrop[0].AddInt(1)
|
|
docsToDrop[1] = roaring.NewBitmap()
|
|
testMergeAndDrop(t, docsToDrop)
|
|
}
|
|
|
|
func testMergeAndDrop(t *testing.T, docsToDrop []*roaring.Bitmap) {
|
|
_ = os.RemoveAll("/tmp/scorch.zap")
|
|
_ = os.RemoveAll("/tmp/scorch2.zap")
|
|
|
|
memSegment := buildMemSegmentMulti()
|
|
err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
segment, err := Open("/tmp/scorch.zap")
|
|
if err != nil {
|
|
t.Fatalf("error opening segment: %v", err)
|
|
}
|
|
defer func() {
|
|
cerr := segment.Close()
|
|
if cerr != nil {
|
|
t.Fatalf("error closing segment: %v", err)
|
|
}
|
|
}()
|
|
|
|
memSegment2 := buildMemSegmentMulti2()
|
|
err = PersistSegment(memSegment2, "/tmp/scorch2.zap", 1024)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
segment2, err := Open("/tmp/scorch2.zap")
|
|
if err != nil {
|
|
t.Fatalf("error opening segment: %v", err)
|
|
}
|
|
defer func() {
|
|
cerr := segment2.Close()
|
|
if cerr != nil {
|
|
t.Fatalf("error closing segment: %v", err)
|
|
}
|
|
}()
|
|
|
|
segsToMerge := make([]*Segment, 2)
|
|
segsToMerge[0] = segment.(*Segment)
|
|
segsToMerge[1] = segment2.(*Segment)
|
|
|
|
testMergeAndDropSegments(t, segsToMerge, docsToDrop, 2)
|
|
}
|
|
|
|
func TestMergeWithUpdates(t *testing.T) {
|
|
segmentDocIds := [][]string{
|
|
[]string{"a", "b"},
|
|
[]string{"b", "c"}, // doc "b" updated
|
|
}
|
|
|
|
docsToDrop := make([]*roaring.Bitmap, 2)
|
|
docsToDrop[0] = roaring.NewBitmap()
|
|
docsToDrop[0].AddInt(1) // doc "b" updated
|
|
docsToDrop[1] = roaring.NewBitmap()
|
|
|
|
testMergeWithUpdates(t, segmentDocIds, docsToDrop, 3)
|
|
}
|
|
|
|
func TestMergeWithUpdatesOnManySegments(t *testing.T) {
|
|
segmentDocIds := [][]string{
|
|
[]string{"a", "b"},
|
|
[]string{"b", "c"}, // doc "b" updated
|
|
[]string{"c", "d"}, // doc "c" updated
|
|
[]string{"d", "e"}, // doc "d" updated
|
|
}
|
|
|
|
docsToDrop := make([]*roaring.Bitmap, 4)
|
|
docsToDrop[0] = roaring.NewBitmap()
|
|
docsToDrop[0].AddInt(1) // doc "b" updated
|
|
docsToDrop[1] = roaring.NewBitmap()
|
|
docsToDrop[1].AddInt(1) // doc "c" updated
|
|
docsToDrop[2] = roaring.NewBitmap()
|
|
docsToDrop[2].AddInt(1) // doc "d" updated
|
|
docsToDrop[3] = roaring.NewBitmap()
|
|
|
|
testMergeWithUpdates(t, segmentDocIds, docsToDrop, 5)
|
|
}
|
|
|
|
func TestMergeWithUpdatesOnOneDoc(t *testing.T) {
|
|
segmentDocIds := [][]string{
|
|
[]string{"a", "b"},
|
|
[]string{"a", "c"}, // doc "a" updated
|
|
[]string{"a", "d"}, // doc "a" updated
|
|
[]string{"a", "e"}, // doc "a" updated
|
|
}
|
|
|
|
docsToDrop := make([]*roaring.Bitmap, 4)
|
|
docsToDrop[0] = roaring.NewBitmap()
|
|
docsToDrop[0].AddInt(0) // doc "a" updated
|
|
docsToDrop[1] = roaring.NewBitmap()
|
|
docsToDrop[1].AddInt(0) // doc "a" updated
|
|
docsToDrop[2] = roaring.NewBitmap()
|
|
docsToDrop[2].AddInt(0) // doc "a" updated
|
|
docsToDrop[3] = roaring.NewBitmap()
|
|
|
|
testMergeWithUpdates(t, segmentDocIds, docsToDrop, 5)
|
|
}
|
|
|
|
func testMergeWithUpdates(t *testing.T, segmentDocIds [][]string, docsToDrop []*roaring.Bitmap, expectedNumDocs uint64) {
|
|
var segsToMerge []*Segment
|
|
|
|
// convert segmentDocIds to segsToMerge
|
|
for i, docIds := range segmentDocIds {
|
|
fname := fmt.Sprintf("scorch%d.zap", i)
|
|
|
|
_ = os.RemoveAll("/tmp/" + fname)
|
|
|
|
memSegment := buildMemSegmentMultiHelper(docIds)
|
|
err := PersistSegment(memSegment, "/tmp/"+fname, 1024)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
segment, err := Open("/tmp/" + fname)
|
|
if err != nil {
|
|
t.Fatalf("error opening segment: %v", err)
|
|
}
|
|
defer func(segment *Segment) {
|
|
cerr := segment.Close()
|
|
if cerr != nil {
|
|
t.Fatalf("error closing segment: %v", err)
|
|
}
|
|
}(segment.(*Segment))
|
|
|
|
segsToMerge = append(segsToMerge, segment.(*Segment))
|
|
}
|
|
|
|
testMergeAndDropSegments(t, segsToMerge, docsToDrop, expectedNumDocs)
|
|
}
|
|
|
|
func testMergeAndDropSegments(t *testing.T, segsToMerge []*Segment, docsToDrop []*roaring.Bitmap, expectedNumDocs uint64) {
|
|
_ = os.RemoveAll("/tmp/scorch-merged.zap")
|
|
|
|
_, err := Merge(segsToMerge, docsToDrop, "/tmp/scorch-merged.zap", 1024)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
segm, err := Open("/tmp/scorch-merged.zap")
|
|
if err != nil {
|
|
t.Fatalf("error opening merged segment: %v", err)
|
|
}
|
|
defer func() {
|
|
cerr := segm.Close()
|
|
if cerr != nil {
|
|
t.Fatalf("error closing segment: %v", err)
|
|
}
|
|
}()
|
|
|
|
if segm.Count() != expectedNumDocs {
|
|
t.Fatalf("wrong count, got: %d, wanted: %d", segm.Count(), expectedNumDocs)
|
|
}
|
|
if len(segm.Fields()) != 5 {
|
|
t.Fatalf("wrong # fields: %#v\n", segm.Fields())
|
|
}
|
|
|
|
testMergeWithSelf(t, segm.(*Segment), expectedNumDocs)
|
|
}
|
|
|
|
// buildMemSegmentMulti2 builds an in-memory segment holding two docs
// whose _id field values are "c" and "d".
func buildMemSegmentMulti2() *mem.Segment {
	return buildMemSegmentMultiHelper([]string{"c", "d"})
}
|
|
|
|
// buildMemSegmentMultiHelper builds an in-memory segment holding two
// documents whose _id field values come from docIds[0] and docIds[1].
// Each doc carries the same five stored/indexed fields (_id, name,
// desc, tag x2, with tag using array positions 0 and 1) plus an _all
// composite over everything except _id. The analysis results are
// hand-forged rather than produced by an analyzer pipeline.
func buildMemSegmentMultiHelper(docIds []string) *mem.Segment {
	// NOTE(review): Document.ID is hardcoded to "c"/"d" while the _id
	// field uses docIds — segment construction appears to key off the
	// _id field; confirm ID is unused here.
	doc := &document.Document{
		ID: "c",
		Fields: []document.Field{
			document.NewTextFieldCustom("_id", nil, []byte(docIds[0]), document.IndexField|document.StoreField, nil),
			document.NewTextFieldCustom("name", nil, []byte("mat"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
			document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
			document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
			document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
		},
		CompositeFields: []*document.CompositeField{
			document.NewCompositeField("_all", true, nil, []string{"_id"}),
		},
	}

	doc2 := &document.Document{
		ID: "d",
		Fields: []document.Field{
			document.NewTextFieldCustom("_id", nil, []byte(docIds[1]), document.IndexField|document.StoreField, nil),
			document.NewTextFieldCustom("name", nil, []byte("joa"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
			document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
			document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
			document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
		},
		CompositeFields: []*document.CompositeField{
			document.NewCompositeField("_all", true, nil, []string{"_id"}),
		},
	}

	// forge analyzed docs: Analyzed[i] and Length[i] parallel
	// Document.Fields[i] for each doc
	results := []*index.AnalysisResult{
		&index.AnalysisResult{
			Document: doc,
			Analyzed: []analysis.TokenFrequencies{
				// _id field: no term vectors (false)
				analysis.TokenFrequency(analysis.TokenStream{
					&analysis.Token{
						Start:    0,
						End:      1,
						Position: 1,
						Term:     []byte(docIds[0]),
					},
				}, nil, false),
				analysis.TokenFrequency(analysis.TokenStream{
					&analysis.Token{
						Start:    0,
						End:      3,
						Position: 1,
						Term:     []byte("mat"),
					},
				}, nil, true),
				// desc field: two tokens, "some thing"
				analysis.TokenFrequency(analysis.TokenStream{
					&analysis.Token{
						Start:    0,
						End:      4,
						Position: 1,
						Term:     []byte("some"),
					},
					&analysis.Token{
						Start:    5,
						End:      10,
						Position: 2,
						Term:     []byte("thing"),
					},
				}, nil, true),
				// tag fields carry array positions {0} and {1}
				analysis.TokenFrequency(analysis.TokenStream{
					&analysis.Token{
						Start:    0,
						End:      4,
						Position: 1,
						Term:     []byte("cold"),
					},
				}, []uint64{0}, true),
				analysis.TokenFrequency(analysis.TokenStream{
					&analysis.Token{
						Start:    0,
						End:      4,
						Position: 1,
						Term:     []byte("dark"),
					},
				}, []uint64{1}, true),
			},
			// per-field token counts, parallel to Analyzed
			Length: []int{
				1,
				1,
				2,
				1,
				1,
			},
		},
		&index.AnalysisResult{
			Document: doc2,
			Analyzed: []analysis.TokenFrequencies{
				analysis.TokenFrequency(analysis.TokenStream{
					&analysis.Token{
						Start:    0,
						End:      1,
						Position: 1,
						Term:     []byte(docIds[1]),
					},
				}, nil, false),
				analysis.TokenFrequency(analysis.TokenStream{
					&analysis.Token{
						Start:    0,
						End:      3,
						Position: 1,
						Term:     []byte("joa"),
					},
				}, nil, true),
				analysis.TokenFrequency(analysis.TokenStream{
					&analysis.Token{
						Start:    0,
						End:      4,
						Position: 1,
						Term:     []byte("some"),
					},
					&analysis.Token{
						Start:    5,
						End:      10,
						Position: 2,
						Term:     []byte("thing"),
					},
				}, nil, true),
				analysis.TokenFrequency(analysis.TokenStream{
					&analysis.Token{
						Start:    0,
						End:      4,
						Position: 1,
						Term:     []byte("cold"),
					},
				}, []uint64{0}, true),
				analysis.TokenFrequency(analysis.TokenStream{
					&analysis.Token{
						Start:    0,
						End:      4,
						Position: 1,
						Term:     []byte("dark"),
					},
				}, []uint64{1}, true),
			},
			Length: []int{
				1,
				1,
				2,
				1,
				1,
			},
		},
	}

	// fix up composite fields: fold every non-excluded field's tokens
	// into the _all composite
	for _, ar := range results {
		for i, f := range ar.Document.Fields {
			for _, cf := range ar.Document.CompositeFields {
				cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i])
			}
		}
	}

	segment := mem.NewFromAnalyzedDocs(results)

	return segment
}
|