Merge pull request #842 from abhinavdangeti/segment-tests
Unit tests for segments with docs with non-overlapping fields
This commit is contained in:
commit
1fcfc0a5f1
|
@ -697,3 +697,180 @@ func TestMultiple(t *testing.T) {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
func TestMultipleWithNonOverlappingFields(t *testing.T) {
|
||||
doc1 := &document.Document{
|
||||
ID: "a",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("_id", []uint64{}, []byte("a")),
|
||||
document.NewTextField("name", []uint64{}, []byte("ABC")),
|
||||
document.NewTextField("dept", []uint64{}, []byte("ABC dept")),
|
||||
document.NewTextField("manages.id", []uint64{}, []byte("XYZ")),
|
||||
document.NewTextField("manages.count", []uint64{}, []byte("1")),
|
||||
},
|
||||
CompositeFields: []*document.CompositeField{
|
||||
document.NewCompositeField("_all", true, nil, []string{"_id"}),
|
||||
},
|
||||
}
|
||||
|
||||
doc2 := &document.Document{
|
||||
ID: "b",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("_id", []uint64{}, []byte("b")),
|
||||
document.NewTextField("name", []uint64{}, []byte("XYZ")),
|
||||
document.NewTextField("dept", []uint64{}, []byte("ABC dept")),
|
||||
document.NewTextField("reportsTo.id", []uint64{}, []byte("ABC")),
|
||||
},
|
||||
CompositeFields: []*document.CompositeField{
|
||||
document.NewCompositeField("_all", true, nil, []string{"_id"}),
|
||||
},
|
||||
}
|
||||
|
||||
results := []*index.AnalysisResult{
|
||||
&index.AnalysisResult{
|
||||
Document: doc1,
|
||||
Analyzed: []analysis.TokenFrequencies{
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 1,
|
||||
Position: 1,
|
||||
Term: []byte("a"),
|
||||
},
|
||||
}, nil, false),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 3,
|
||||
Position: 1,
|
||||
Term: []byte("ABC"),
|
||||
},
|
||||
}, nil, true),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 3,
|
||||
Position: 1,
|
||||
Term: []byte("ABC"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Start: 4,
|
||||
End: 8,
|
||||
Position: 2,
|
||||
Term: []byte("dept"),
|
||||
},
|
||||
}, nil, true),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 3,
|
||||
Position: 1,
|
||||
Term: []byte("XYZ"),
|
||||
},
|
||||
}, []uint64{0}, true),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 1,
|
||||
Position: 1,
|
||||
Term: []byte("1"),
|
||||
},
|
||||
}, []uint64{1}, true),
|
||||
},
|
||||
Length: []int{
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
1,
|
||||
1,
|
||||
},
|
||||
},
|
||||
&index.AnalysisResult{
|
||||
Document: doc2,
|
||||
Analyzed: []analysis.TokenFrequencies{
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 1,
|
||||
Position: 1,
|
||||
Term: []byte("b"),
|
||||
},
|
||||
}, nil, false),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 3,
|
||||
Position: 1,
|
||||
Term: []byte("XYZ"),
|
||||
},
|
||||
}, nil, true),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 3,
|
||||
Position: 1,
|
||||
Term: []byte("ABC"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Start: 4,
|
||||
End: 8,
|
||||
Position: 2,
|
||||
Term: []byte("dept"),
|
||||
},
|
||||
}, nil, true),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 3,
|
||||
Position: 1,
|
||||
Term: []byte("ABC"),
|
||||
},
|
||||
}, []uint64{0}, true),
|
||||
},
|
||||
Length: []int{
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
1,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// fix up composite fields
|
||||
for _, ar := range results {
|
||||
for i, f := range ar.Document.Fields {
|
||||
for _, cf := range ar.Document.CompositeFields {
|
||||
cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
segment := NewFromAnalyzedDocs(results)
|
||||
if segment == nil {
|
||||
t.Fatalf("segment nil, not expected")
|
||||
}
|
||||
|
||||
if segment.Count() != 2 {
|
||||
t.Errorf("expected count 2, got %d", segment.Count())
|
||||
}
|
||||
|
||||
expectFields := map[string]struct{}{
|
||||
"_id": struct{}{},
|
||||
"_all": struct{}{},
|
||||
"name": struct{}{},
|
||||
"dept": struct{}{},
|
||||
"manages.id": struct{}{},
|
||||
"manages.count": struct{}{},
|
||||
"reportsTo.id": struct{}{},
|
||||
}
|
||||
|
||||
fields := segment.Fields()
|
||||
if len(fields) != len(expectFields) {
|
||||
t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields))
|
||||
}
|
||||
for _, field := range fields {
|
||||
if _, ok := expectFields[field]; !ok {
|
||||
t.Errorf("got unexpected field: %s", field)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -137,6 +137,12 @@ func buildTestSegmentMultiWithChunkFactor(chunkFactor uint32) (*SegmentBase, err
|
|||
return AnalysisResultsToSegmentBase(results, chunkFactor)
|
||||
}
|
||||
|
||||
func buildTestSegmentMultiWithDifferentFields(includeDocA, includeDocB bool) (*SegmentBase, error) {
|
||||
results := buildTestAnalysisResultsMultiWithDifferentFields(includeDocA, includeDocB)
|
||||
|
||||
return AnalysisResultsToSegmentBase(results, 1024)
|
||||
}
|
||||
|
||||
func buildTestAnalysisResultsMulti() []*index.AnalysisResult {
|
||||
doc := &document.Document{
|
||||
ID: "a",
|
||||
|
@ -298,6 +304,165 @@ func buildTestAnalysisResultsMulti() []*index.AnalysisResult {
|
|||
return results
|
||||
}
|
||||
|
||||
func buildTestAnalysisResultsMultiWithDifferentFields(includeDocA, includeDocB bool) []*index.AnalysisResult {
|
||||
results := []*index.AnalysisResult{}
|
||||
|
||||
if includeDocA {
|
||||
doc := &document.Document{
|
||||
ID: "a",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("_id", []uint64{}, []byte("a")),
|
||||
document.NewTextField("name", []uint64{}, []byte("ABC")),
|
||||
document.NewTextField("dept", []uint64{}, []byte("ABC dept")),
|
||||
document.NewTextField("manages.id", []uint64{}, []byte("XYZ")),
|
||||
document.NewTextField("manages.count", []uint64{}, []byte("1")),
|
||||
},
|
||||
CompositeFields: []*document.CompositeField{
|
||||
document.NewCompositeField("_all", true, nil, []string{"_id"}),
|
||||
},
|
||||
}
|
||||
|
||||
result := &index.AnalysisResult{
|
||||
Document: doc,
|
||||
Analyzed: []analysis.TokenFrequencies{
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 1,
|
||||
Position: 1,
|
||||
Term: []byte("a"),
|
||||
},
|
||||
}, nil, false),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 3,
|
||||
Position: 1,
|
||||
Term: []byte("ABC"),
|
||||
},
|
||||
}, nil, true),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 3,
|
||||
Position: 1,
|
||||
Term: []byte("ABC"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Start: 4,
|
||||
End: 8,
|
||||
Position: 2,
|
||||
Term: []byte("dept"),
|
||||
},
|
||||
}, nil, true),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 3,
|
||||
Position: 1,
|
||||
Term: []byte("XYZ"),
|
||||
},
|
||||
}, []uint64{0}, true),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 1,
|
||||
Position: 1,
|
||||
Term: []byte("1"),
|
||||
},
|
||||
}, []uint64{1}, true),
|
||||
},
|
||||
Length: []int{
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
1,
|
||||
1,
|
||||
},
|
||||
}
|
||||
|
||||
results = append(results, result)
|
||||
}
|
||||
|
||||
if includeDocB {
|
||||
doc := &document.Document{
|
||||
ID: "b",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("_id", []uint64{}, []byte("b")),
|
||||
document.NewTextField("name", []uint64{}, []byte("XYZ")),
|
||||
document.NewTextField("dept", []uint64{}, []byte("ABC dept")),
|
||||
document.NewTextField("reportsTo.id", []uint64{}, []byte("ABC")),
|
||||
},
|
||||
CompositeFields: []*document.CompositeField{
|
||||
document.NewCompositeField("_all", true, nil, []string{"_id"}),
|
||||
},
|
||||
}
|
||||
|
||||
result := &index.AnalysisResult{
|
||||
Document: doc,
|
||||
Analyzed: []analysis.TokenFrequencies{
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 1,
|
||||
Position: 1,
|
||||
Term: []byte("b"),
|
||||
},
|
||||
}, nil, false),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 3,
|
||||
Position: 1,
|
||||
Term: []byte("XYZ"),
|
||||
},
|
||||
}, nil, true),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 3,
|
||||
Position: 1,
|
||||
Term: []byte("ABC"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Start: 4,
|
||||
End: 8,
|
||||
Position: 2,
|
||||
Term: []byte("dept"),
|
||||
},
|
||||
}, nil, true),
|
||||
analysis.TokenFrequency(analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: 3,
|
||||
Position: 1,
|
||||
Term: []byte("ABC"),
|
||||
},
|
||||
}, []uint64{0}, true),
|
||||
},
|
||||
Length: []int{
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
1,
|
||||
},
|
||||
}
|
||||
|
||||
results = append(results, result)
|
||||
}
|
||||
|
||||
// fix up composite fields
|
||||
for _, ar := range results {
|
||||
for i, f := range ar.Document.Fields {
|
||||
for _, cf := range ar.Document.CompositeFields {
|
||||
cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return results
|
||||
}
|
||||
|
||||
func buildTestSegmentWithDefaultFieldMapping(chunkFactor uint32) (
|
||||
*SegmentBase, []string, error) {
|
||||
doc := &document.Document{
|
||||
|
|
|
@ -21,6 +21,7 @@ import (
|
|||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
)
|
||||
|
@ -600,3 +601,139 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) {
|
|||
|
||||
}
|
||||
}
|
||||
|
||||
func TestSegmentDocsWithNonOverlappingFields(t *testing.T) {
|
||||
_ = os.RemoveAll("/tmp/scorch.zap")
|
||||
|
||||
testSeg, err := buildTestSegmentMultiWithDifferentFields(true, true)
|
||||
if err != nil {
|
||||
t.Fatalf("error building segment: %v", err)
|
||||
}
|
||||
err = PersistSegmentBase(testSeg, "/tmp/scorch.zap")
|
||||
if err != nil {
|
||||
t.Fatalf("error persisting segment: %v", err)
|
||||
}
|
||||
|
||||
segment, err := Open("/tmp/scorch.zap")
|
||||
if err != nil {
|
||||
t.Fatalf("error opening segment: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
cerr := segment.Close()
|
||||
if cerr != nil {
|
||||
t.Fatalf("error closing segment: %v", cerr)
|
||||
}
|
||||
}()
|
||||
|
||||
if segment.Count() != 2 {
|
||||
t.Errorf("expected 2, got %d", segment.Count())
|
||||
}
|
||||
|
||||
expectFields := map[string]struct{}{
|
||||
"_id": struct{}{},
|
||||
"_all": struct{}{},
|
||||
"name": struct{}{},
|
||||
"dept": struct{}{},
|
||||
"manages.id": struct{}{},
|
||||
"manages.count": struct{}{},
|
||||
"reportsTo.id": struct{}{},
|
||||
}
|
||||
|
||||
fields := segment.Fields()
|
||||
if len(fields) != len(expectFields) {
|
||||
t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields))
|
||||
}
|
||||
for _, field := range fields {
|
||||
if _, ok := expectFields[field]; !ok {
|
||||
t.Errorf("got unexpected field: %s", field)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergedSegmentDocsWithNonOverlappingFields(t *testing.T) {
|
||||
_ = os.RemoveAll("/tmp/scorch1.zap")
|
||||
_ = os.RemoveAll("/tmp/scorch2.zap")
|
||||
_ = os.RemoveAll("/tmp/scorch3.zap")
|
||||
|
||||
testSeg1, _ := buildTestSegmentMultiWithDifferentFields(true, false)
|
||||
err := PersistSegmentBase(testSeg1, "/tmp/scorch1.zap")
|
||||
if err != nil {
|
||||
t.Fatalf("error persisting segment: %v", err)
|
||||
}
|
||||
|
||||
testSeg2, _ := buildTestSegmentMultiWithDifferentFields(false, true)
|
||||
err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap")
|
||||
if err != nil {
|
||||
t.Fatalf("error persisting segment: %v", err)
|
||||
}
|
||||
|
||||
segment1, err := Open("/tmp/scorch1.zap")
|
||||
if err != nil {
|
||||
t.Fatalf("error opening segment: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
cerr := segment1.Close()
|
||||
if cerr != nil {
|
||||
t.Fatalf("error closing segment: %v", cerr)
|
||||
}
|
||||
}()
|
||||
|
||||
segment2, err := Open("/tmp/scorch2.zap")
|
||||
if err != nil {
|
||||
t.Fatalf("error opening segment: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
cerr := segment2.Close()
|
||||
if cerr != nil {
|
||||
t.Fatalf("error closing segment: %v", cerr)
|
||||
}
|
||||
}()
|
||||
|
||||
segsToMerge := make([]*Segment, 2)
|
||||
segsToMerge[0] = segment1.(*Segment)
|
||||
segsToMerge[1] = segment2.(*Segment)
|
||||
|
||||
_, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if nBytes == 0 {
|
||||
t.Fatalf("expected a non zero total_compaction_written_bytes")
|
||||
}
|
||||
|
||||
segmentM, err := Open("/tmp/scorch3.zap")
|
||||
if err != nil {
|
||||
t.Fatalf("error opening merged segment: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
cerr := segmentM.Close()
|
||||
if cerr != nil {
|
||||
t.Fatalf("error closing segment: %v", cerr)
|
||||
}
|
||||
}()
|
||||
|
||||
if segmentM.Count() != 2 {
|
||||
t.Errorf("expected 2, got %d", segmentM.Count())
|
||||
}
|
||||
|
||||
expectFields := map[string]struct{}{
|
||||
"_id": struct{}{},
|
||||
"_all": struct{}{},
|
||||
"name": struct{}{},
|
||||
"dept": struct{}{},
|
||||
"manages.id": struct{}{},
|
||||
"manages.count": struct{}{},
|
||||
"reportsTo.id": struct{}{},
|
||||
}
|
||||
|
||||
fields := segmentM.Fields()
|
||||
if len(fields) != len(expectFields) {
|
||||
t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields))
|
||||
}
|
||||
for _, field := range fields {
|
||||
if _, ok := expectFields[field]; !ok {
|
||||
t.Errorf("got unexpected field: %s", field)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue