0
0
Fork 0

Unit tests for segments with docs with non-overlapping fields

This commit is contained in:
abhinavdangeti 2018-03-19 12:33:13 -07:00
parent cf8e0d63bb
commit 85df86ba17
3 changed files with 479 additions and 0 deletions

View File

@ -697,3 +697,180 @@ func TestMultiple(t *testing.T) {
}
}
func TestMultipleWithNonOverlappingFields(t *testing.T) {
doc1 := &document.Document{
ID: "a",
Fields: []document.Field{
document.NewTextField("_id", []uint64{}, []byte("a")),
document.NewTextField("name", []uint64{}, []byte("ABC")),
document.NewTextField("dept", []uint64{}, []byte("ABC dept")),
document.NewTextField("manages.id", []uint64{}, []byte("XYZ")),
document.NewTextField("manages.count", []uint64{}, []byte("1")),
},
CompositeFields: []*document.CompositeField{
document.NewCompositeField("_all", true, nil, []string{"_id"}),
},
}
doc2 := &document.Document{
ID: "b",
Fields: []document.Field{
document.NewTextField("_id", []uint64{}, []byte("b")),
document.NewTextField("name", []uint64{}, []byte("XYZ")),
document.NewTextField("dept", []uint64{}, []byte("ABC dept")),
document.NewTextField("reportsTo.id", []uint64{}, []byte("ABC")),
},
CompositeFields: []*document.CompositeField{
document.NewCompositeField("_all", true, nil, []string{"_id"}),
},
}
results := []*index.AnalysisResult{
&index.AnalysisResult{
Document: doc1,
Analyzed: []analysis.TokenFrequencies{
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 1,
Position: 1,
Term: []byte("a"),
},
}, nil, false),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 3,
Position: 1,
Term: []byte("ABC"),
},
}, nil, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 3,
Position: 1,
Term: []byte("ABC"),
},
&analysis.Token{
Start: 4,
End: 8,
Position: 2,
Term: []byte("dept"),
},
}, nil, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 3,
Position: 1,
Term: []byte("XYZ"),
},
}, []uint64{0}, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 1,
Position: 1,
Term: []byte("1"),
},
}, []uint64{1}, true),
},
Length: []int{
1,
1,
2,
1,
1,
},
},
&index.AnalysisResult{
Document: doc2,
Analyzed: []analysis.TokenFrequencies{
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 1,
Position: 1,
Term: []byte("b"),
},
}, nil, false),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 3,
Position: 1,
Term: []byte("XYZ"),
},
}, nil, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 3,
Position: 1,
Term: []byte("ABC"),
},
&analysis.Token{
Start: 4,
End: 8,
Position: 2,
Term: []byte("dept"),
},
}, nil, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 3,
Position: 1,
Term: []byte("ABC"),
},
}, []uint64{0}, true),
},
Length: []int{
1,
1,
2,
1,
},
},
}
// fix up composite fields
for _, ar := range results {
for i, f := range ar.Document.Fields {
for _, cf := range ar.Document.CompositeFields {
cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i])
}
}
}
segment := NewFromAnalyzedDocs(results)
if segment == nil {
t.Fatalf("segment nil, not expected")
}
if segment.Count() != 2 {
t.Errorf("expected count 2, got %d", segment.Count())
}
expectFields := map[string]struct{}{
"_id": struct{}{},
"_all": struct{}{},
"name": struct{}{},
"dept": struct{}{},
"manages.id": struct{}{},
"manages.count": struct{}{},
"reportsTo.id": struct{}{},
}
fields := segment.Fields()
if len(fields) != len(expectFields) {
t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields))
}
for _, field := range fields {
if _, ok := expectFields[field]; !ok {
t.Errorf("got unexpected field: %s", field)
}
}
}

View File

@ -137,6 +137,12 @@ func buildTestSegmentMultiWithChunkFactor(chunkFactor uint32) (*SegmentBase, err
return AnalysisResultsToSegmentBase(results, chunkFactor)
}
// buildTestSegmentMultiWithDifferentFields turns the analysis results from
// buildTestAnalysisResultsMultiWithDifferentFields into a SegmentBase.
// includeDocA and includeDocB select which of the two test documents take
// part; the chunk factor handed to AnalysisResultsToSegmentBase is fixed at
// 1024.
func buildTestSegmentMultiWithDifferentFields(includeDocA, includeDocB bool) (*SegmentBase, error) {
	const chunkFactor = 1024

	return AnalysisResultsToSegmentBase(
		buildTestAnalysisResultsMultiWithDifferentFields(includeDocA, includeDocB),
		chunkFactor,
	)
}
func buildTestAnalysisResultsMulti() []*index.AnalysisResult {
doc := &document.Document{
ID: "a",
@ -298,6 +304,165 @@ func buildTestAnalysisResultsMulti() []*index.AnalysisResult {
return results
}
// buildTestAnalysisResultsMultiWithDifferentFields returns hand-built analysis
// results for up to two test documents with partially different fields.
//
// Document "a" (included when includeDocA is true) has the fields _id, name,
// dept, manages.id and manages.count. Document "b" (included when includeDocB
// is true) has _id, name, dept and reportsTo.id. Each document also declares
// an "_all" composite field, which is populated from the per-field analysis
// before the results are returned. When both flags are false the returned
// slice is empty but non-nil.
func buildTestAnalysisResultsMultiWithDifferentFields(includeDocA, includeDocB bool) []*index.AnalysisResult {
	// tok builds one analysis token from its offsets, position and term.
	tok := func(start, end, pos int, term string) *analysis.Token {
		return &analysis.Token{
			Start:    start,
			End:      end,
			Position: pos,
			Term:     []byte(term),
		}
	}

	analyzed := []*index.AnalysisResult{}

	if includeDocA {
		// Analyzed[i] and Length[i] line up with Document.Fields[i].
		analyzed = append(analyzed, &index.AnalysisResult{
			Document: &document.Document{
				ID: "a",
				Fields: []document.Field{
					document.NewTextField("_id", []uint64{}, []byte("a")),
					document.NewTextField("name", []uint64{}, []byte("ABC")),
					document.NewTextField("dept", []uint64{}, []byte("ABC dept")),
					document.NewTextField("manages.id", []uint64{}, []byte("XYZ")),
					document.NewTextField("manages.count", []uint64{}, []byte("1")),
				},
				CompositeFields: []*document.CompositeField{
					document.NewCompositeField("_all", true, nil, []string{"_id"}),
				},
			},
			Analyzed: []analysis.TokenFrequencies{
				analysis.TokenFrequency(analysis.TokenStream{tok(0, 1, 1, "a")}, nil, false),
				analysis.TokenFrequency(analysis.TokenStream{tok(0, 3, 1, "ABC")}, nil, true),
				analysis.TokenFrequency(analysis.TokenStream{
					tok(0, 3, 1, "ABC"),
					tok(4, 8, 2, "dept"),
				}, nil, true),
				analysis.TokenFrequency(analysis.TokenStream{tok(0, 3, 1, "XYZ")}, []uint64{0}, true),
				analysis.TokenFrequency(analysis.TokenStream{tok(0, 1, 1, "1")}, []uint64{1}, true),
			},
			Length: []int{1, 1, 2, 1, 1},
		})
	}

	if includeDocB {
		// Analyzed[i] and Length[i] line up with Document.Fields[i].
		analyzed = append(analyzed, &index.AnalysisResult{
			Document: &document.Document{
				ID: "b",
				Fields: []document.Field{
					document.NewTextField("_id", []uint64{}, []byte("b")),
					document.NewTextField("name", []uint64{}, []byte("XYZ")),
					document.NewTextField("dept", []uint64{}, []byte("ABC dept")),
					document.NewTextField("reportsTo.id", []uint64{}, []byte("ABC")),
				},
				CompositeFields: []*document.CompositeField{
					document.NewCompositeField("_all", true, nil, []string{"_id"}),
				},
			},
			Analyzed: []analysis.TokenFrequencies{
				analysis.TokenFrequency(analysis.TokenStream{tok(0, 1, 1, "b")}, nil, false),
				analysis.TokenFrequency(analysis.TokenStream{tok(0, 3, 1, "XYZ")}, nil, true),
				analysis.TokenFrequency(analysis.TokenStream{
					tok(0, 3, 1, "ABC"),
					tok(4, 8, 2, "dept"),
				}, nil, true),
				analysis.TokenFrequency(analysis.TokenStream{tok(0, 3, 1, "ABC")}, []uint64{0}, true),
			},
			Length: []int{1, 1, 2, 1},
		})
	}

	// fix up composite fields: offer every field to every composite field,
	// field by field, using that field's length and token frequencies
	for _, res := range analyzed {
		doc := res.Document
		for idx := range doc.Fields {
			fieldName := doc.Fields[idx].Name()
			for _, composite := range doc.CompositeFields {
				composite.Compose(fieldName, res.Length[idx], res.Analyzed[idx])
			}
		}
	}

	return analyzed
}
func buildTestSegmentWithDefaultFieldMapping(chunkFactor uint32) (
*SegmentBase, []string, error) {
doc := &document.Document{

View File

@ -21,6 +21,7 @@ import (
"sort"
"testing"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
)
@ -600,3 +601,139 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) {
}
}
// TestSegmentDocsWithNonOverlappingFields builds a single segment holding both
// test documents (whose field sets only partially overlap), persists it to
// /tmp/scorch.zap, reopens it, and checks the document count and the set of
// field names the reopened segment reports.
func TestSegmentDocsWithNonOverlappingFields(t *testing.T) {
	const path = "/tmp/scorch.zap"

	_ = os.RemoveAll(path)

	base, err := buildTestSegmentMultiWithDifferentFields(true, true)
	if err != nil {
		t.Fatalf("error building segment: %v", err)
	}

	if err = PersistSegmentBase(base, path); err != nil {
		t.Fatalf("error persisting segment: %v", err)
	}

	seg, err := Open(path)
	if err != nil {
		t.Fatalf("error opening segment: %v", err)
	}
	defer func() {
		if cerr := seg.Close(); cerr != nil {
			t.Fatalf("error closing segment: %v", cerr)
		}
	}()

	if got := seg.Count(); got != 2 {
		t.Errorf("expected 2, got %d", got)
	}

	wantFields := map[string]struct{}{
		"_id":           {},
		"_all":          {},
		"name":          {},
		"dept":          {},
		"manages.id":    {},
		"manages.count": {},
		"reportsTo.id":  {},
	}

	gotFields := seg.Fields()
	if len(gotFields) != len(wantFields) {
		t.Errorf("expected %d fields, only got %d", len(wantFields), len(gotFields))
	}

	for _, name := range gotFields {
		if _, known := wantFields[name]; known {
			continue
		}
		t.Errorf("got unexpected field: %s", name)
	}
}
// TestMergedSegmentDocsWithNonOverlappingFields persists two single-document
// segments whose field sets only partially overlap (document "a" in
// /tmp/scorch1.zap, document "b" in /tmp/scorch2.zap), merges them into
// /tmp/scorch3.zap, and checks that the merged segment reports both documents
// and only field names from the expected combined set.
func TestMergedSegmentDocsWithNonOverlappingFields(t *testing.T) {
	// Best-effort cleanup of leftovers from earlier runs; the result is
	// deliberately ignored, as any real problem with these paths will show
	// up when the segments are persisted below.
	_ = os.RemoveAll("/tmp/scorch1.zap")
	_ = os.RemoveAll("/tmp/scorch2.zap")
	_ = os.RemoveAll("/tmp/scorch3.zap")

	// Segment 1 contains only document "a".
	testSeg1, err := buildTestSegmentMultiWithDifferentFields(true, false)
	if err != nil {
		t.Fatalf("error building segment: %v", err)
	}
	err = PersistSegmentBase(testSeg1, "/tmp/scorch1.zap")
	if err != nil {
		t.Fatalf("error persisting segment: %v", err)
	}

	// Segment 2 contains only document "b".
	testSeg2, err := buildTestSegmentMultiWithDifferentFields(false, true)
	if err != nil {
		t.Fatalf("error building segment: %v", err)
	}
	err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap")
	if err != nil {
		t.Fatalf("error persisting segment: %v", err)
	}

	segment1, err := Open("/tmp/scorch1.zap")
	if err != nil {
		t.Fatalf("error opening segment: %v", err)
	}
	defer func() {
		cerr := segment1.Close()
		if cerr != nil {
			t.Fatalf("error closing segment: %v", cerr)
		}
	}()

	segment2, err := Open("/tmp/scorch2.zap")
	if err != nil {
		t.Fatalf("error opening segment: %v", err)
	}
	defer func() {
		cerr := segment2.Close()
		if cerr != nil {
			t.Fatalf("error closing segment: %v", cerr)
		}
	}()

	segsToMerge := []*Segment{
		segment1.(*Segment),
		segment2.(*Segment),
	}

	// A nil bitmap is passed for each input segment.
	_, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024)
	if err != nil {
		t.Fatal(err)
	}

	if nBytes == 0 {
		t.Fatalf("expected a non zero total_compaction_written_bytes")
	}

	segmentM, err := Open("/tmp/scorch3.zap")
	if err != nil {
		t.Fatalf("error opening merged segment: %v", err)
	}
	defer func() {
		cerr := segmentM.Close()
		if cerr != nil {
			t.Fatalf("error closing segment: %v", cerr)
		}
	}()

	if segmentM.Count() != 2 {
		t.Errorf("expected 2, got %d", segmentM.Count())
	}

	// The merged segment must expose the union of both documents' fields.
	expectFields := map[string]struct{}{
		"_id":           {},
		"_all":          {},
		"name":          {},
		"dept":          {},
		"manages.id":    {},
		"manages.count": {},
		"reportsTo.id":  {},
	}

	fields := segmentM.Fields()
	if len(fields) != len(expectFields) {
		t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields))
	}

	for _, field := range fields {
		if _, ok := expectFields[field]; !ok {
			t.Errorf("got unexpected field: %s", field)
		}
	}
}