From fc34a97875840b2ae24517e7d746b69bdae9be90 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 19 Jan 2016 14:21:48 -0500 Subject: [PATCH] copy locations on merge for more safe/predictable behavior fixes #328 --- analysis/freq.go | 7 +++++- index/firestorm/analysis_test.go | 39 ++++++++++++++++++++++++++++++ index/upside_down/analysis_test.go | 39 ++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 1 deletion(-) diff --git a/analysis/freq.go b/analysis/freq.go index 85b5d1a6..2235bbfb 100644 --- a/analysis/freq.go +++ b/analysis/freq.go @@ -49,7 +49,12 @@ func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) existingTf.Locations = append(existingTf.Locations, tf.Locations...) existingTf.frequency = existingTf.frequency + tf.frequency } else { - tfs[tfk] = tf + tfs[tfk] = &TokenFreq{ + Term: tf.Term, + frequency: tf.frequency, + Locations: make([]*TokenLocation, len(tf.Locations)), + } + copy(tfs[tfk].Locations, tf.Locations) } } } diff --git a/index/firestorm/analysis_test.go b/index/firestorm/analysis_test.go index 4fe0c775..180a1ab5 100644 --- a/index/firestorm/analysis_test.go +++ b/index/firestorm/analysis_test.go @@ -101,6 +101,45 @@ func TestAnalysis(t *testing.T) { } } +func TestAnalysisBug328(t *testing.T) { + cache := registry.NewCache() + analyzer, err := cache.AnalyzerNamed(standard_analyzer.Name) + if err != nil { + t.Fatal(err) + } + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewFirestorm(gtreap.Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + + d := document.NewDocument("1") + f := document.NewTextFieldCustom("title", nil, []byte("bleve"), document.IndexField|document.IncludeTermVectors, analyzer) + d.AddField(f) + f = document.NewTextFieldCustom("body", nil, []byte("bleve"), document.IndexField|document.IncludeTermVectors, analyzer) + d.AddField(f) + cf := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, []string{}, document.IndexField|document.IncludeTermVectors) + d.AddField(cf) + + rv := idx.Analyze(d) + fieldIndexes := make(map[uint16]string) + for _, row := range rv.Rows { + if row, ok := row.(*FieldRow); ok { + fieldIndexes[row.index] = row.Name() + } + if row, ok := row.(*TermFreqRow); ok && string(row.term) == "bleve" { + for _, vec := range row.Vectors() { + if vec.GetField() != uint32(row.field) { + if fieldIndexes[row.field] != "_all" { + t.Errorf("row named %s field %d - vector field %d", fieldIndexes[row.field], row.field, vec.GetField()) + } + } + } + } + } +} + func BenchmarkAnalyze(b *testing.B) { cache := registry.NewCache() diff --git a/index/upside_down/analysis_test.go b/index/upside_down/analysis_test.go index 6b46d2d3..820ade2b 100644 --- a/index/upside_down/analysis_test.go +++ b/index/upside_down/analysis_test.go @@ -10,6 +10,45 @@ import ( "github.com/blevesearch/bleve/registry" ) +func TestAnalysisBug328(t *testing.T) { + cache := registry.NewCache() + analyzer, err := cache.AnalyzerNamed(standard_analyzer.Name) + if err != nil { + t.Fatal(err) + } + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewUpsideDownCouch(null.Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + + d := document.NewDocument("1") + f := document.NewTextFieldCustom("title", nil, []byte("bleve"), document.IndexField|document.IncludeTermVectors, analyzer) + d.AddField(f) + f = document.NewTextFieldCustom("body", nil, []byte("bleve"), document.IndexField|document.IncludeTermVectors, analyzer) + d.AddField(f) + cf := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, []string{}, document.IndexField|document.IncludeTermVectors) + d.AddField(cf) + + rv := idx.Analyze(d) + fieldIndexes := make(map[uint16]string) + for _, row := range rv.Rows { + if row, ok := row.(*FieldRow); ok { + fieldIndexes[row.index] = row.name + } + if row, ok := row.(*TermFrequencyRow); ok && string(row.term) == "bleve" { + for _, vec := range row.vectors { + if vec.field != row.field { + if fieldIndexes[row.field] != "_all" { + t.Errorf("row named %s field %d - vector field %d", fieldIndexes[row.field], row.field, vec.field) + } + } + } + } + } +} + func BenchmarkAnalyze(b *testing.B) { cache := registry.NewCache()