0
0
Fork 0

copy locations on merge for more safe/predictable behavior

fixes #328
This commit is contained in:
Marty Schoch 2016-01-19 14:21:48 -05:00
parent 035d9d0e40
commit fc34a97875
3 changed files with 84 additions and 1 deletions

View File

@ -49,7 +49,12 @@ func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies)
existingTf.Locations = append(existingTf.Locations, tf.Locations...)
existingTf.frequency = existingTf.frequency + tf.frequency
} else {
tfs[tfk] = tf
tfs[tfk] = &TokenFreq{
Term: tf.Term,
frequency: tf.frequency,
Locations: make([]*TokenLocation, len(tf.Locations)),
}
copy(tfs[tfk].Locations, tf.Locations)
}
}
}

View File

@ -101,6 +101,45 @@ func TestAnalysis(t *testing.T) {
}
}
func TestAnalysisBug328(t *testing.T) {
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(standard_analyzer.Name)
if err != nil {
t.Fatal(err)
}
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewFirestorm(gtreap.Name, nil, analysisQueue)
if err != nil {
t.Fatal(err)
}
d := document.NewDocument("1")
f := document.NewTextFieldCustom("title", nil, []byte("bleve"), document.IndexField|document.IncludeTermVectors, analyzer)
d.AddField(f)
f = document.NewTextFieldCustom("body", nil, []byte("bleve"), document.IndexField|document.IncludeTermVectors, analyzer)
d.AddField(f)
cf := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, []string{}, document.IndexField|document.IncludeTermVectors)
d.AddField(cf)
rv := idx.Analyze(d)
fieldIndexes := make(map[uint16]string)
for _, row := range rv.Rows {
if row, ok := row.(*FieldRow); ok {
fieldIndexes[row.index] = row.Name()
}
if row, ok := row.(*TermFreqRow); ok && string(row.term) == "bleve" {
for _, vec := range row.Vectors() {
if vec.GetField() != uint32(row.field) {
if fieldIndexes[row.field] != "_all" {
t.Errorf("row named %s field %d - vector field %d", fieldIndexes[row.field], row.field, vec.GetField())
}
}
}
}
}
}
func BenchmarkAnalyze(b *testing.B) {
cache := registry.NewCache()

View File

@ -10,6 +10,45 @@ import (
"github.com/blevesearch/bleve/registry"
)
func TestAnalysisBug328(t *testing.T) {
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(standard_analyzer.Name)
if err != nil {
t.Fatal(err)
}
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewUpsideDownCouch(null.Name, nil, analysisQueue)
if err != nil {
t.Fatal(err)
}
d := document.NewDocument("1")
f := document.NewTextFieldCustom("title", nil, []byte("bleve"), document.IndexField|document.IncludeTermVectors, analyzer)
d.AddField(f)
f = document.NewTextFieldCustom("body", nil, []byte("bleve"), document.IndexField|document.IncludeTermVectors, analyzer)
d.AddField(f)
cf := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, []string{}, document.IndexField|document.IncludeTermVectors)
d.AddField(cf)
rv := idx.Analyze(d)
fieldIndexes := make(map[uint16]string)
for _, row := range rv.Rows {
if row, ok := row.(*FieldRow); ok {
fieldIndexes[row.index] = row.name
}
if row, ok := row.(*TermFrequencyRow); ok && string(row.term) == "bleve" {
for _, vec := range row.vectors {
if vec.field != row.field {
if fieldIndexes[row.field] != "_all" {
t.Errorf("row named %s field %d - vector field %d", fieldIndexes[row.field], row.field, vec.field)
}
}
}
}
}
}
func BenchmarkAnalyze(b *testing.B) {
cache := registry.NewCache()