0
0
Fork 0

Update to work correctly with composite fields

Also update search results to return array positions.
This commit is contained in:
Marty Schoch 2015-07-31 11:16:11 -04:00
parent 70b23cbe7f
commit 3682c25467
10 changed files with 107 additions and 24 deletions

View File

@ -10,10 +10,11 @@
package analysis
type TokenLocation struct {
Field string
Start int
End int
Position int
Field string
ArrayPositions []uint64
Start int
End int
Position int
}
type TokenFreq struct {
@ -52,25 +53,27 @@ func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies)
return rv
}
func TokenFrequency(tokens TokenStream) TokenFrequencies {
func TokenFrequency(tokens TokenStream, arrayPositions []uint64) TokenFrequencies {
index := make(map[string]*TokenFreq)
for _, token := range tokens {
curr, ok := index[string(token.Term)]
if ok {
curr.Locations = append(curr.Locations, &TokenLocation{
Start: token.Start,
End: token.End,
Position: token.Position,
ArrayPositions: arrayPositions,
Start: token.Start,
End: token.End,
Position: token.Position,
})
} else {
index[string(token.Term)] = &TokenFreq{
Term: token.Term,
Locations: []*TokenLocation{
&TokenLocation{
Start: token.Start,
End: token.End,
Position: token.Position,
ArrayPositions: arrayPositions,
Start: token.Start,
End: token.End,
Position: token.Position,
},
},
}

View File

@ -46,7 +46,7 @@ func TestTokenFrequency(t *testing.T) {
},
},
}
result := TokenFrequency(tokens)
result := TokenFrequency(tokens, nil)
if !reflect.DeepEqual(result, expectedResult) {
t.Errorf("expected %#v, got %#v", expectedResult, result)
}

View File

@ -75,7 +75,7 @@ func (n *DateTimeField) Analyze() (int, analysis.TokenFrequencies) {
}
fieldLength := len(tokens)
tokenFreqs := analysis.TokenFrequency(tokens)
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions)
return fieldLength, tokenFreqs
}

View File

@ -71,7 +71,7 @@ func (n *NumericField) Analyze() (int, analysis.TokenFrequencies) {
}
fieldLength := len(tokens)
tokenFreqs := analysis.TokenFrequency(tokens)
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions)
return fieldLength, tokenFreqs
}

View File

@ -60,7 +60,7 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
}
}
fieldLength := len(tokens) // number of tokens in this doc field
tokenFreqs := analysis.TokenFrequency(tokens)
tokenFreqs := analysis.TokenFrequency(tokens, t.arrayPositions)
return fieldLength, tokenFreqs
}

View File

@ -9,9 +9,7 @@
package upside_down
import (
"github.com/blevesearch/bleve/document"
)
import "github.com/blevesearch/bleve/document"
type AnalysisResult struct {
docID string

View File

@ -415,7 +415,7 @@ func (udc *UpsideDownCouch) indexField(docID string, field document.Field, field
for _, tf := range tokenFreqs {
var termFreqRow *TermFrequencyRow
if field.Options().IncludeTermVectors() {
tv, newFieldRows := udc.termVectorsFromTokenFreq(fieldIndex, field.ArrayPositions(), tf)
tv, newFieldRows := udc.termVectorsFromTokenFreq(fieldIndex, tf)
rows = append(rows, newFieldRows...)
termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv)
} else {
@ -542,7 +542,7 @@ func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
return len(tf.Locations)
}
func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, arrayPositions []uint64, tf *analysis.TokenFreq) ([]*TermVector, []UpsideDownCouchRow) {
func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq) ([]*TermVector, []UpsideDownCouchRow) {
rv := make([]*TermVector, len(tf.Locations))
newFieldRows := make([]UpsideDownCouchRow, 0)
@ -558,7 +558,7 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, arrayPosition
}
tv := TermVector{
field: fieldIndex,
arrayPositions: arrayPositions,
arrayPositions: l.ArrayPositions,
pos: uint64(l.Position),
start: uint64(l.Start),
end: uint64(l.End),

View File

@ -962,3 +962,77 @@ func TestKeywordSearchBug207(t *testing.T) {
t.Fatal(err)
}
}
// TestTermVectorArrayPositions indexes a document whose Messages field is an
// array of strings and checks that the locations returned by term searches
// report the array position of the element that matched.
func TestTermVectorArrayPositions(t *testing.T) {
	defer func() {
		err := os.RemoveAll("testidx")
		if err != nil {
			t.Fatal(err)
		}
	}()

	index, err := New("testidx", NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	// Deferred after the RemoveAll above, so it runs first: the index is
	// closed on every exit path (including t.Fatal) before its directory is
	// removed. t.Error is used so the remaining cleanup is never cut short.
	defer func() {
		err := index.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	// index a document with an array of strings
	err = index.Index("k", struct {
		Messages []string
	}{
		Messages: []string{
			"first",
			"second",
			"third",
			"last",
		},
	})
	if err != nil {
		t.Fatal(err)
	}

	// search for this document without naming a field
	tsr := NewSearchRequest(NewTermQuery("second"))
	results, err := index.Search(tsr)
	if err != nil {
		t.Fatal(err)
	}

	// checkArrayPosition inspects the current value of results (it is
	// reassigned for the second search below) and fails the test unless the
	// single hit has a location for term in the Messages field whose first
	// array position equals want.
	checkArrayPosition := func(term string, want float64) {
		if results.Total != 1 {
			t.Fatalf("expected 1 result, got %d", results.Total)
		}
		// Guard the Hits[0] access so a mismatch between Total and the
		// returned hits fails the test instead of panicking.
		if len(results.Hits) < 1 {
			t.Fatalf("expected at least one hit")
		}
		locations := results.Hits[0].Locations["Messages"][term]
		if len(locations) < 1 {
			t.Fatalf("expected at least one location")
		}
		arrayPositions := locations[0].ArrayPositions
		if len(arrayPositions) < 1 {
			t.Fatalf("expected at least one location array position")
		}
		if arrayPositions[0] != want {
			t.Fatalf("expected array position %v, got %f", want, arrayPositions[0])
		}
	}

	// "second" is element 1 of the Messages array
	checkArrayPosition("second", 1)

	// repeat search for this document in Messages field
	tq2 := NewTermQuery("third").SetField("Messages")
	tsr = NewSearchRequest(tq2)
	results, err = index.Search(tsr)
	if err != nil {
		t.Fatal(err)
	}

	// "third" is element 2 of the Messages array
	checkArrayPosition("third", 2)
}

View File

@ -151,6 +151,13 @@ func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *search.DocumentM
End: float64(v.End),
}
if len(v.ArrayPositions) > 0 {
loc.ArrayPositions = make([]float64, len(v.ArrayPositions))
for i, ap := range v.ArrayPositions {
loc.ArrayPositions[i] = float64(ap)
}
}
locations := tlm[s.queryTerm]
if locations == nil {
locations = make(search.Locations, 1)

View File

@ -10,9 +10,10 @@
package search
type Location struct {
Pos float64 `json:"pos"`
Start float64 `json:"start"`
End float64 `json:"end"`
Pos float64 `json:"pos"`
Start float64 `json:"start"`
End float64 `json:"end"`
ArrayPositions []float64 `json:"array_positions"`
}
type Locations []*Location