update to correctly work with composite fields
also updated search results to return array positions
This commit is contained in:
parent
70b23cbe7f
commit
3682c25467
|
@ -10,10 +10,11 @@
|
|||
package analysis
|
||||
|
||||
type TokenLocation struct {
|
||||
Field string
|
||||
Start int
|
||||
End int
|
||||
Position int
|
||||
Field string
|
||||
ArrayPositions []uint64
|
||||
Start int
|
||||
End int
|
||||
Position int
|
||||
}
|
||||
|
||||
type TokenFreq struct {
|
||||
|
@ -52,25 +53,27 @@ func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies)
|
|||
return rv
|
||||
}
|
||||
|
||||
func TokenFrequency(tokens TokenStream) TokenFrequencies {
|
||||
func TokenFrequency(tokens TokenStream, arrayPositions []uint64) TokenFrequencies {
|
||||
index := make(map[string]*TokenFreq)
|
||||
|
||||
for _, token := range tokens {
|
||||
curr, ok := index[string(token.Term)]
|
||||
if ok {
|
||||
curr.Locations = append(curr.Locations, &TokenLocation{
|
||||
Start: token.Start,
|
||||
End: token.End,
|
||||
Position: token.Position,
|
||||
ArrayPositions: arrayPositions,
|
||||
Start: token.Start,
|
||||
End: token.End,
|
||||
Position: token.Position,
|
||||
})
|
||||
} else {
|
||||
index[string(token.Term)] = &TokenFreq{
|
||||
Term: token.Term,
|
||||
Locations: []*TokenLocation{
|
||||
&TokenLocation{
|
||||
Start: token.Start,
|
||||
End: token.End,
|
||||
Position: token.Position,
|
||||
ArrayPositions: arrayPositions,
|
||||
Start: token.Start,
|
||||
End: token.End,
|
||||
Position: token.Position,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ func TestTokenFrequency(t *testing.T) {
|
|||
},
|
||||
},
|
||||
}
|
||||
result := TokenFrequency(tokens)
|
||||
result := TokenFrequency(tokens, nil)
|
||||
if !reflect.DeepEqual(result, expectedResult) {
|
||||
t.Errorf("expected %#v, got %#v", expectedResult, result)
|
||||
}
|
||||
|
|
|
@ -75,7 +75,7 @@ func (n *DateTimeField) Analyze() (int, analysis.TokenFrequencies) {
|
|||
}
|
||||
|
||||
fieldLength := len(tokens)
|
||||
tokenFreqs := analysis.TokenFrequency(tokens)
|
||||
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions)
|
||||
return fieldLength, tokenFreqs
|
||||
}
|
||||
|
||||
|
|
|
@ -71,7 +71,7 @@ func (n *NumericField) Analyze() (int, analysis.TokenFrequencies) {
|
|||
}
|
||||
|
||||
fieldLength := len(tokens)
|
||||
tokenFreqs := analysis.TokenFrequency(tokens)
|
||||
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions)
|
||||
return fieldLength, tokenFreqs
|
||||
}
|
||||
|
||||
|
|
|
@ -60,7 +60,7 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
|
|||
}
|
||||
}
|
||||
fieldLength := len(tokens) // number of tokens in this doc field
|
||||
tokenFreqs := analysis.TokenFrequency(tokens)
|
||||
tokenFreqs := analysis.TokenFrequency(tokens, t.arrayPositions)
|
||||
return fieldLength, tokenFreqs
|
||||
}
|
||||
|
||||
|
|
|
@ -9,9 +9,7 @@
|
|||
|
||||
package upside_down
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/document"
|
||||
)
|
||||
import "github.com/blevesearch/bleve/document"
|
||||
|
||||
type AnalysisResult struct {
|
||||
docID string
|
||||
|
|
|
@ -415,7 +415,7 @@ func (udc *UpsideDownCouch) indexField(docID string, field document.Field, field
|
|||
for _, tf := range tokenFreqs {
|
||||
var termFreqRow *TermFrequencyRow
|
||||
if field.Options().IncludeTermVectors() {
|
||||
tv, newFieldRows := udc.termVectorsFromTokenFreq(fieldIndex, field.ArrayPositions(), tf)
|
||||
tv, newFieldRows := udc.termVectorsFromTokenFreq(fieldIndex, tf)
|
||||
rows = append(rows, newFieldRows...)
|
||||
termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv)
|
||||
} else {
|
||||
|
@ -542,7 +542,7 @@ func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
|
|||
return len(tf.Locations)
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, arrayPositions []uint64, tf *analysis.TokenFreq) ([]*TermVector, []UpsideDownCouchRow) {
|
||||
func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq) ([]*TermVector, []UpsideDownCouchRow) {
|
||||
rv := make([]*TermVector, len(tf.Locations))
|
||||
newFieldRows := make([]UpsideDownCouchRow, 0)
|
||||
|
||||
|
@ -558,7 +558,7 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, arrayPosition
|
|||
}
|
||||
tv := TermVector{
|
||||
field: fieldIndex,
|
||||
arrayPositions: arrayPositions,
|
||||
arrayPositions: l.ArrayPositions,
|
||||
pos: uint64(l.Position),
|
||||
start: uint64(l.Start),
|
||||
end: uint64(l.End),
|
||||
|
|
|
@ -962,3 +962,77 @@ func TestKeywordSearchBug207(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTermVectorArrayPositions(t *testing.T) {
|
||||
defer func() {
|
||||
err := os.RemoveAll("testidx")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
index, err := New("testidx", NewIndexMapping())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// index a document with an array of strings
|
||||
err = index.Index("k", struct {
|
||||
Messages []string
|
||||
}{
|
||||
Messages: []string{
|
||||
"first",
|
||||
"second",
|
||||
"third",
|
||||
"last",
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// search for this document in the all field
|
||||
tq := NewTermQuery("second")
|
||||
tsr := NewSearchRequest(tq)
|
||||
results, err := index.Search(tsr)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if results.Total != 1 {
|
||||
t.Fatalf("expected 1 result, got %d", results.Total)
|
||||
}
|
||||
if len(results.Hits[0].Locations["Messages"]["second"]) < 1 {
|
||||
t.Fatalf("expected at least one location")
|
||||
}
|
||||
if len(results.Hits[0].Locations["Messages"]["second"][0].ArrayPositions) < 1 {
|
||||
t.Fatalf("expected at least one location array position")
|
||||
}
|
||||
if results.Hits[0].Locations["Messages"]["second"][0].ArrayPositions[0] != 1 {
|
||||
t.Fatalf("expected array position 1, got %f", results.Hits[0].Locations["Messages"]["second"][0].ArrayPositions[0])
|
||||
}
|
||||
|
||||
// repeat the search for this document in the Messages field
|
||||
tq2 := NewTermQuery("third").SetField("Messages")
|
||||
tsr = NewSearchRequest(tq2)
|
||||
results, err = index.Search(tsr)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if results.Total != 1 {
|
||||
t.Fatalf("expected 1 result, got %d", results.Total)
|
||||
}
|
||||
if len(results.Hits[0].Locations["Messages"]["third"]) < 1 {
|
||||
t.Fatalf("expected at least one location")
|
||||
}
|
||||
if len(results.Hits[0].Locations["Messages"]["third"][0].ArrayPositions) < 1 {
|
||||
t.Fatalf("expected at least one location array position")
|
||||
}
|
||||
if results.Hits[0].Locations["Messages"]["third"][0].ArrayPositions[0] != 2 {
|
||||
t.Fatalf("expected array position 2, got %f", results.Hits[0].Locations["Messages"]["third"][0].ArrayPositions[0])
|
||||
}
|
||||
|
||||
err = index.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -151,6 +151,13 @@ func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *search.DocumentM
|
|||
End: float64(v.End),
|
||||
}
|
||||
|
||||
if len(v.ArrayPositions) > 0 {
|
||||
loc.ArrayPositions = make([]float64, len(v.ArrayPositions))
|
||||
for i, ap := range v.ArrayPositions {
|
||||
loc.ArrayPositions[i] = float64(ap)
|
||||
}
|
||||
}
|
||||
|
||||
locations := tlm[s.queryTerm]
|
||||
if locations == nil {
|
||||
locations = make(search.Locations, 1)
|
||||
|
|
|
@ -10,9 +10,10 @@
|
|||
package search
|
||||
|
||||
type Location struct {
|
||||
Pos float64 `json:"pos"`
|
||||
Start float64 `json:"start"`
|
||||
End float64 `json:"end"`
|
||||
Pos float64 `json:"pos"`
|
||||
Start float64 `json:"start"`
|
||||
End float64 `json:"end"`
|
||||
ArrayPositions []float64 `json:"array_positions"`
|
||||
}
|
||||
|
||||
type Locations []*Location
|
||||
|
|
Loading…
Reference in New Issue