parent
3682c25467
commit
f35e2e42df
|
@ -69,7 +69,7 @@ func (t *TextField) Value() []byte {
|
|||
}
|
||||
|
||||
func (t *TextField) GoString() string {
|
||||
return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %s, Value: %s}", t.name, t.options, t.analyzer, t.value)
|
||||
return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %s, Value: %s, ArrayPositions: %v}", t.name, t.options, t.analyzer, t.value, t.arrayPositions)
|
||||
}
|
||||
|
||||
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
|
||||
|
|
|
@ -15,11 +15,12 @@ import (
|
|||
)
|
||||
|
||||
type Fragment struct {
|
||||
Orig []byte
|
||||
Start int
|
||||
End int
|
||||
Score float64
|
||||
Index int // used by heap
|
||||
Orig []byte
|
||||
ArrayPositions []uint64
|
||||
Start int
|
||||
End int
|
||||
Score float64
|
||||
Index int // used by heap
|
||||
}
|
||||
|
||||
func (f *Fragment) Overlaps(other *Fragment) bool {
|
||||
|
|
|
@ -32,7 +32,7 @@ func (s *FragmentScorer) Score(f *highlight.Fragment) {
|
|||
OUTER:
|
||||
for _, locations := range s.tlm {
|
||||
for _, location := range locations {
|
||||
if int(location.Start) >= f.Start && int(location.End) <= f.End {
|
||||
if sameArrayPositions(f.ArrayPositions, location.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End {
|
||||
score += 1.0
|
||||
// once we find a term in the fragment
|
||||
// don't care about additional matches
|
||||
|
|
|
@ -80,9 +80,18 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *docume
|
|||
if f.Name() == field {
|
||||
_, ok := f.(*document.TextField)
|
||||
if ok {
|
||||
|
||||
termLocationsSameArrayPosition := make(highlight.TermLocations, 0)
|
||||
for _, otl := range orderedTermLocations {
|
||||
if sameArrayPositions(f.ArrayPositions(), otl.ArrayPositions) {
|
||||
termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl)
|
||||
}
|
||||
}
|
||||
|
||||
fieldData := f.Value()
|
||||
fragments := s.fragmenter.Fragment(fieldData, orderedTermLocations)
|
||||
fragments := s.fragmenter.Fragment(fieldData, termLocationsSameArrayPosition)
|
||||
for _, fragment := range fragments {
|
||||
fragment.ArrayPositions = f.ArrayPositions()
|
||||
scorer.Score(fragment)
|
||||
heap.Push(&fq, fragment)
|
||||
}
|
||||
|
@ -143,6 +152,18 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *docume
|
|||
return formattedFragments
|
||||
}
|
||||
|
||||
// sameArrayPositions reports whether a field's array positions and a term
// location's array positions identify the same array element.
//
// The two slices must have the same length and be equal element by element.
// Field positions are uint64 while term-location positions are float64, so
// every term-location value is validated before it is converted: a value
// that is negative, NaN, fractional, or too large for uint64 can never name
// an array index and therefore never matches. (A bare uint64(x) conversion
// would silently truncate 1.5 to 1, and its result for out-of-range values
// is implementation-dependent.)
//
// Two empty (or nil) slices are considered the same.
func sameArrayPositions(fieldArrayPositions []uint64, termLocationArrayPositions []float64) bool {
	if len(fieldArrayPositions) != len(termLocationArrayPositions) {
		return false
	}
	for i, fieldPos := range fieldArrayPositions {
		termPos := termLocationArrayPositions[i]
		// Written as a negated range check so that NaN, for which every
		// comparison is false, is rejected as well.
		if !(termPos >= 0 && termPos < 1<<64) {
			return false
		}
		converted := uint64(termPos)
		// The round trip back to float64 differs only when termPos had a
		// fractional part that the conversion dropped.
		if float64(converted) != termPos || converted != fieldPos {
			return false
		}
	}
	return true
}
|
||||
|
||||
// FragmentQueue implements heap.Interface and holds highlight fragments.
|
||||
type FragmentQueue []*highlight.Fragment
|
||||
|
||||
|
|
|
@ -10,32 +10,61 @@
|
|||
package highlight
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/search"
|
||||
)
|
||||
|
||||
type TermLocation struct {
|
||||
Term string
|
||||
Pos int
|
||||
Start int
|
||||
End int
|
||||
Term string
|
||||
ArrayPositions []float64
|
||||
Pos int
|
||||
Start int
|
||||
End int
|
||||
}
|
||||
|
||||
func (tl *TermLocation) Overlaps(other *TermLocation) bool {
|
||||
if other.Start >= tl.Start && other.Start < tl.End {
|
||||
return true
|
||||
} else if tl.Start >= other.Start && tl.Start < other.End {
|
||||
return true
|
||||
if reflect.DeepEqual(tl.ArrayPositions, other.ArrayPositions) {
|
||||
if other.Start >= tl.Start && other.Start < tl.End {
|
||||
return true
|
||||
} else if tl.Start >= other.Start && tl.Start < other.End {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// TermLocations is a list of term location pointers. Its Len, Swap and Less
// methods let it be used as a sort.Interface.
type TermLocations []*TermLocation
|
||||
|
||||
func (t TermLocations) Len() int { return len(t) }
|
||||
func (t TermLocations) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
|
||||
func (t TermLocations) Less(i, j int) bool { return t[i].Start < t[j].Start }
|
||||
func (t TermLocations) Len() int { return len(t) }
|
||||
func (t TermLocations) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
|
||||
func (t TermLocations) Less(i, j int) bool {
|
||||
|
||||
shortestArrayPositions := len(t[i].ArrayPositions)
|
||||
if len(t[j].ArrayPositions) < shortestArrayPositions {
|
||||
shortestArrayPositions = len(t[j].ArrayPositions)
|
||||
}
|
||||
|
||||
// compare all the common array positions
|
||||
for api := 0; api < shortestArrayPositions; api++ {
|
||||
if t[i].ArrayPositions[api] < t[j].ArrayPositions[api] {
|
||||
return true
|
||||
}
|
||||
if t[i].ArrayPositions[api] > t[j].ArrayPositions[api] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
// all the common array positions are the same
|
||||
if len(t[i].ArrayPositions) < len(t[j].ArrayPositions) {
|
||||
return true // j array positions, longer so greather
|
||||
} else if len(t[i].ArrayPositions) > len(t[j].ArrayPositions) {
|
||||
return false // j array positions, shorter so less
|
||||
}
|
||||
|
||||
// array positions the same, compare starts
|
||||
return t[i].Start < t[j].Start
|
||||
}
|
||||
|
||||
func (t TermLocations) MergeOverlapping() {
|
||||
var lastTl *TermLocation
|
||||
|
@ -57,10 +86,11 @@ func OrderTermLocations(tlm search.TermLocationMap) TermLocations {
|
|||
for term, locations := range tlm {
|
||||
for _, location := range locations {
|
||||
tl := TermLocation{
|
||||
Term: term,
|
||||
Pos: int(location.Pos),
|
||||
Start: int(location.Start),
|
||||
End: int(location.End),
|
||||
Term: term,
|
||||
ArrayPositions: location.ArrayPositions,
|
||||
Pos: int(location.Pos),
|
||||
Start: int(location.Start),
|
||||
End: int(location.End),
|
||||
}
|
||||
rv = append(rv, &tl)
|
||||
}
|
||||
|
|
|
@ -3,6 +3,8 @@ package highlight
|
|||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/search"
|
||||
)
|
||||
|
||||
func TestTermLocationOverlaps(t *testing.T) {
|
||||
|
@ -45,6 +47,59 @@ func TestTermLocationOverlaps(t *testing.T) {
|
|||
},
|
||||
expected: false,
|
||||
},
|
||||
// with array positions
|
||||
{
|
||||
left: &TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
right: &TermLocation{
|
||||
ArrayPositions: []float64{1},
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
left: &TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
right: &TermLocation{
|
||||
ArrayPositions: []float64{1},
|
||||
Start: 3,
|
||||
End: 11,
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
left: &TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
right: &TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 3,
|
||||
End: 11,
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
left: &TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
right: &TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
@ -162,6 +217,107 @@ func TestTermLocationsMergeOverlapping(t *testing.T) {
|
|||
},
|
||||
},
|
||||
},
|
||||
// with array positions
|
||||
{
|
||||
input: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{1},
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{1},
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 3,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
End: 11,
|
||||
},
|
||||
nil,
|
||||
},
|
||||
},
|
||||
{
|
||||
input: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{1},
|
||||
Start: 3,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{1},
|
||||
Start: 3,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
@ -171,3 +327,172 @@ func TestTermLocationsMergeOverlapping(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTermLocationsOrder(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
input search.TermLocationMap
|
||||
output TermLocations
|
||||
}{
|
||||
{
|
||||
input: search.TermLocationMap{},
|
||||
output: TermLocations{},
|
||||
},
|
||||
{
|
||||
input: search.TermLocationMap{
|
||||
"term": search.Locations{
|
||||
&search.Location{
|
||||
Start: 0,
|
||||
},
|
||||
&search.Location{
|
||||
Start: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
Term: "term",
|
||||
Start: 0,
|
||||
},
|
||||
&TermLocation{
|
||||
Term: "term",
|
||||
Start: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: search.TermLocationMap{
|
||||
"term": search.Locations{
|
||||
&search.Location{
|
||||
Start: 5,
|
||||
},
|
||||
&search.Location{
|
||||
Start: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
Term: "term",
|
||||
Start: 0,
|
||||
},
|
||||
&TermLocation{
|
||||
Term: "term",
|
||||
Start: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
// with array positions
|
||||
{
|
||||
input: search.TermLocationMap{
|
||||
"term": search.Locations{
|
||||
&search.Location{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
},
|
||||
&search.Location{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Term: "term",
|
||||
Start: 0,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Term: "term",
|
||||
Start: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: search.TermLocationMap{
|
||||
"term": search.Locations{
|
||||
&search.Location{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 5,
|
||||
},
|
||||
&search.Location{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Term: "term",
|
||||
Start: 0,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Term: "term",
|
||||
Start: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: search.TermLocationMap{
|
||||
"term": search.Locations{
|
||||
&search.Location{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 5,
|
||||
},
|
||||
&search.Location{
|
||||
ArrayPositions: []float64{1},
|
||||
Start: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Term: "term",
|
||||
Start: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{1},
|
||||
Term: "term",
|
||||
Start: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: search.TermLocationMap{
|
||||
"term": search.Locations{
|
||||
&search.Location{
|
||||
ArrayPositions: []float64{0},
|
||||
Start: 5,
|
||||
},
|
||||
&search.Location{
|
||||
ArrayPositions: []float64{0, 1},
|
||||
Start: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0},
|
||||
Term: "term",
|
||||
Start: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: []float64{0, 1},
|
||||
Term: "term",
|
||||
Start: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
actual := OrderTermLocations(test.input)
|
||||
if !reflect.DeepEqual(actual, test.output) {
|
||||
t.Errorf("expected: %#v got %#v", test.output, actual)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -426,5 +426,56 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"comment": "highlight results including non-matching field (which should be produced in its entirety, though unhighlighted)",
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "name",
|
||||
"match": "long"
|
||||
},
|
||||
"highlight": {
|
||||
"fields": ["name", "title"]
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "b",
|
||||
"fragments": {
|
||||
"name": ["steve has a <mark>long</mark> name"],
|
||||
"title": ["missess"]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"comment": "search and highlight an array field",
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "tags",
|
||||
"match": "gopher"
|
||||
},
|
||||
"highlight": {
|
||||
"fields": ["tags"]
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a",
|
||||
"fragments": {
|
||||
"tags": ["<mark>gopher</mark>"]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
Loading…
Reference in New Issue