0
0
Fork 0

fix highlighting to work on fields containing arrays

fixes #170
This commit is contained in:
Marty Schoch 2015-07-31 14:43:12 -04:00
parent 3682c25467
commit f35e2e42df
7 changed files with 451 additions and 23 deletions

View File

@ -69,7 +69,7 @@ func (t *TextField) Value() []byte {
}
// GoString renders the field in a Go-syntax-like form, listing its name,
// options, analyzer, value and array positions.
//
// Only one return statement is kept: a second return placed after the first
// would be unreachable, and the array positions would never be printed.
func (t *TextField) GoString() string {
	return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %s, Value: %s, ArrayPositions: %v}", t.name, t.options, t.analyzer, t.value, t.arrayPositions)
}
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {

View File

@ -15,11 +15,12 @@ import (
)
// Fragment describes a candidate excerpt of a field value for highlighting.
// Each field is declared exactly once; declaring a field name twice in one
// struct is a compile error.
type Fragment struct {
	// Orig holds the original bytes the fragment refers to.
	Orig []byte
	// ArrayPositions records which array element the fragment was taken
	// from, so that it is only matched against term locations carrying the
	// same array positions.
	ArrayPositions []uint64
	// Start and End delimit the fragment; they are compared against term
	// location Start/End values when the fragment is scored.
	// NOTE(review): presumably byte offsets into Orig - confirm.
	Start int
	End   int
	// Score is the relevance assigned to the fragment.
	Score float64
	Index int // used by heap
}
func (f *Fragment) Overlaps(other *Fragment) bool {

View File

@ -32,7 +32,7 @@ func (s *FragmentScorer) Score(f *highlight.Fragment) {
OUTER:
for _, locations := range s.tlm {
for _, location := range locations {
if int(location.Start) >= f.Start && int(location.End) <= f.End {
if sameArrayPositions(f.ArrayPositions, location.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End {
score += 1.0
// once we find a term in the fragment
// don't care about additional matches

View File

@ -80,9 +80,18 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *docume
if f.Name() == field {
_, ok := f.(*document.TextField)
if ok {
termLocationsSameArrayPosition := make(highlight.TermLocations, 0)
for _, otl := range orderedTermLocations {
if sameArrayPositions(f.ArrayPositions(), otl.ArrayPositions) {
termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl)
}
}
fieldData := f.Value()
fragments := s.fragmenter.Fragment(fieldData, orderedTermLocations)
fragments := s.fragmenter.Fragment(fieldData, termLocationsSameArrayPosition)
for _, fragment := range fragments {
fragment.ArrayPositions = f.ArrayPositions()
scorer.Score(fragment)
heap.Push(&fq, fragment)
}
@ -143,6 +152,18 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *docume
return formattedFragments
}
// sameArrayPositions reports whether a field's array positions identify the
// same array element as a term location's array positions.
//
// The two slices must have the same length and every pair of elements must
// denote the same non-negative integer. A term location position that is
// negative, NaN, fractional or too large for a uint64 never matches: a plain
// uint64 conversion would silently truncate fractions (0.5 would match 0)
// and is implementation-dependent for values that are out of range.
func sameArrayPositions(fieldArrayPositions []uint64, termLocationArrayPositions []float64) bool {
	if len(fieldArrayPositions) != len(termLocationArrayPositions) {
		return false
	}

	// first float64 value that no longer fits in a uint64
	const limit float64 = 1 << 64

	for i, fieldPos := range fieldArrayPositions {
		locPos := termLocationArrayPositions[i]
		// written as a negated conjunction so that NaN is rejected too
		if !(locPos >= 0 && locPos < limit) {
			return false
		}
		converted := uint64(locPos)
		// the round trip differs exactly when locPos had a fractional part
		if float64(converted) != locPos || converted != fieldPos {
			return false
		}
	}
	return true
}
// FragmentQueue implements heap.Interface and holds Items.
type FragmentQueue []*highlight.Fragment

View File

@ -10,32 +10,61 @@
package highlight
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/search"
)
// TermLocation records a single occurrence of a term inside a field value.
// Each field is declared exactly once; declaring a field name twice in one
// struct is a compile error.
type TermLocation struct {
	// Term is the matched term.
	Term string
	// ArrayPositions identifies the array element the occurrence belongs
	// to. Locations with different array positions never overlap, and
	// sorting orders by array positions before Start.
	ArrayPositions []float64
	// Pos is the position reported for the occurrence.
	// NOTE(review): presumably the token position - confirm.
	Pos int
	// Start and End bound the occurrence; overlap checks treat the range
	// as half-open, [Start, End).
	Start int
	End   int
}
func (tl *TermLocation) Overlaps(other *TermLocation) bool {
if other.Start >= tl.Start && other.Start < tl.End {
return true
} else if tl.Start >= other.Start && tl.Start < other.End {
return true
if reflect.DeepEqual(tl.ArrayPositions, other.ArrayPositions) {
if other.Start >= tl.Start && other.Start < tl.End {
return true
} else if tl.Start >= other.Start && tl.Start < other.End {
return true
}
}
return false
}
type TermLocations []*TermLocation
// Len returns the number of term locations.
func (t TermLocations) Len() int { return len(t) }

// Swap exchanges the term locations at indexes i and j.
func (t TermLocations) Swap(i, j int) { t[i], t[j] = t[j], t[i] }

// Less reports whether the term location at index i sorts before the one at
// index j. Locations are ordered by their array positions first (compared
// element by element, a shorter list sorting before a longer list it is a
// prefix of) and by Start when the array positions are the same.
func (t TermLocations) Less(i, j int) bool {
	left, right := t[i].ArrayPositions, t[j].ArrayPositions

	// compare all the common array positions; the first difference decides
	for api := 0; api < len(left) && api < len(right); api++ {
		if left[api] < right[api] {
			return true
		}
		if left[api] > right[api] {
			return false
		}
	}

	// all the common array positions are the same
	if len(left) < len(right) {
		return true // j has more array positions, so j is greater
	} else if len(left) > len(right) {
		return false // j has fewer array positions, so j is less
	}

	// array positions the same, compare starts
	return t[i].Start < t[j].Start
}
func (t TermLocations) MergeOverlapping() {
var lastTl *TermLocation
@ -57,10 +86,11 @@ func OrderTermLocations(tlm search.TermLocationMap) TermLocations {
for term, locations := range tlm {
for _, location := range locations {
tl := TermLocation{
Term: term,
Pos: int(location.Pos),
Start: int(location.Start),
End: int(location.End),
Term: term,
ArrayPositions: location.ArrayPositions,
Pos: int(location.Pos),
Start: int(location.Start),
End: int(location.End),
}
rv = append(rv, &tl)
}

View File

@ -3,6 +3,8 @@ package highlight
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/search"
)
func TestTermLocationOverlaps(t *testing.T) {
@ -45,6 +47,59 @@ func TestTermLocationOverlaps(t *testing.T) {
},
expected: false,
},
// with array positions
{
left: &TermLocation{
ArrayPositions: []float64{0},
Start: 0,
End: 5,
},
right: &TermLocation{
ArrayPositions: []float64{1},
Start: 7,
End: 11,
},
expected: false,
},
{
left: &TermLocation{
ArrayPositions: []float64{0},
Start: 0,
End: 5,
},
right: &TermLocation{
ArrayPositions: []float64{1},
Start: 3,
End: 11,
},
expected: false,
},
{
left: &TermLocation{
ArrayPositions: []float64{0},
Start: 0,
End: 5,
},
right: &TermLocation{
ArrayPositions: []float64{0},
Start: 3,
End: 11,
},
expected: true,
},
{
left: &TermLocation{
ArrayPositions: []float64{0},
Start: 0,
End: 5,
},
right: &TermLocation{
ArrayPositions: []float64{0},
Start: 7,
End: 11,
},
expected: false,
},
}
for _, test := range tests {
@ -162,6 +217,107 @@ func TestTermLocationsMergeOverlapping(t *testing.T) {
},
},
},
// with array positions
{
input: TermLocations{
&TermLocation{
ArrayPositions: []float64{0},
Start: 0,
End: 5,
},
&TermLocation{
ArrayPositions: []float64{1},
Start: 7,
End: 11,
},
},
output: TermLocations{
&TermLocation{
ArrayPositions: []float64{0},
Start: 0,
End: 5,
},
&TermLocation{
ArrayPositions: []float64{1},
Start: 7,
End: 11,
},
},
},
{
input: TermLocations{
&TermLocation{
ArrayPositions: []float64{0},
Start: 0,
End: 5,
},
&TermLocation{
ArrayPositions: []float64{0},
Start: 7,
End: 11,
},
},
output: TermLocations{
&TermLocation{
ArrayPositions: []float64{0},
Start: 0,
End: 5,
},
&TermLocation{
ArrayPositions: []float64{0},
Start: 7,
End: 11,
},
},
},
{
input: TermLocations{
&TermLocation{
ArrayPositions: []float64{0},
Start: 0,
End: 5,
},
&TermLocation{
ArrayPositions: []float64{0},
Start: 3,
End: 11,
},
},
output: TermLocations{
&TermLocation{
ArrayPositions: []float64{0},
Start: 0,
End: 11,
},
nil,
},
},
{
input: TermLocations{
&TermLocation{
ArrayPositions: []float64{0},
Start: 0,
End: 5,
},
&TermLocation{
ArrayPositions: []float64{1},
Start: 3,
End: 11,
},
},
output: TermLocations{
&TermLocation{
ArrayPositions: []float64{0},
Start: 0,
End: 5,
},
&TermLocation{
ArrayPositions: []float64{1},
Start: 3,
End: 11,
},
},
},
}
for _, test := range tests {
@ -171,3 +327,172 @@ func TestTermLocationsMergeOverlapping(t *testing.T) {
}
}
}
// TestTermLocationsOrder feeds term location maps to OrderTermLocations and
// checks that the returned locations come back ordered by array positions
// first and by start offset second.
func TestTermLocationsOrder(t *testing.T) {
	cases := []struct {
		input  search.TermLocationMap
		output TermLocations
	}{
		// nothing in, nothing out
		{
			input:  search.TermLocationMap{},
			output: TermLocations{},
		},
		// no array positions, input already ordered by start
		{
			input: search.TermLocationMap{
				"term": search.Locations{
					&search.Location{Start: 0},
					&search.Location{Start: 5},
				},
			},
			output: TermLocations{
				{Term: "term", Start: 0},
				{Term: "term", Start: 5},
			},
		},
		// no array positions, input in reverse start order
		{
			input: search.TermLocationMap{
				"term": search.Locations{
					&search.Location{Start: 5},
					&search.Location{Start: 0},
				},
			},
			output: TermLocations{
				{Term: "term", Start: 0},
				{Term: "term", Start: 5},
			},
		},
		// same array position, input already ordered by start
		{
			input: search.TermLocationMap{
				"term": search.Locations{
					&search.Location{ArrayPositions: []float64{0}, Start: 0},
					&search.Location{ArrayPositions: []float64{0}, Start: 5},
				},
			},
			output: TermLocations{
				{Term: "term", ArrayPositions: []float64{0}, Start: 0},
				{Term: "term", ArrayPositions: []float64{0}, Start: 5},
			},
		},
		// same array position, input in reverse start order
		{
			input: search.TermLocationMap{
				"term": search.Locations{
					&search.Location{ArrayPositions: []float64{0}, Start: 5},
					&search.Location{ArrayPositions: []float64{0}, Start: 0},
				},
			},
			output: TermLocations{
				{Term: "term", ArrayPositions: []float64{0}, Start: 0},
				{Term: "term", ArrayPositions: []float64{0}, Start: 5},
			},
		},
		// different array positions win over the start offset
		{
			input: search.TermLocationMap{
				"term": search.Locations{
					&search.Location{ArrayPositions: []float64{0}, Start: 5},
					&search.Location{ArrayPositions: []float64{1}, Start: 0},
				},
			},
			output: TermLocations{
				{Term: "term", ArrayPositions: []float64{0}, Start: 5},
				{Term: "term", ArrayPositions: []float64{1}, Start: 0},
			},
		},
		// a shorter array position list sorts before a longer one sharing its prefix
		{
			input: search.TermLocationMap{
				"term": search.Locations{
					&search.Location{ArrayPositions: []float64{0}, Start: 5},
					&search.Location{ArrayPositions: []float64{0, 1}, Start: 0},
				},
			},
			output: TermLocations{
				{Term: "term", ArrayPositions: []float64{0}, Start: 5},
				{Term: "term", ArrayPositions: []float64{0, 1}, Start: 0},
			},
		},
	}

	for _, tc := range cases {
		got := OrderTermLocations(tc.input)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Errorf("expected: %#v got %#v", tc.output, got)
	}
}

View File

@ -426,5 +426,56 @@
}
]
}
},
{
"comment": "highlight results including non-matching field (which should be produced in its entirety, though unhighlighted)",
"search": {
"from": 0,
"size": 10,
"query": {
"field": "name",
"match": "long"
},
"highlight": {
"fields": ["name", "title"]
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "b",
"fragments": {
"name": ["steve has a <mark>long</mark> name"],
"title": ["missess"]
}
}
]
}
},
{
"comment": "search and highlight an array field",
"search": {
"from": 0,
"size": 10,
"query": {
"field": "tags",
"match": "gopher"
},
"highlight": {
"fields": ["tags"]
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a",
"fragments": {
"tags": ["<mark>gopher</mark>"]
}
}
]
}
}
]