From d87b4f88bff6df10f6cfdf78ea884b580f811581 Mon Sep 17 00:00:00 2001 From: Silvan Jegen Date: Tue, 22 Dec 2015 16:01:10 +0100 Subject: [PATCH 1/4] Refactor phrase searching Reduce nesting by using early continues. --- search/searcher/search_phrase.go | 57 ++++++++++++++++---------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index cab87ba7..67d287c7 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -104,37 +104,38 @@ func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, for field, termLocMap := range s.currMust.Locations { rvtlm := make(search.TermLocationMap, 0) locations, ok := termLocMap[firstTerm] - if ok { - OUTER: - for _, location := range locations { - crvtlm := make(search.TermLocationMap, 0) - INNER: - for i := 0; i < len(s.terms); i++ { - nextTerm := s.terms[i] - if nextTerm != "" { - // look through all these term locations - // to try and find the correct offsets - nextLocations, ok := termLocMap[nextTerm] - if ok { - for _, nextLocation := range nextLocations { - if nextLocation.Pos == location.Pos+float64(i) && nextLocation.SameArrayElement(location) { - // found a location match for this term - crvtlm.AddLocation(nextTerm, nextLocation) - continue INNER - } - } - // if we got here we didn't find a location match for this term - continue OUTER - } else { - continue OUTER - } + if !ok { + continue + } + OUTER: + for _, location := range locations { + crvtlm := make(search.TermLocationMap, 0) + INNER: + for i := 0; i < len(s.terms); i++ { + nextTerm := s.terms[i] + if nextTerm == "" { + continue + } + // look through all these term locations + // to try and find the correct offsets + nextLocations, ok := termLocMap[nextTerm] + if !ok { + continue OUTER + } + for _, nextLocation := range nextLocations { + if nextLocation.Pos == location.Pos+float64(i) && nextLocation.SameArrayElement(location) { + // found a location match for this term + crvtlm.AddLocation(nextTerm, nextLocation) + continue INNER } } - // if we got here all the terms matched - freq++ - search.MergeTermLocationMaps(rvtlm, crvtlm) - rvftlm[field] = rvtlm + // if we got here we didn't find a location match for this term + continue OUTER } + // if we got here all the terms matched + freq++ + search.MergeTermLocationMaps(rvtlm, crvtlm) + rvftlm[field] = rvtlm } } From 3dd363afaad27a46e5fee8cbaac3fbd64956025a Mon Sep 17 00:00:00 2001 From: Silvan Jegen Date: Wed, 26 Oct 2016 15:03:24 +0200 Subject: [PATCH 2/4] Don't search the same term twice We have searched for the first term in the phrase query already so we can skip it. Before doing so we have to add the location of the first term. --- search/searcher/search_phrase.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 67d287c7..fe7e20e4 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -110,8 +110,9 @@ func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, OUTER: for _, location := range locations { crvtlm := make(search.TermLocationMap, 0) + crvtlm.AddLocation(firstTerm, location) INNER: - for i := 0; i < len(s.terms); i++ { + for i := 1; i < len(s.terms); i++ { nextTerm := s.terms[i] if nextTerm == "" { continue From 33e2432fc6be892017fa6a295beadd4593fa2b0b Mon Sep 17 00:00:00 2001 From: Silvan Jegen Date: Wed, 26 Oct 2016 15:20:40 +0200 Subject: [PATCH 3/4] Initialize the return value as late as possible --- search/searcher/search_phrase.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index fe7e20e4..3f47a7c0 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -102,11 +102,13 @@ func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, freq := 0 firstTerm := s.terms[0] for field, termLocMap := range s.currMust.Locations { - rvtlm := make(search.TermLocationMap, 0) locations, ok := termLocMap[firstTerm] if !ok { continue } + + rvtlm := make(search.TermLocationMap, 0) + OUTER: for _, location := range locations { crvtlm := make(search.TermLocationMap, 0) From 1a6a4c493b6b49466090c4d800095b5234b694e3 Mon Sep 17 00:00:00 2001 From: Silvan Jegen Date: Thu, 27 Oct 2016 11:25:03 +0200 Subject: [PATCH 4/4] Check locations in the phrase searcher as well --- search/searcher/search_phrase_test.go | 28 ++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/search/searcher/search_phrase_test.go b/search/searcher/search_phrase_test.go index c42d6c9a..91fe84f3 100644 --- a/search/searcher/search_phrase_test.go +++ b/search/searcher/search_phrase_test.go @@ -15,6 +15,7 @@ package searcher import ( + "reflect" "testing" "github.com/blevesearch/bleve/index" @@ -52,8 +53,10 @@ func TestPhraseSearch(t *testing.T) { } tests := []struct { - searcher search.Searcher - results []*search.DocumentMatch + searcher search.Searcher + results []*search.DocumentMatch + locations map[string]map[string][]search.Location + fieldterms [][2]string }{ { searcher: phraseSearcher, @@ -63,6 +66,8 @@ func TestPhraseSearch(t *testing.T) { Score: 1.0807601687084403, }, }, + locations: map[string]map[string][]search.Location{"desc": map[string][]search.Location{"beer": []search.Location{search.Location{Pos: 2, Start: 6, End: 10, ArrayPositions: []float64(nil)}}, "angst": []search.Location{search.Location{Pos: 1, Start: 0, End: 5, ArrayPositions: []float64(nil)}}}}, + fieldterms: [][2]string{[2]string{"desc", "beer"}, [2]string{"desc", "angst"}}, }, } @@ -82,13 +87,26 @@ func TestPhraseSearch(t *testing.T) { for err == nil && next != nil { if i < len(test.results) { if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) { - t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex) + t.Errorf("expected result %d to have id %s got %s for test %d\n", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex) } if next.Score != test.results[i].Score { - t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex) - t.Logf("scoring explanation: %s", next.Expl) + t.Errorf("expected result %d to have score %v got %v for test %d\n", i, test.results[i].Score, next.Score, testIndex) + t.Logf("scoring explanation: %s\n", next.Expl) + } + for _, ft := range test.fieldterms { + locs := next.Locations[ft[0]][ft[1]] + explocs := test.locations[ft[0]][ft[1]] + if len(explocs) != len(locs) { + t.Fatalf("expected result %d to have %d Locations (%#v) but got %d (%#v) for test %d with field %q and term %q\n", i, len(explocs), explocs, len(locs), locs, testIndex, ft[0], ft[1]) + } + for ind, exploc := range explocs { + if !reflect.DeepEqual(*locs[ind], exploc) { + t.Errorf("expected result %d to have Location %v got %v for test %d\n", i, exploc, locs[ind], testIndex) + } + } } } + ctx.DocumentMatchPool.Put(next) next, err = test.searcher.Next(ctx) i++