From 8096d9fb9055e838652956c2d1de7d498e929342 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 9 Feb 2017 20:02:12 -0500 Subject: [PATCH] remove use of float64 to represent int things this originated from a misunderstanding of mine going back several years. the values need not be float64 just because we plan to serialize them as json. there are still larger questions about what the right type should be, and where should any conversions go. but, this commit simply attempts to address the most egregious problems --- index_test.go | 4 +- search/highlight/format/ansi/ansi.go | 2 +- search/highlight/format/html/html.go | 2 +- .../simple/fragment_scorer_simple.go | 2 +- .../highlighter/simple/highlighter_simple.go | 2 +- search/highlight/term_locations.go | 14 +--- search/highlight/term_locations_test.go | 78 +++++++++---------- search/scorer/scorer_term.go | 10 +-- search/search.go | 8 +- search/searcher/search_phrase.go | 24 +++--- search/searcher/search_phrase_test.go | 2 +- 11 files changed, 67 insertions(+), 81 deletions(-) diff --git a/index_test.go b/index_test.go index d0ea62e5..9ca46d29 100644 --- a/index_test.go +++ b/index_test.go @@ -1114,7 +1114,7 @@ func TestTermVectorArrayPositions(t *testing.T) { t.Fatalf("expected at least one location array position") } if results.Hits[0].Locations["Messages"]["second"][0].ArrayPositions[0] != 1 { - t.Fatalf("expected array position 1, got %f", results.Hits[0].Locations["Messages"]["second"][0].ArrayPositions[0]) + t.Fatalf("expected array position 1, got %d", results.Hits[0].Locations["Messages"]["second"][0].ArrayPositions[0]) } // repeat search for this document in Messages field @@ -1136,7 +1136,7 @@ func TestTermVectorArrayPositions(t *testing.T) { t.Fatalf("expected at least one location array position") } if results.Hits[0].Locations["Messages"]["third"][0].ArrayPositions[0] != 2 { - t.Fatalf("expected array position 2, got %f", results.Hits[0].Locations["Messages"]["third"][0].ArrayPositions[0]) + t.Fatalf("expected array position 2, got %d", results.Hits[0].Locations["Messages"]["third"][0].ArrayPositions[0]) } err = index.Close() diff --git a/search/highlight/format/ansi/ansi.go b/search/highlight/format/ansi/ansi.go index 2ee6934f..2500409c 100644 --- a/search/highlight/format/ansi/ansi.go +++ b/search/highlight/format/ansi/ansi.go @@ -41,7 +41,7 @@ func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations h continue } // make sure the array positions match - if !highlight.SameArrayPositions(f.ArrayPositions, termLocation.ArrayPositions) { + if !termLocation.ArrayPositions.Equals(f.ArrayPositions) { continue } if termLocation.Start < curr { diff --git a/search/highlight/format/html/html.go b/search/highlight/format/html/html.go index 2ff4e9ba..8154e790 100644 --- a/search/highlight/format/html/html.go +++ b/search/highlight/format/html/html.go @@ -44,7 +44,7 @@ func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations h continue } // make sure the array positions match - if !highlight.SameArrayPositions(f.ArrayPositions, termLocation.ArrayPositions) { + if !termLocation.ArrayPositions.Equals(f.ArrayPositions) { continue } if termLocation.Start < curr { diff --git a/search/highlight/highlighter/simple/fragment_scorer_simple.go b/search/highlight/highlighter/simple/fragment_scorer_simple.go index 2a6ce68f..3ec4c3d2 100644 --- a/search/highlight/highlighter/simple/fragment_scorer_simple.go +++ b/search/highlight/highlighter/simple/fragment_scorer_simple.go @@ -37,7 +37,7 @@ func (s *FragmentScorer) Score(f *highlight.Fragment) { OUTER: for _, locations := range s.tlm { for _, location := range locations { - if highlight.SameArrayPositions(f.ArrayPositions, location.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End { + if location.ArrayPositions.Equals(f.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End { score += 1.0 // once we find a term in the fragment // don't care about additional matches diff --git a/search/highlight/highlighter/simple/highlighter_simple.go b/search/highlight/highlighter/simple/highlighter_simple.go index 3815c8e1..4849516b 100644 --- a/search/highlight/highlighter/simple/highlighter_simple.go +++ b/search/highlight/highlighter/simple/highlighter_simple.go @@ -87,7 +87,7 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *docume if ok { termLocationsSameArrayPosition := make(highlight.TermLocations, 0) for _, otl := range orderedTermLocations { - if highlight.SameArrayPositions(f.ArrayPositions(), otl.ArrayPositions) { + if otl.ArrayPositions.Equals(f.ArrayPositions()) { termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl) } } diff --git a/search/highlight/term_locations.go b/search/highlight/term_locations.go index c53c2dba..6d2cb133 100644 --- a/search/highlight/term_locations.go +++ b/search/highlight/term_locations.go @@ -23,7 +23,7 @@ import ( type TermLocation struct { Term string - ArrayPositions []float64 + ArrayPositions search.ArrayPositions Pos int Start int End int @@ -103,15 +103,3 @@ func OrderTermLocations(tlm search.TermLocationMap) TermLocations { sort.Sort(rv) return rv } - -func SameArrayPositions(fieldArrayPositions []uint64, termLocationArrayPositions []float64) bool { - if len(fieldArrayPositions) != len(termLocationArrayPositions) { - return false - } - for i := 0; i < len(fieldArrayPositions); i++ { - if fieldArrayPositions[i] != uint64(termLocationArrayPositions[i]) { - return false - } - } - return true -} diff --git a/search/highlight/term_locations_test.go b/search/highlight/term_locations_test.go index 6f18a598..c45376ea 100644 --- a/search/highlight/term_locations_test.go +++ b/search/highlight/term_locations_test.go @@ -64,12 +64,12 @@ func TestTermLocationOverlaps(t *testing.T) { // with array positions { left: &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5, }, right: &TermLocation{ - ArrayPositions: []float64{1}, + ArrayPositions: search.ArrayPositions{1}, Start: 7, End: 11, }, @@ -77,12 +77,12 @@ func TestTermLocationOverlaps(t *testing.T) { }, { left: &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5, }, right: &TermLocation{ - ArrayPositions: []float64{1}, + ArrayPositions: search.ArrayPositions{1}, Start: 3, End: 11, }, @@ -90,12 +90,12 @@ func TestTermLocationOverlaps(t *testing.T) { }, { left: &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5, }, right: &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 3, End: 11, }, @@ -103,12 +103,12 @@ func TestTermLocationOverlaps(t *testing.T) { }, { left: &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5, }, right: &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 7, End: 11, }, @@ -235,24 +235,24 @@ func TestTermLocationsMergeOverlapping(t *testing.T) { { input: TermLocations{ &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5, }, &TermLocation{ - ArrayPositions: []float64{1}, + ArrayPositions: search.ArrayPositions{1}, Start: 7, End: 11, }, }, output: TermLocations{ &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5, }, &TermLocation{ - ArrayPositions: []float64{1}, + ArrayPositions: search.ArrayPositions{1}, Start: 7, End: 11, }, @@ -261,24 +261,24 @@ func TestTermLocationsMergeOverlapping(t *testing.T) { { input: TermLocations{ &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5, }, &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 7, End: 11, }, }, output: TermLocations{ &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5, }, &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 7, End: 11, }, @@ -287,19 +287,19 @@ func TestTermLocationsMergeOverlapping(t *testing.T) { { input: TermLocations{ &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5, }, &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 3, End: 11, }, }, output: TermLocations{ &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 11, }, @@ -309,24 +309,24 @@ func TestTermLocationsMergeOverlapping(t *testing.T) { { input: TermLocations{ &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5, }, &TermLocation{ - ArrayPositions: []float64{1}, + ArrayPositions: search.ArrayPositions{1}, Start: 3, End: 11, }, }, output: TermLocations{ &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5, }, &TermLocation{ - ArrayPositions: []float64{1}, + ArrayPositions: search.ArrayPositions{1}, Start: 3, End: 11, }, @@ -401,23 +401,23 @@ func TestTermLocationsOrder(t *testing.T) { input: search.TermLocationMap{ "term": []*search.Location{ { - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, }, { - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 5, }, }, }, output: TermLocations{ &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Term: "term", Start: 0, }, &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Term: "term", Start: 5, }, @@ -427,23 +427,23 @@ func TestTermLocationsOrder(t *testing.T) { input: search.TermLocationMap{ "term": []*search.Location{ { - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 5, }, { - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 0, }, }, }, output: TermLocations{ &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Term: "term", Start: 0, }, &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Term: "term", Start: 5, }, @@ -453,23 +453,23 @@ func TestTermLocationsOrder(t *testing.T) { input: search.TermLocationMap{ "term": []*search.Location{ { - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 5, }, { - ArrayPositions: []float64{1}, + ArrayPositions: search.ArrayPositions{1}, Start: 0, }, }, }, output: TermLocations{ &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Term: "term", Start: 5, }, &TermLocation{ - ArrayPositions: []float64{1}, + ArrayPositions: search.ArrayPositions{1}, Term: "term", Start: 0, }, @@ -479,23 +479,23 @@ func TestTermLocationsOrder(t *testing.T) { input: search.TermLocationMap{ "term": []*search.Location{ { - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Start: 5, }, { - ArrayPositions: []float64{0, 1}, + ArrayPositions: search.ArrayPositions{0, 1}, Start: 0, }, }, }, output: TermLocations{ &TermLocation{ - ArrayPositions: []float64{0}, + ArrayPositions: search.ArrayPositions{0}, Term: "term", Start: 5, }, &TermLocation{ - ArrayPositions: []float64{0, 1}, + ArrayPositions: search.ArrayPositions{0, 1}, Term: "term", Start: 0, }, diff --git a/search/scorer/scorer_term.go b/search/scorer/scorer_term.go index 81e472fb..ca964892 100644 --- a/search/scorer/scorer_term.go +++ b/search/scorer/scorer_term.go @@ -148,7 +148,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term for _, v := range termMatch.Vectors { totalPositions += len(v.ArrayPositions) } - positions := make([]float64, totalPositions) + positions := make(search.ArrayPositions, totalPositions) positionsUsed := 0 rv.Locations = make(search.FieldTermLocationMap) @@ -162,14 +162,14 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term loc := &locs[locsUsed] locsUsed++ - loc.Pos = float64(v.Pos) - loc.Start = float64(v.Start) - loc.End = float64(v.End) + loc.Pos = v.Pos + loc.Start = v.Start + loc.End = v.End if len(v.ArrayPositions) > 0 { loc.ArrayPositions = positions[positionsUsed : positionsUsed+len(v.ArrayPositions)] for i, ap := range v.ArrayPositions { - loc.ArrayPositions[i] = float64(ap) + loc.ArrayPositions[i] = ap } positionsUsed += len(v.ArrayPositions) } diff --git a/search/search.go b/search/search.go index 9c49554b..c6f2109d 100644 --- a/search/search.go +++ b/search/search.go @@ -21,7 +21,7 @@ import ( "github.com/blevesearch/bleve/index" ) -type ArrayPositions []float64 +type ArrayPositions []uint64 func (ap ArrayPositions) Equals(other ArrayPositions) bool { if len(ap) != len(other) { @@ -36,9 +36,9 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool { } type Location struct { - Pos float64 `json:"pos"` - Start float64 `json:"start"` - End float64 `json:"end"` + Pos uint64 `json:"pos"` + Start uint64 `json:"start"` + End uint64 `json:"end"` ArrayPositions ArrayPositions `json:"array_positions"` } diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index e6728723..baf640f4 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -185,7 +185,7 @@ func (p phrasePath) MergeInto(in search.TermLocationMap) { // this is the primary state being built during the traversal // // returns slice of paths, or nil if invocation did not find any successul paths -func findPhrasePaths(prevPos float64, ap search.ArrayPositions, phraseTerms []string, tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath { +func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms []string, tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath { // no more terms if len(phraseTerms) < 1 { @@ -197,10 +197,10 @@ func findPhrasePaths(prevPos float64, ap search.ArrayPositions, phraseTerms []st // empty term is treated as match (continue) if car == "" { - nextPos := prevPos + 1.0 - if prevPos == 0.0 { - // if prevPos was 0.0, don't set it to 1 (as thats not a real abs pos) - nextPos = 0.0 // don't advance nextPos if prevPos was 0 + nextPos := prevPos + 1 + if prevPos == 0 { + // if prevPos was 0, don't set it to 1 (as thats not a real abs pos) + nextPos = 0 // don't advance nextPos if prevPos was 0 } return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop) } @@ -209,19 +209,19 @@ func findPhrasePaths(prevPos float64, ap search.ArrayPositions, phraseTerms []st locations := tlm[car] var rv []phrasePath for _, loc := range locations { - if prevPos != 0.0 && !loc.ArrayPositions.Equals(ap) { + if prevPos != 0 && !loc.ArrayPositions.Equals(ap) { // if the array positions are wrong, can't match, try next location continue } // compute distance from previous phrase term dist := 0 - if prevPos != 0.0 { - dist = editDistance(prevPos+1.0, loc.Pos) + if prevPos != 0 { + dist = editDistance(prevPos+1, loc.Pos) } // if enough slop reamining, continue recursively - if prevPos == 0.0 || (remainingSlop-dist) >= 0 { + if prevPos == 0 || (remainingSlop-dist) >= 0 { // this location works, add it to the path (but not for empty term) px := append(p, &phrasePart{term: car, loc: loc}) rv = append(rv, findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist)...) @@ -230,10 +230,8 @@ func findPhrasePaths(prevPos float64, ap search.ArrayPositions, phraseTerms []st return rv } -func editDistance(p1, p2 float64) int { - i1 := int(p1) - i2 := int(p2) - dist := i1 - i2 +func editDistance(p1, p2 uint64) int { + dist := int(p1 - p2) if dist < 0 { return -dist } diff --git a/search/searcher/search_phrase_test.go b/search/searcher/search_phrase_test.go index ea4e7b88..d6741178 100644 --- a/search/searcher/search_phrase_test.go +++ b/search/searcher/search_phrase_test.go @@ -68,7 +68,7 @@ func TestPhraseSearch(t *testing.T) { Score: 1.0807601687084403, }, }, - locations: map[string]map[string][]search.Location{"desc": map[string][]search.Location{"beer": []search.Location{search.Location{Pos: 2, Start: 6, End: 10, ArrayPositions: []float64(nil)}}, "angst": []search.Location{search.Location{Pos: 1, Start: 0, End: 5, ArrayPositions: []float64(nil)}}}}, + locations: map[string]map[string][]search.Location{"desc": map[string][]search.Location{"beer": []search.Location{search.Location{Pos: 2, Start: 6, End: 10}}, "angst": []search.Location{search.Location{Pos: 1, Start: 0, End: 5}}}}, fieldterms: [][2]string{[2]string{"desc", "beer"}, [2]string{"desc", "angst"}}, }, }