From a16efa5e78bf07508cbc640790a34d0d5c521b91 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 24 Mar 2017 17:06:14 -0700 Subject: [PATCH] add experimental support for indexing/query geo points New field type GeoPointField, or "geopoint" in mapping JSON. Currently structs and maps are considered when a mapping explicitly marks a field as type "geopoint". Several variants of "lon", "lng", and "lat" are looked for in map keys, struct field names, or method names. New query type GeoBoundingBoxQuery searches for documents which have a GeoPointField indexed with a value that is inside the specified bounding box. New query type GeoDistanceQuery searches for documents which have a GeoPointField indexed with a value that is less than or equal to the specified distance from the specified location. New sort by method "geo_distance". Hits can be sorted by their distance from the specified location. New geo utility package with all routines ported from Lucene. New FilteringSearcher, which wraps an existing Searcher, but filters all hits with a user-provided callback. 
--- document/field_geopoint.go | 137 +++++++++++ document/field_geopoint_test.go | 14 ++ geo/geo.go | 158 ++++++++++++ geo/geo_dist.go | 59 +++++ geo/geo_dist_test.go | 81 +++++++ geo/geo_test.go | 81 +++++++ geo/parse.go | 93 ++++++++ geo/sloppy.go | 185 +++++++++++++++ geo/sloppy_test.go | 73 ++++++ mapping.go | 4 + mapping/document.go | 18 +- mapping/field.go | 25 ++ mapping/mapping_test.go | 55 +++++ numeric/bin.go | 43 ++++ numeric/bin_test.go | 27 +++ search/query/geo_boundingbox.go | 93 ++++++++ search/query/geo_distance.go | 75 ++++++ search/query/query.go | 19 ++ search/searcher/search_filter.go | 88 +++++++ search/searcher/search_geoboundingbox.go | 224 ++++++++++++++++++ search/searcher/search_geoboundingbox_test.go | 156 ++++++++++++ search/searcher/search_geopointdistance.go | 139 +++++++++++ search/sort.go | 115 ++++++++- 23 files changed, 1960 insertions(+), 2 deletions(-) create mode 100644 document/field_geopoint.go create mode 100644 document/field_geopoint_test.go create mode 100644 geo/geo.go create mode 100644 geo/geo_dist.go create mode 100644 geo/geo_dist_test.go create mode 100644 geo/geo_test.go create mode 100644 geo/parse.go create mode 100644 geo/sloppy.go create mode 100644 geo/sloppy_test.go create mode 100644 numeric/bin.go create mode 100644 numeric/bin_test.go create mode 100644 search/query/geo_boundingbox.go create mode 100644 search/query/geo_distance.go create mode 100644 search/searcher/search_filter.go create mode 100644 search/searcher/search_geoboundingbox.go create mode 100644 search/searcher/search_geoboundingbox_test.go create mode 100644 search/searcher/search_geopointdistance.go diff --git a/document/field_geopoint.go b/document/field_geopoint.go new file mode 100644 index 00000000..f508b362 --- /dev/null +++ b/document/field_geopoint.go @@ -0,0 +1,137 @@ +// Copyright (c) 2017 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package document + +import ( + "fmt" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/geo" + "github.com/blevesearch/bleve/numeric" +) + +var GeoPrecisionStep uint = 9 + +type GeoPointField struct { + name string + arrayPositions []uint64 + options IndexingOptions + value numeric.PrefixCoded + numPlainTextBytes uint64 +} + +func (n *GeoPointField) Name() string { + return n.name +} + +func (n *GeoPointField) ArrayPositions() []uint64 { + return n.arrayPositions +} + +func (n *GeoPointField) Options() IndexingOptions { + return n.options +} + +func (n *GeoPointField) Analyze() (int, analysis.TokenFrequencies) { + tokens := make(analysis.TokenStream, 0) + tokens = append(tokens, &analysis.Token{ + Start: 0, + End: len(n.value), + Term: n.value, + Position: 1, + Type: analysis.Numeric, + }) + + original, err := n.value.Int64() + if err == nil { + + shift := GeoPrecisionStep + for shift < 64 { + shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift) + if err != nil { + break + } + token := analysis.Token{ + Start: 0, + End: len(shiftEncoded), + Term: shiftEncoded, + Position: 1, + Type: analysis.Numeric, + } + tokens = append(tokens, &token) + shift += GeoPrecisionStep + } + } + + fieldLength := len(tokens) + tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors()) + return fieldLength, tokenFreqs +} + +func (n *GeoPointField) Value() 
[]byte { + return n.value +} + +func (n *GeoPointField) Lon() (float64, error) { + i64, err := n.value.Int64() + if err != nil { + return 0.0, err + } + return geo.MortonUnhashLon(uint64(i64)), nil +} + +func (n *GeoPointField) Lat() (float64, error) { + i64, err := n.value.Int64() + if err != nil { + return 0.0, err + } + return geo.MortonUnhashLat(uint64(i64)), nil +} + +func (n *GeoPointField) GoString() string { + return fmt.Sprintf("&document.GeoPointField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value) +} + +func (n *GeoPointField) NumPlainTextBytes() uint64 { + return n.numPlainTextBytes +} + +func NewGeoPointFieldFromBytes(name string, arrayPositions []uint64, value []byte) *GeoPointField { + return &GeoPointField{ + name: name, + arrayPositions: arrayPositions, + value: value, + options: DefaultNumericIndexingOptions, + numPlainTextBytes: uint64(len(value)), + } +} + +func NewGeoPointField(name string, arrayPositions []uint64, lon, lat float64) *GeoPointField { + return NewGeoPointFieldWithIndexingOptions(name, arrayPositions, lon, lat, DefaultNumericIndexingOptions) +} + +func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, lon, lat float64, options IndexingOptions) *GeoPointField { + mhash := geo.MortonHash(lon, lat) + prefixCoded := numeric.MustNewPrefixCodedInt64(int64(mhash), 0) + return &GeoPointField{ + name: name, + arrayPositions: arrayPositions, + value: prefixCoded, + options: options, + // not correct, just a place holder until we revisit how fields are + // represented and can fix this better + numPlainTextBytes: uint64(8), + } +} diff --git a/document/field_geopoint_test.go b/document/field_geopoint_test.go new file mode 100644 index 00000000..d705ac70 --- /dev/null +++ b/document/field_geopoint_test.go @@ -0,0 +1,14 @@ +package document + +import "testing" + +func TestGeoPointField(t *testing.T) { + gf := NewGeoPointField("loc", []uint64{}, 0.0015, 0.0015) + numTokens, tokenFreqs := gf.Analyze() + if 
numTokens != 8 { + t.Errorf("expected 8 tokens, got %d", numTokens) + } + if len(tokenFreqs) != 8 { + t.Errorf("expected 8 token freqs") + } +} diff --git a/geo/geo.go b/geo/geo.go new file mode 100644 index 00000000..37ed819e --- /dev/null +++ b/geo/geo.go @@ -0,0 +1,158 @@ +package geo + +import ( + "math" + + "github.com/blevesearch/bleve/numeric" +) + +var minLon = -180.0 +var minLat = -90.0 +var GeoBits uint = 32 +var geoTolerance = 1E-6 +var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0 +var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0 + +func MortonHash(lon, lat float64) uint64 { + return numeric.Interleave(scaleLon(lon), scaleLat(lat)) +} + +func scaleLon(lon float64) uint64 { + rv := uint64((lon - minLon) * lonScale) + return rv +} + +func scaleLat(lat float64) uint64 { + rv := uint64((lat - minLat) * latScale) + return rv +} + +func MortonUnhashLon(hash uint64) float64 { + return unscaleLon(numeric.Deinterleave(hash)) +} + +func MortonUnhashLat(hash uint64) float64 { + return unscaleLat(numeric.Deinterleave(hash >> 1)) +} + +func unscaleLon(lon uint64) float64 { + return (float64(lon) / lonScale) + minLon +} + +func unscaleLat(lat uint64) float64 { + return (float64(lat) / latScale) + minLat +} + +func compareGeo(a, b float64) float64 { + compare := a - b + if math.Abs(compare) <= geoTolerance { + return 0 + } + return compare +} + +func RectIntersects(aMinX, aMinY, aMaxX, aMaxY, bMinX, bMinY, bMaxX, bMaxY float64) bool { + return !(aMaxX < bMinX || aMinX > bMaxX || aMaxY < bMinY || aMinY > bMaxY) +} + +func RectWithin(aMinX, aMinY, aMaxX, aMaxY, bMinX, bMinY, bMaxX, bMaxY float64) bool { + rv := !(aMinX < bMinX || aMinY < bMinY || aMaxX > bMaxX || aMaxY > bMaxY) + return rv +} + +func BoundingBoxContains(lon, lat, minLon, minLat, maxLon, maxLat float64) bool { + return compareGeo(lon, minLon) >= 0 && compareGeo(lon, maxLon) <= 0 && compareGeo(lat, minLat) >= 0 && compareGeo(lat, maxLat) <= 0 +} + +func ComputeBoundingBox(centerLon, centerLat, radius float64) (upperLeftLon float64, upperLeftLat float64, lowerRightLon float64, lowerRightLat float64) { + _, tlat := pointFromLonLatBearing(centerLon, centerLat, 0, radius) + rlon, _ := pointFromLonLatBearing(centerLon, centerLat, 90, radius) + _, blat := pointFromLonLatBearing(centerLon, centerLat, 180, radius) + llon, _ := pointFromLonLatBearing(centerLon, centerLat, 270, radius) + return normalizeLon(llon), normalizeLat(tlat), normalizeLon(rlon), normalizeLat(blat) +} + +const degreesToRadian = math.Pi / 180 +const radiansToDegrees = 180 / math.Pi 
+const flattening = 1.0 / 298.257223563 +const semiMajorAxis = 6378137 +const semiMinorAxis = semiMajorAxis * (1.0 - flattening) +const semiMajorAxis2 = semiMajorAxis * semiMajorAxis +const semiMinorAxis2 = semiMinorAxis * semiMinorAxis + +func DegreesToRadians(d float64) float64 { + return d * degreesToRadian +} + +func RadiansToDegrees(r float64) float64 { + return r * radiansToDegrees +} + +func pointFromLonLatBearing(lon, lat, bearing, dist float64) (float64, float64) { + + alpha1 := DegreesToRadians(bearing) + cosA1 := math.Cos(alpha1) + sinA1 := math.Sin(alpha1) + tanU1 := (1 - flattening) * math.Tan(DegreesToRadians(lat)) + cosU1 := 1 / math.Sqrt(1+tanU1*tanU1) + sinU1 := tanU1 * cosU1 + sig1 := math.Atan2(tanU1, cosA1) + sinAlpha := cosU1 * sinA1 + cosSqAlpha := 1 - sinAlpha*sinAlpha + uSq := cosSqAlpha * (semiMajorAxis2 - semiMinorAxis2) / semiMinorAxis2 + A := 1 + uSq/16384*(4096+uSq*(-768+uSq*(320-175*uSq))) + B := uSq / 1024 * (256 + uSq*(-128+uSq*(74-47*uSq))) + + sigma := dist / (semiMinorAxis * A) + + cos25SigmaM := math.Cos(2*sig1 + sigma) + sinSigma := math.Sin(sigma) + cosSigma := math.Cos(sigma) + deltaSigma := B * sinSigma * (cos25SigmaM + (B/4)*(cosSigma*(-1+2*cos25SigmaM*cos25SigmaM)-(B/6)*cos25SigmaM*(-1+4*sinSigma*sinSigma)*(-3+4*cos25SigmaM*cos25SigmaM))) + sigmaP := sigma + sigma = dist/(semiMinorAxis*A) + deltaSigma + for math.Abs(sigma-sigmaP) > 1E-12 { + cos25SigmaM = math.Cos(2*sig1 + sigma) + sinSigma = math.Sin(sigma) + cosSigma = math.Cos(sigma) + deltaSigma = B * sinSigma * (cos25SigmaM + (B/4)*(cosSigma*(-1+2*cos25SigmaM*cos25SigmaM)-(B/6)*cos25SigmaM*(-1+4*sinSigma*sinSigma)*(-3+4*cos25SigmaM*cos25SigmaM))) + sigmaP = sigma + sigma = dist/(semiMinorAxis*A) + deltaSigma + } + + tmp := sinU1*sinSigma - cosU1*cosSigma*cosA1 + lat2 := math.Atan2(sinU1*cosSigma+cosU1*sinSigma*cosA1, (1-flattening)*math.Sqrt(sinAlpha*sinAlpha+tmp*tmp)) + lamda := math.Atan2(sinSigma*sinA1, cosU1*cosSigma-sinU1*sinSigma*cosA1) + c := flattening / 16 * 
cosSqAlpha * (4 + flattening*(4-3*cosSqAlpha)) + lam := lamda - (1-c)*flattening*sinAlpha*(sigma+c*sinSigma*(cos25SigmaM+c*cosSigma*(-1+2*cos25SigmaM*cos25SigmaM))) + + rvlon := lon + RadiansToDegrees(lam) + rvlat := RadiansToDegrees(lat2) + + return rvlon, rvlat +} + +func normalizeLon(lonDeg float64) float64 { + if lonDeg >= -180 && lonDeg <= 180 { + return lonDeg + } + + off := math.Mod(lonDeg+180, 360) + if off < 0 { + return 180 + off + } else if off == 0 && lonDeg > 0 { + return 180 + } + return -180 + off +} + +func normalizeLat(latDeg float64) float64 { + if latDeg >= -90 && latDeg <= 90 { + return latDeg + } + off := math.Abs(math.Mod(latDeg+90, 360)) + if off <= 180 { + return off - 90 + } + return (360 - off) - 90 +} diff --git a/geo/geo_dist.go b/geo/geo_dist.go new file mode 100644 index 00000000..d9da1bde --- /dev/null +++ b/geo/geo_dist.go @@ -0,0 +1,59 @@ +package geo + +import ( + "math" + "strconv" + "strings" +) + +type distanceUnit struct { + conv float64 + suffixes []string +} + +var inch = distanceUnit{0.0254, []string{"in", "inch"}} +var yard = distanceUnit{0.9144, []string{"yd", "yards"}} +var feet = distanceUnit{0.3048, []string{"ft", "feet"}} +var kilom = distanceUnit{1000, []string{"km", "kilometers"}} +var nauticalm = distanceUnit{1852.0, []string{"nm", "nauticalmiles"}} +var millim = distanceUnit{0.001, []string{"mm", "millimeters"}} +var centim = distanceUnit{0.01, []string{"cm", "centimeters"}} +var miles = distanceUnit{1609.344, []string{"mi", "miles"}} +var meters = distanceUnit{1, []string{"m", "meters"}} + +var distanceUnits = []*distanceUnit{ + &inch, &yard, &feet, &kilom, &nauticalm, &millim, &centim, &miles, &meters, +} + +// ParseDistance attempts to parse a distance, return distance in meters +func ParseDistance(d string) (float64, error) { + for _, unit := range distanceUnits { + for _, unitSuffix := range unit.suffixes { + if strings.HasSuffix(d, unitSuffix) { + parsedNum, err := strconv.ParseFloat(d[0:len(d)-len(unitSuffix)], 
64) + if err != nil { + return 0, err + } + return parsedNum * unit.conv, nil + } + } + } + // no unit matched, try assuming meters? + parsedNum, err := strconv.ParseFloat(d, 64) + if err != nil { + return 0, err + } + return parsedNum, nil +} + +func Haversin(lon1, lat1, lon2, lat2 float64) float64 { + x1 := lat1 * degreesToRadian + x2 := lat2 * degreesToRadian + h1 := 1 - cos(x1-x2) + h2 := 1 - cos((lon1-lon2)*degreesToRadian) + h := (h1 + cos(x1)*cos(x2)*h2) / 2 + avgLat := (x1 + x2) / 2 + diameter := earthDiameter(avgLat) + + return diameter * asin(math.Min(1, math.Sqrt(h))) +} diff --git a/geo/geo_dist_test.go b/geo/geo_dist_test.go new file mode 100644 index 00000000..2ed57ac1 --- /dev/null +++ b/geo/geo_dist_test.go @@ -0,0 +1,81 @@ +package geo + +import ( + "math" + "reflect" + "strconv" + "testing" +) + +func TestParseDistance(t *testing.T) { + tests := []struct { + dist string + want float64 + wantErr error + }{ + {"5mi", 5 * 1609.344, nil}, + {"3", 3, nil}, + {"3m", 3, nil}, + {"5km", 5000, nil}, + {"km", 0, &strconv.NumError{Func: "ParseFloat", Num: "", Err: strconv.ErrSyntax}}, + {"", 0, &strconv.NumError{Func: "ParseFloat", Num: "", Err: strconv.ErrSyntax}}, + } + + for _, test := range tests { + got, err := ParseDistance(test.dist) + if !reflect.DeepEqual(err, test.wantErr) { + t.Errorf("expected err: %v, got %v for %s", test.wantErr, err, test.dist) + } + if got != test.want { + t.Errorf("expected distance %f got %f for %s", test.want, got, test.dist) + } + } +} + +func TestHaversinDistance(t *testing.T) { + earthRadiusKMs := 6378.137 + halfCircle := earthRadiusKMs * math.Pi + + tests := []struct { + lon1 float64 + lat1 float64 + lon2 float64 + lat2 float64 + want float64 + }{ + {1, 1, math.NaN(), 1, math.NaN()}, + {1, 1, 1, math.NaN(), math.NaN()}, + {1, math.NaN(), 1, 1, math.NaN()}, + {math.NaN(), 1, 1, 1, math.NaN()}, + + {0, 0, 0, 0, 0}, + {-180, 0, -180, 0, 0}, + {-180, 0, 180, 0, 0}, + {180, 0, 180, 0, 0}, + + {0, 90, 0, 90, 0}, + {-180, 90, 
-180, 90, 0}, + {-180, 90, 180, 90, 0}, + {180, 90, 180, 90, 0}, + + {0, 0, 180, 0, halfCircle}, + + {-74.0059731, 40.7143528, -74.0059731, 40.7143528, 0}, + {-74.0059731, 40.7143528, -73.9844722, 40.759011, 5.286}, + {-74.0059731, 40.7143528, -74.007819, 40.718266, 0.4621}, + {-74.0059731, 40.7143528, -74.0088305, 40.7051157, 1.055}, + {-74.0059731, 40.7143528, -74, 40.7247222, 1.258}, + {-74.0059731, 40.7143528, -73.9962255, 40.731033, 2.029}, + {-74.0059731, 40.7143528, -73.95, 40.65, 8.572}, + } + + for _, test := range tests { + got := Haversin(test.lon1, test.lat1, test.lon2, test.lat2) + if math.IsNaN(test.want) && !math.IsNaN(got) { + t.Errorf("expected NaN, got %f", got) + } + if !math.IsNaN(test.want) && math.Abs(got-test.want) > 1E-2 { + t.Errorf("expected %f got %f", test.want, got) + } + } +} diff --git a/geo/geo_test.go b/geo/geo_test.go new file mode 100644 index 00000000..2981dedb --- /dev/null +++ b/geo/geo_test.go @@ -0,0 +1,81 @@ +package geo + +import ( + "math" + "testing" +) + +func TestMortonHashMortonUnhash(t *testing.T) { + tests := []struct { + lon float64 + lat float64 + }{ + {-180.0, -90.0}, + {-5, 27.3}, + {0, 0}, + {1.0, 1.0}, + {24.7, -80.4}, + {180.0, 90.0}, + } + + for _, test := range tests { + hash := MortonHash(test.lon, test.lat) + lon := MortonUnhashLon(hash) + lat := MortonUnhashLat(hash) + if compareGeo(test.lon, lon) != 0 { + t.Errorf("expected lon %f, got %f, hash %x", test.lon, lon, hash) + } + if compareGeo(test.lat, lat) != 0 { + t.Errorf("expected lat %f, got %f, hash %x", test.lat, lat, hash) + } + } +} + +func TestScaleLonUnscaleLon(t *testing.T) { + tests := []struct { + lon float64 + }{ + {-180.0}, + {0.0}, + {1.0}, + {180.0}, + } + + for _, test := range tests { + s := scaleLon(test.lon) + lon := unscaleLon(s) + if compareGeo(test.lon, lon) != 0 { + t.Errorf("expected %f, got %f, scaled was %d", test.lon, lon, s) + } + } +} + +func TestScaleLatUnscaleLat(t *testing.T) { + tests := []struct { + lat float64 + }{ + 
{-90.0}, + {0.0}, + {1.0}, + {90.0}, + } + + for _, test := range tests { + s := scaleLat(test.lat) + lat := unscaleLat(s) + if compareGeo(test.lat, lat) != 0 { + t.Errorf("expected %.16f, got %.16f, scaled was %d", test.lat, lat, s) + } + } +} + +func TestComputeBoundingBoxCheckLatitudeAtEquator(t *testing.T) { + // at the equator 1 degree of latitude is about 110567 meters + _, upperLeftLat, _, lowerRightLat := ComputeBoundingBox(0, 0, 110567) + if math.Abs(upperLeftLat-1) > 1E-4 { + t.Errorf("expected bounding box upper left lat to be almost 1, got %f", upperLeftLat) + } + if math.Abs(lowerRightLat+1) > 1E-4 { + t.Errorf("expected bounding box lower right lat to be almost -1, got %f", lowerRightLat) + } +} diff --git a/geo/parse.go b/geo/parse.go new file mode 100644 index 00000000..d0585abf --- /dev/null +++ b/geo/parse.go @@ -0,0 +1,93 @@ +package geo + +import ( + "reflect" + "strings" +) + +// ExtractGeoPoint takes an arbitrary interface{} and tries its best to +// interpret it as a geo point +func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) { + var foundLon, foundLat bool + // is it a map + if l, ok := thing.(map[string]interface{}); ok { + if lval, ok := l["lon"]; ok { + lon, foundLon = extractNumericVal(lval) + } else if lval, ok := l["lng"]; ok { + lon, foundLon = extractNumericVal(lval) + } + if lval, ok := l["lat"]; ok { + lat, foundLat = extractNumericVal(lval) + } + return lon, lat, foundLon && foundLat + } + + // now try reflection on struct fields + thingVal := reflect.ValueOf(thing) + thingTyp := thingVal.Type() + if thingVal.IsValid() && thingVal.Kind() == reflect.Struct { + for i := 0; i < thingVal.NumField(); i++ { + field := thingTyp.Field(i) + fieldName := field.Name + if strings.HasPrefix(strings.ToLower(fieldName), "lon") { + if thingVal.Field(i).CanInterface() { + fieldVal := thingVal.Field(i).Interface() + lon, foundLon = extractNumericVal(fieldVal) + } + } + if strings.HasPrefix(strings.ToLower(fieldName), 
"lng") { + if thingVal.Field(i).CanInterface() { + fieldVal := thingVal.Field(i).Interface() + lon, foundLon = extractNumericVal(fieldVal) + } + } + if strings.HasPrefix(strings.ToLower(fieldName), "lat") { + if thingVal.Field(i).CanInterface() { + fieldVal := thingVal.Field(i).Interface() + lat, foundLat = extractNumericVal(fieldVal) + } + } + } + } + + // last hope, some interfaces + // lon + if l, ok := thing.(loner); ok { + lon = l.Lon() + foundLon = true + } else if l, ok := thing.(lnger); ok { + lon = l.Lng() + foundLon = true + } + // lat + if l, ok := thing.(later); ok { + lat = l.Lat() + foundLat = true + } + + return lon, lat, foundLon && foundLat +} + +// extract numeric value (if possible) and return as float64 +func extractNumericVal(v interface{}) (float64, bool) { + switch v := v.(type) { + case float64: + return v, true + case float32: + return float64(v), true + } + return 0, false +} + +// various support interfaces which can be used to find lat/lon +type loner interface { + Lon() float64 +} + +type later interface { + Lat() float64 +} + +type lnger interface { + Lng() float64 +} diff --git a/geo/sloppy.go b/geo/sloppy.go new file mode 100644 index 00000000..5a2ba6bc --- /dev/null +++ b/geo/sloppy.go @@ -0,0 +1,185 @@ +package geo + +import "math" + +var earthDiameterPerLatitude []float64 +var sinTab []float64 +var cosTab []float64 +var asinTab []float64 +var asinDer1DivF1Tab []float64 +var asinDer2DivF2Tab []float64 +var asinDer3DivF3Tab []float64 +var asinDer4DivF4Tab []float64 + +const radiusTabsSize = (1 << 10) + 1 +const radiusDelta = (math.Pi / 2) / (radiusTabsSize - 1) +const radiusIndexer = 1 / radiusDelta +const sinCosTabsSize = (1 << 11) + 1 +const asinTabsSize = (1 << 13) + 1 +const oneDivF2 = 1 / 2.0 +const oneDivF3 = 1 / 6.0 +const oneDivF4 = 1 / 24.0 + +// 1.57079632673412561417e+00 first 33 bits of pi/2 +var pio2Hi = math.Float64frombits(0x3FF921FB54400000) + +// 6.07710050650619224932e-11 pi/2 - PIO2_HI +var pio2Lo = 
math.Float64frombits(0x3DD0B4611A626331) + +var asinPio2Hi = math.Float64frombits(0x3FF921FB54442D18) // 1.57079632679489655800e+00 +var asinPio2Lo = math.Float64frombits(0x3C91A62633145C07) // 6.12323399573676603587e-17 +var asinPs0 = math.Float64frombits(0x3fc5555555555555) // 1.66666666666666657415e-01 +var asinPs1 = math.Float64frombits(0xbfd4d61203eb6f7d) // -3.25565818622400915405e-01 +var asinPs2 = math.Float64frombits(0x3fc9c1550e884455) // 2.01212532134862925881e-01 +var asinPs3 = math.Float64frombits(0xbfa48228b5688f3b) // -4.00555345006794114027e-02 +var asinPs4 = math.Float64frombits(0x3f49efe07501b288) // 7.91534994289814532176e-04 +var asinPs5 = math.Float64frombits(0x3f023de10dfdf709) // 3.47933107596021167570e-05 +var asinQs1 = math.Float64frombits(0xc0033a271c8a2d4b) // -2.40339491173441421878e+00 +var asinQs2 = math.Float64frombits(0x40002ae59c598ac8) // 2.02094576023350569471e+00 +var asinQs3 = math.Float64frombits(0xbfe6066c1b8d0159) // -6.88283971605453293030e-01 +var asinQs4 = math.Float64frombits(0x3fb3b8c5b12e9282) // 7.70381505559019352791e-02 + +var twoPiHi = 4 * pio2Hi +var twoPiLo = 4 * pio2Lo +var sinCosDeltaHi = twoPiHi/sinCosTabsSize - 1 +var sinCosDeltaLo = twoPiLo/sinCosTabsSize - 1 +var sinCosIndexer = 1 / (sinCosDeltaHi + sinCosDeltaLo) +var sinCosMaxValueForIntModulo = ((math.MaxInt64 >> 9) / sinCosIndexer) * 0.99 +var asinMaxValueForTabs = math.Sin(73.0 * degreesToRadian) + +var asinDelta = asinMaxValueForTabs / (asinTabsSize - 1) +var asinIndexer = 1 / asinDelta + +func init() { + + // sin and cos + sinTab = make([]float64, sinCosTabsSize) + cosTab = make([]float64, sinCosTabsSize) + sinCosPiIndex := (sinCosTabsSize - 1) / 2 + sinCosPiMul2Index := 2 * sinCosPiIndex + sinCosPiMul05Index := sinCosPiIndex / 2 + sinCosPiMul15Index := 3 * sinCosPiIndex / 2 + for i := 0; i < sinCosTabsSize; i++ { + // angle: in [0,2*PI]. 
+ angle := float64(i)*sinCosDeltaHi + float64(i)*sinCosDeltaLo + sinAngle := math.Sin(angle) + cosAngle := math.Cos(angle) + // For indexes corresponding to null cosine or sine, we make sure the value is zero + // and not an epsilon. This allows for a much better accuracy for results close to zero. + if i == sinCosPiIndex { + sinAngle = 0.0 + } else if i == sinCosPiMul2Index { + sinAngle = 0.0 + } else if i == sinCosPiMul05Index { + sinAngle = 0.0 + } else if i == sinCosPiMul15Index { + sinAngle = 0.0 + } + sinTab[i] = sinAngle + cosTab[i] = cosAngle + } + + // asin + asinTab = make([]float64, asinTabsSize) + asinDer1DivF1Tab = make([]float64, asinTabsSize) + asinDer2DivF2Tab = make([]float64, asinTabsSize) + asinDer3DivF3Tab = make([]float64, asinTabsSize) + asinDer4DivF4Tab = make([]float64, asinTabsSize) + for i := 0; i < asinTabsSize; i++ { + // x: in [0,ASIN_MAX_VALUE_FOR_TABS]. + x := float64(i) * asinDelta + asinTab[i] = math.Asin(x) + oneMinusXSqInv := 1.0 / (1 - x*x) + oneMinusXSqInv05 := math.Sqrt(oneMinusXSqInv) + oneMinusXSqInv15 := oneMinusXSqInv05 * oneMinusXSqInv + oneMinusXSqInv25 := oneMinusXSqInv15 * oneMinusXSqInv + oneMinusXSqInv35 := oneMinusXSqInv25 * oneMinusXSqInv + asinDer1DivF1Tab[i] = oneMinusXSqInv05 + asinDer2DivF2Tab[i] = (x * oneMinusXSqInv15) * oneDivF2 + asinDer3DivF3Tab[i] = ((1 + 2*x*x) * oneMinusXSqInv25) * oneDivF3 + asinDer4DivF4Tab[i] = ((5 + 2*x*(2+x*(5-2*x))) * oneMinusXSqInv35) * oneDivF4 + } + + // earth radius + a := 6378137.0 + b := 6356752.31420 + a2 := a * a + b2 := b * b + earthDiameterPerLatitude = make([]float64, radiusTabsSize) + earthDiameterPerLatitude[0] = 2.0 * a / 1000 + earthDiameterPerLatitude[radiusTabsSize-1] = 2.0 * b / 1000 + for i := 1; i < radiusTabsSize-1; i++ { + lat := math.Pi * float64(i) / (2*radiusTabsSize - 1) + one := math.Pow(a2*math.Cos(lat), 2) + two := math.Pow(b2*math.Sin(lat), 2) + three := math.Pow(float64(a)*math.Cos(lat), 2) + four := math.Pow(b*math.Sin(lat), 2) + radius := 
math.Sqrt((one + two) / (three + four)) + earthDiameterPerLatitude[i] = 2 * radius / 1000 + } +} + +func earthDiameter(lat float64) float64 { + index := math.Mod(math.Abs(lat)*radiusIndexer+0.5, float64(len(earthDiameterPerLatitude))) + if math.IsNaN(index) { + return 0 + } + return earthDiameterPerLatitude[int(index)] +} + +func cos(a float64) float64 { + if a < 0.0 { + a = -a + } + if a > sinCosMaxValueForIntModulo { + return math.Cos(a) + } + // index: possibly outside tables range. + index := int(a*sinCosIndexer + 0.5) + delta := (a - float64(index)*sinCosDeltaHi) - float64(index)*sinCosDeltaLo + // Making sure index is within tables range. + // Last value of each table is the same than first, so we ignore it (tabs size minus one) for modulo. + index &= (sinCosTabsSize - 2) // index % (SIN_COS_TABS_SIZE-1) + indexCos := cosTab[index] + indexSin := sinTab[index] + return indexCos + delta*(-indexSin+delta*(-indexCos*oneDivF2+delta*(indexSin*oneDivF3+delta*indexCos*oneDivF4))) +} + +func asin(a float64) float64 { + var negateResult bool + if a < 0 { + a = -a + negateResult = true + } + if a <= asinMaxValueForTabs { + index := int(a*asinIndexer + 0.5) + delta := a - float64(index)*asinDelta + result := asinTab[index] + delta*(asinDer1DivF1Tab[index]+delta*(asinDer2DivF2Tab[index]+delta*(asinDer3DivF3Tab[index]+delta*asinDer4DivF4Tab[index]))) + if negateResult { + return -result + } + return result + } + // value > ASIN_MAX_VALUE_FOR_TABS, or value is NaN + // This part is derived from fdlibm. 
+ if a < 1 { + t := (1.0 - a) * 0.5 + p := t * (asinPs0 + t*(asinPs1+t*(asinPs2+t*(asinPs3+t*(asinPs4+t+asinPs5))))) + q := 1.0 + t*(asinQs1+t*(asinQs2+t*(asinQs3+t*asinQs4))) + s := math.Sqrt(t) + z := s + s*(p/q) + result := asinPio2Hi - ((z + z) - asinPio2Lo) + if negateResult { + return -result + } + return result + } + // value >= 1.0, or value is NaN + if a == 1.0 { + if negateResult { + return -math.Pi / 2 + } + return math.Pi / 2 + } + return math.NaN() +} diff --git a/geo/sloppy_test.go b/geo/sloppy_test.go new file mode 100644 index 00000000..4ad4ff3b --- /dev/null +++ b/geo/sloppy_test.go @@ -0,0 +1,73 @@ +package geo + +import ( + "math" + "testing" +) + +func TestCos(t *testing.T) { + + cosDelta := 1E-15 + + tests := []struct { + in float64 + want float64 + }{ + {math.NaN(), math.NaN()}, + {math.Inf(-1), math.NaN()}, + {math.Inf(1), math.NaN()}, + {1, math.Cos(1)}, + {0, math.Cos(0)}, + {math.Pi / 2, math.Cos(math.Pi / 2)}, + {-math.Pi / 2, math.Cos(-math.Pi / 2)}, + {math.Pi / 4, math.Cos(math.Pi / 4)}, + {-math.Pi / 4, math.Cos(-math.Pi / 4)}, + {math.Pi * 2 / 3, math.Cos(math.Pi * 2 / 3)}, + {-math.Pi * -2 / 3, math.Cos(-math.Pi * -2 / 3)}, + {math.Pi / 6, math.Cos(math.Pi / 6)}, + {-math.Pi / 6, math.Cos(-math.Pi / 6)}, + } + + for _, test := range tests { + got := cos(test.in) + if math.IsNaN(test.want) && !math.IsNaN(got) { + t.Errorf("wanted NaN, got %f for cos(%f)", got, test.in) + } + if !math.IsNaN(test.want) && math.Abs(got-test.want) > cosDelta { + t.Errorf("wanted: %f, got %f for cos(%f) diff %f", test.want, got, test.in, math.Abs(got-test.want)) + } + } +} + +func TestAsin(t *testing.T) { + + asinDelta := 1E-7 + + tests := []struct { + in float64 + want float64 + }{ + {math.NaN(), math.NaN()}, + {2, math.NaN()}, + {-2, math.NaN()}, + {-1, -math.Pi / 2}, + {-0.8660254, -math.Pi / 3}, + {-0.7071068, -math.Pi / 4}, + {-0.5, -math.Pi / 6}, + {0, 0}, + {0.5, math.Pi / 6}, + {0.7071068, math.Pi / 4}, + {0.8660254, math.Pi / 3}, + {1, math.Pi / 
2}, + } + + for _, test := range tests { + got := asin(test.in) + if math.IsNaN(test.want) && !math.IsNaN(got) { + t.Errorf("wanted NaN, got %f for asin(%f)", got, test.in) + } + if !math.IsNaN(test.want) && math.Abs(got-test.want) > asinDelta { + t.Errorf("wanted: %f, got %f for asin(%f) diff %f", test.want, got, test.in, math.Abs(got-test.want)) + } + } +} diff --git a/mapping.go b/mapping.go index 2564ea66..76238dc1 100644 --- a/mapping.go +++ b/mapping.go @@ -59,3 +59,7 @@ func NewDateTimeFieldMapping() *mapping.FieldMapping { func NewBooleanFieldMapping() *mapping.FieldMapping { return mapping.NewBooleanFieldMapping() } + +func NewGeoPointFieldMapping() *mapping.FieldMapping { + return mapping.NewGeoPointFieldMapping() +} diff --git a/mapping/document.go b/mapping/document.go index 54bf61b6..9bdb8596 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -75,7 +75,7 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache) error { } } switch field.Type { - case "text", "datetime", "number", "boolean": + case "text", "datetime", "number", "boolean", "geopoint": default: return fmt.Errorf("unknown field type: '%s'", field.Type) } @@ -482,8 +482,24 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, fieldMapping.processTime(property, pathString, path, indexes, context) } default: + if subDocMapping != nil { + for _, fieldMapping := range subDocMapping.Fields { + if fieldMapping.Type == "geopoint" { + fieldMapping.processGeoPoint(property, pathString, path, indexes, context) + } + } + } dm.walkDocument(property, path, indexes, context) } + case reflect.Map: + if subDocMapping != nil { + for _, fieldMapping := range subDocMapping.Fields { + if fieldMapping.Type == "geopoint" { + fieldMapping.processGeoPoint(property, pathString, path, indexes, context) + } + } + } + dm.walkDocument(property, path, indexes, context) default: dm.walkDocument(property, path, indexes, context) } diff --git a/mapping/field.go b/mapping/field.go 
index 0976cd6f..9f1928ca 100644 --- a/mapping/field.go +++ b/mapping/field.go @@ -21,6 +21,7 @@ import ( "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/geo" ) // control the default behavior for dynamic fields (those not explicitly mapped) @@ -124,6 +125,16 @@ func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { return rv } +// NewGeoPointFieldMapping returns a default field mapping for geo points +func NewGeoPointFieldMapping() *FieldMapping { + return &FieldMapping{ + Type: "geopoint", + Store: true, + Index: true, + IncludeInAll: true, + } +} + // Options returns the indexing options for this field. func (fm *FieldMapping) Options() document.IndexingOptions { var rv document.IndexingOptions @@ -208,6 +219,20 @@ func (fm *FieldMapping) processBoolean(propertyValueBool bool, pathString string } } +func (fm *FieldMapping) processGeoPoint(propertyMightBeGeoPoint interface{}, pathString string, path []string, indexes []uint64, context *walkContext) { + lon, lat, found := geo.ExtractGeoPoint(propertyMightBeGeoPoint) + if found { + fieldName := getFieldName(pathString, path, fm) + options := fm.Options() + field := document.NewGeoPointFieldWithIndexingOptions(fieldName, indexes, lon, lat, options) + context.doc.AddField(field) + + if !fm.IncludeInAll { + context.excludedFromAll = append(context.excludedFromAll, fieldName) + } + } +} + func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) *analysis.Analyzer { analyzerName := fm.Analyzer if analyzerName == "" { diff --git a/mapping/mapping_test.go b/mapping/mapping_test.go index 096219ca..309c0e1d 100644 --- a/mapping/mapping_test.go +++ b/mapping/mapping_test.go @@ -23,6 +23,7 @@ import ( "github.com/blevesearch/bleve/analysis/tokenizer/exception" "github.com/blevesearch/bleve/analysis/tokenizer/regexp" "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/numeric" ) var mappingSource = 
[]byte(`{ @@ -845,3 +846,57 @@ func TestMappingPrimitives(t *testing.T) { } } } + +func TestMappingForGeo(t *testing.T) { + + type Location struct { + Lat float64 + Lon float64 + } + + nameFieldMapping := NewTextFieldMapping() + nameFieldMapping.Name = "name" + nameFieldMapping.Analyzer = "standard" + + locFieldMapping := NewGeoPointFieldMapping() + + thingMapping := NewDocumentMapping() + thingMapping.AddFieldMappingsAt("name", nameFieldMapping) + thingMapping.AddFieldMappingsAt("location", locFieldMapping) + + mapping := NewIndexMapping() + mapping.DefaultMapping = thingMapping + + x := struct { + Name string `json:"name"` + Location *Location `json:"location"` + }{ + Name: "marty", + Location: &Location{ + Lon: -180, + Lat: -90, + }, + } + + doc := document.NewDocument("1") + err := mapping.MapDocument(doc, x) + if err != nil { + t.Fatal(err) + } + + var foundGeo bool + for _, f := range doc.Fields { + if f.Name() == "location" { + foundGeo = true + got := f.Value() + expect := []byte(numeric.MustNewPrefixCodedInt64(0, 0)) + if !reflect.DeepEqual(got, expect) { + t.Errorf("expected geo value: %v, got %v", expect, got) + } + } + } + + if !foundGeo { + t.Errorf("expected to find geo point, did not") + } +} diff --git a/numeric/bin.go b/numeric/bin.go new file mode 100644 index 00000000..cd71392d --- /dev/null +++ b/numeric/bin.go @@ -0,0 +1,43 @@ +package numeric + +var interleaveMagic = []uint64{ + 0x5555555555555555, + 0x3333333333333333, + 0x0F0F0F0F0F0F0F0F, + 0x00FF00FF00FF00FF, + 0x0000FFFF0000FFFF, + 0x00000000FFFFFFFF, + 0xAAAAAAAAAAAAAAAA, +} + +var interleaveShift = []uint{1, 2, 4, 8, 16} + +// Interleave the first 32 bits of each uint64 +// adapted from org.apache.lucene.util.BitUtil +// which was adapted from: +// http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN +func Interleave(v1, v2 uint64) uint64 { + v1 = (v1 | (v1 << interleaveShift[4])) & interleaveMagic[4] + v1 = (v1 | (v1 << interleaveShift[3])) & interleaveMagic[3] + v1 = (v1 
| (v1 << interleaveShift[2])) & interleaveMagic[2] + v1 = (v1 | (v1 << interleaveShift[1])) & interleaveMagic[1] + v1 = (v1 | (v1 << interleaveShift[0])) & interleaveMagic[0] + v2 = (v2 | (v2 << interleaveShift[4])) & interleaveMagic[4] + v2 = (v2 | (v2 << interleaveShift[3])) & interleaveMagic[3] + v2 = (v2 | (v2 << interleaveShift[2])) & interleaveMagic[2] + v2 = (v2 | (v2 << interleaveShift[1])) & interleaveMagic[1] + v2 = (v2 | (v2 << interleaveShift[0])) & interleaveMagic[0] + return (v2 << 1) | v1 +} + +// Deinterleave the 32-bit value starting at position 0 +// to get the other 32-bit value, shift it by 1 first +func Deinterleave(b uint64) uint64 { + b &= interleaveMagic[0] + b = (b ^ (b >> interleaveShift[0])) & interleaveMagic[1] + b = (b ^ (b >> interleaveShift[1])) & interleaveMagic[2] + b = (b ^ (b >> interleaveShift[2])) & interleaveMagic[3] + b = (b ^ (b >> interleaveShift[3])) & interleaveMagic[4] + b = (b ^ (b >> interleaveShift[4])) & interleaveMagic[5] + return b +} diff --git a/numeric/bin_test.go b/numeric/bin_test.go new file mode 100644 index 00000000..f6dfb472 --- /dev/null +++ b/numeric/bin_test.go @@ -0,0 +1,27 @@ +package numeric + +import "testing" + +func TestInterleaveDeinterleave(t *testing.T) { + tests := []struct { + v1 uint64 + v2 uint64 + }{ + {0, 0}, + {1, 1}, + {27, 39}, + {1<<32 - 1, 1<<32 - 1}, // largest that should still work + } + + for _, test := range tests { + i := Interleave(test.v1, test.v2) + gotv1 := Deinterleave(i) + gotv2 := Deinterleave(i >> 1) + if gotv1 != test.v1 { + t.Errorf("expected v1: %d, got %d, interleaved was %x", test.v1, gotv1, i) + } + if gotv2 != test.v2 { + t.Errorf("expected v2: %d, got %d, interleaved was %x", test.v2, gotv2, i) + } + } +} diff --git a/search/query/geo_boundingbox.go b/search/query/geo_boundingbox.go new file mode 100644 index 00000000..97807c04 --- /dev/null +++ b/search/query/geo_boundingbox.go @@ -0,0 +1,93 @@ +// Copyright (c) 2017 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package query + +import ( + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/mapping" + "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/search/searcher" +) + +type GeoPoint struct { + Lon float64 `json:"lon,omitempty"` + Lat float64 `json:"lat,omitempty"` +} + +type GeoBoundingBoxQuery struct { + TopLeft *GeoPoint `json:"top_left,omitempty"` + BottomRight *GeoPoint `json:"bottom_right,omitempty"` + FieldVal string `json:"field,omitempty"` + BoostVal *Boost `json:"boost,omitempty"` +} + +func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64) *GeoBoundingBoxQuery { + return &GeoBoundingBoxQuery{ + TopLeft: &GeoPoint{ + Lon: topLeftLon, + Lat: topLeftLat, + }, + BottomRight: &GeoPoint{ + Lon: bottomRightLon, + Lat: bottomRightLat, + }, + } +} + +func (q *GeoBoundingBoxQuery) SetBoost(b float64) { + boost := Boost(b) + q.BoostVal = &boost +} + +func (q *GeoBoundingBoxQuery) Boost() float64 { + return q.BoostVal.Value() +} + +func (q *GeoBoundingBoxQuery) SetField(f string) { + q.FieldVal = f +} + +func (q *GeoBoundingBoxQuery) Field() string { + return q.FieldVal +} + +func (q *GeoBoundingBoxQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { + field := q.FieldVal + if q.FieldVal == "" { + field = m.DefaultSearchField() + } + + if q.BottomRight.Lon < q.TopLeft.Lon { 
+ // cross date line, rewrite as two parts + + leftSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, -180, q.BottomRight.Lat, q.BottomRight.Lon, q.TopLeft.Lat, field, q.BoostVal.Value(), options) + if err != nil { + return nil, err + } + rightSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft.Lon, q.BottomRight.Lat, 180, q.TopLeft.Lat, field, q.BoostVal.Value(), options) + if err != nil { + _ = leftSearcher.Close() + return nil, err + } + + return searcher.NewDisjunctionSearcher(i, []search.Searcher{leftSearcher, rightSearcher}, 0, options) + } + + return searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft.Lon, q.BottomRight.Lat, q.BottomRight.Lon, q.TopLeft.Lat, field, q.BoostVal.Value(), options) +} + +func (q *GeoBoundingBoxQuery) Validate() error { + return nil +} diff --git a/search/query/geo_distance.go b/search/query/geo_distance.go new file mode 100644 index 00000000..ee8f902a --- /dev/null +++ b/search/query/geo_distance.go @@ -0,0 +1,75 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package query + +import ( + "github.com/blevesearch/bleve/geo" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/mapping" + "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/search/searcher" +) + +type GeoDistanceQuery struct { + Location *GeoPoint `json:"location,omitempty"` + Distance string `json:"distance,omitempty"` + FieldVal string `json:"field,omitempty"` + BoostVal *Boost `json:"boost,omitempty"` +} + +func NewGeoDistanceQuery(lon, lat float64, distance string) *GeoDistanceQuery { + return &GeoDistanceQuery{ + Location: &GeoPoint{ + Lon: lon, + Lat: lat, + }, + Distance: distance, + } +} + +func (q *GeoDistanceQuery) SetBoost(b float64) { + boost := Boost(b) + q.BoostVal = &boost +} + +func (q *GeoDistanceQuery) Boost() float64 { + return q.BoostVal.Value() +} + +func (q *GeoDistanceQuery) SetField(f string) { + q.FieldVal = f +} + +func (q *GeoDistanceQuery) Field() string { + return q.FieldVal +} + +func (q *GeoDistanceQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { + field := q.FieldVal + if q.FieldVal == "" { + field = m.DefaultSearchField() + } + + dist, err := geo.ParseDistance(q.Distance) + if err != nil { + return nil, err + } + + return searcher.NewGeoPointDistanceSearcher(i, q.Location.Lon, q.Location.Lat, dist, field, q.BoostVal.Value(), options) +} + +func (q *GeoDistanceQuery) Validate() error { + return nil +} diff --git a/search/query/query.go b/search/query/query.go index 0ac12515..efac7316 100644 --- a/search/query/query.go +++ b/search/query/query.go @@ -244,6 +244,25 @@ func ParseQuery(input []byte) (Query, error) { } return &rv, nil } + _, hasTopLeft := tmp["top_left"] + _, hasBottomRight := tmp["bottom_right"] + if hasTopLeft && hasBottomRight { + var rv GeoBoundingBoxQuery + err := json.Unmarshal(input, &rv) + if err != nil { + return nil, err + } + return &rv, nil + } + _, hasDistance := tmp["distance"] + if hasDistance 
{ + var rv GeoDistanceQuery + err := json.Unmarshal(input, &rv) + if err != nil { + return nil, err + } + return &rv, nil + } return nil, fmt.Errorf("unknown query type") } diff --git a/search/searcher/search_filter.go b/search/searcher/search_filter.go new file mode 100644 index 00000000..219f2ee7 --- /dev/null +++ b/search/searcher/search_filter.go @@ -0,0 +1,88 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package searcher + +import ( + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/search" +) + +// FilterFunc defines a function which can filter documents +// returning true means keep the document +// returning false means do not keep the document +type FilterFunc func(d *search.DocumentMatch) bool + +// FilteringSearcher wraps any other searcher, but checks any Next/Advance +// call against the supplied FilterFunc +type FilteringSearcher struct { + child search.Searcher + accept FilterFunc +} + +func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearcher { + return &FilteringSearcher{ + child: s, + accept: filter, + } +} + +func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { + next, err := f.child.Next(ctx) + for next != nil && err == nil { + if f.accept(next) { + return next, nil + } + next, err = f.child.Next(ctx) + } + return nil, err +} + +func (f *FilteringSearcher) Advance(ctx *search.SearchContext, ID 
index.IndexInternalID) (*search.DocumentMatch, error) { + adv, err := f.child.Advance(ctx, ID) + if err != nil { + return nil, err + } + if adv == nil { + return nil, nil + } + if f.accept(adv) { + return adv, nil + } + return f.Next(ctx) +} + +func (f *FilteringSearcher) Close() error { + return f.child.Close() +} + +func (f *FilteringSearcher) Weight() float64 { + return f.child.Weight() +} + +func (f *FilteringSearcher) SetQueryNorm(n float64) { + f.child.SetQueryNorm(n) +} + +func (f *FilteringSearcher) Count() uint64 { + return f.child.Count() +} + +func (f *FilteringSearcher) Min() int { + return f.child.Min() +} + +func (f *FilteringSearcher) DocumentMatchPoolSize() int { + return f.child.DocumentMatchPoolSize() +} diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go new file mode 100644 index 00000000..8d3d6249 --- /dev/null +++ b/search/searcher/search_geoboundingbox.go @@ -0,0 +1,224 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package searcher + +import ( + "bytes" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/geo" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/numeric" + "github.com/blevesearch/bleve/search" +) + +type GeoBoundingBoxSearcher struct { + indexReader index.IndexReader + field string + minLon float64 + minLat float64 + maxLon float64 + maxLat float64 + options search.SearcherOptions + + rangeBounds []*geoRange + + searcher *DisjunctionSearcher +} + +func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, maxLon, maxLat float64, field string, boost float64, options search.SearcherOptions) (*GeoBoundingBoxSearcher, error) { + rv := &GeoBoundingBoxSearcher{ + indexReader: indexReader, + minLon: minLon, + minLat: minLat, + maxLon: maxLon, + maxLat: maxLat, + field: field, + options: options, + } + rv.computeRange(0, (geo.GeoBits<<1)-1) + + var termsOnBoundary []search.Searcher + var termsNotOnBoundary []search.Searcher + for _, r := range rv.rangeBounds { + ts, err := NewTermSearcher(indexReader, string(r.cell), field, 1.0, options) + if err != nil { + for _, s := range termsOnBoundary { + _ = s.Close() + } + for _, s := range termsNotOnBoundary { + _ = s.Close() + } + return nil, err + } + if r.boundary { + termsOnBoundary = append(termsOnBoundary, ts) + } else { + termsNotOnBoundary = append(termsNotOnBoundary, ts) + } + } + onBoundarySearcher, err := NewDisjunctionSearcher(indexReader, termsOnBoundary, 0, options) + if err != nil { + for _, s := range termsOnBoundary { + _ = s.Close() + } + for _, s := range termsNotOnBoundary { + _ = s.Close() + } + return nil, err + } + filterOnBoundarySearcher := NewFilteringSearcher(onBoundarySearcher, func(d *search.DocumentMatch) bool { + var lon, lat float64 + var found bool + err = indexReader.DocumentVisitFieldTerms(d.IndexInternalID, []string{field}, func(field string, term []byte) { + // only consider the values which are shifted 0 + prefixCoded := 
numeric.PrefixCoded(term) + var shift uint + shift, err = prefixCoded.Shift() + if err == nil && shift == 0 { + var i64 int64 + i64, err = prefixCoded.Int64() + if err == nil { + lon = geo.MortonUnhashLon(uint64(i64)) + lat = geo.MortonUnhashLat(uint64(i64)) + found = true + } + } + }) + if err == nil && found { + return geo.BoundingBoxContains(lon, lat, minLon, minLat, maxLon, maxLat) + } + return false + }) + notOnBoundarySearcher, err := NewDisjunctionSearcher(indexReader, termsNotOnBoundary, 0, options) + if err != nil { + for _, s := range termsOnBoundary { + _ = s.Close() + } + for _, s := range termsNotOnBoundary { + _ = s.Close() + } + _ = filterOnBoundarySearcher.Close() + return nil, err + } + + rv.searcher, err = NewDisjunctionSearcher(indexReader, []search.Searcher{filterOnBoundarySearcher, notOnBoundarySearcher}, 0, options) + if err != nil { + for _, s := range termsOnBoundary { + _ = s.Close() + } + for _, s := range termsNotOnBoundary { + _ = s.Close() + } + _ = filterOnBoundarySearcher.Close() + _ = notOnBoundarySearcher.Close() + return nil, err + } + return rv, nil +} + +func (s *GeoBoundingBoxSearcher) Count() uint64 { + return s.searcher.Count() +} + +func (s *GeoBoundingBoxSearcher) Weight() float64 { + return s.searcher.Weight() +} + +func (s *GeoBoundingBoxSearcher) SetQueryNorm(qnorm float64) { + s.searcher.SetQueryNorm(qnorm) +} + +func (s *GeoBoundingBoxSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { + return s.searcher.Next(ctx) +} + +func (s *GeoBoundingBoxSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { + return s.searcher.Advance(ctx, ID) +} + +func (s *GeoBoundingBoxSearcher) Close() error { + return s.searcher.Close() +} + +func (s *GeoBoundingBoxSearcher) Min() int { + return 0 +} + +func (s *GeoBoundingBoxSearcher) DocumentMatchPoolSize() int { + return s.searcher.DocumentMatchPoolSize() +} + +var geoMaxShift = document.GeoPrecisionStep * 4 +var 
geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 + +func (s *GeoBoundingBoxSearcher) computeRange(term uint64, shift uint) { + split := term | uint64(0x1)<<shift + var upperMax uint64 + if shift < 63 { + upperMax = term | ((uint64(1) << (shift + 1)) - 1) + } else { + upperMax = 0xffffffffffffffff + } + lowerMax := split - 1 + s.relateAndRecurse(term, lowerMax, shift) + s.relateAndRecurse(split, upperMax, shift) +} + +func (s *GeoBoundingBoxSearcher) relateAndRecurse(start, end uint64, res uint) { + minLon := geo.MortonUnhashLon(start) + minLat := geo.MortonUnhashLat(start) + maxLon := geo.MortonUnhashLon(end) + maxLat := geo.MortonUnhashLat(end) + + level := ((geo.GeoBits << 1) - res) >> 1 + + within := res%document.GeoPrecisionStep == 0 && s.cellWithin(minLon, minLat, maxLon, maxLat) + if within || (level == geoDetailLevel && s.cellIntersectShape(minLon, minLat, maxLon, maxLat)) { + s.rangeBounds = append(s.rangeBounds, newGeoRange(start, res, level, !within)) + } else if level < geoDetailLevel && s.cellIntersectsMBR(minLon, minLat, maxLon, maxLat) { + s.computeRange(start, res-1) + } +} + +func (s *GeoBoundingBoxSearcher) cellWithin(minLon, minLat, maxLon, maxLat float64) bool { + return geo.RectWithin(minLon, minLat, maxLon, maxLat, s.minLon, s.minLat, s.maxLon, s.maxLat) +} + +func (s *GeoBoundingBoxSearcher) cellIntersectShape(minLon, minLat, maxLon, maxLat float64) bool { + return s.cellIntersectsMBR(minLon, minLat, maxLon, maxLat) +} + +func (s *GeoBoundingBoxSearcher) cellIntersectsMBR(minLon, minLat, maxLon, maxLat float64) bool { + return geo.RectIntersects(minLon, minLat, maxLon, maxLat, s.minLon, s.minLat, s.maxLon, s.maxLat) +} + +type geoRange struct { + cell []byte + level uint + boundary bool +} + +func newGeoRange(lower uint64, res uint, level uint, boundary bool) *geoRange { + return &geoRange{ + level: level, + boundary: boundary, + cell: numeric.MustNewPrefixCodedInt64(int64(lower), res), + } +} + +func (r *geoRange) Compare(other *geoRange) int { + return bytes.Compare(r.cell, other.cell) +} diff --git a/search/searcher/search_geoboundingbox_test.go b/search/searcher/search_geoboundingbox_test.go new file mode 100644 index 00000000..1839dec2 --- /dev/null +++ b/search/searcher/search_geoboundingbox_test.go @@ -0,0 +1,156 @@ +package searcher + +import ( + "log" + "testing" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/store/gtreap" + "github.com/blevesearch/bleve/index/upsidedown" + 
"github.com/blevesearch/bleve/search" +) + +func TestGeoBoundingBox(t *testing.T) { + i := setup(t) + indexReader, err := i.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err = indexReader.Close() + if err != nil { + t.Fatal(err) + } + }() + + gbs, err := NewGeoBoundingBoxSearcher(indexReader, 0.001, 0.001, 0.002, 0.002, "loc", 1.0, search.SearcherOptions{}) + if err != nil { + t.Fatal(err) + } + ctx := &search.SearchContext{ + DocumentMatchPool: search.NewDocumentMatchPool(gbs.DocumentMatchPoolSize(), 0), + } + docMatch, err := gbs.Next(ctx) + for docMatch != nil && err == nil { + if docMatch == nil { + log.Printf("nil docmatch") + } else { + log.Printf("got doc match: %s", docMatch.IndexInternalID) + } + docMatch, err = gbs.Next(ctx) + } + if err != nil { + t.Fatal(err) + } +} + +func setup(t *testing.T) index.Index { + + analysisQueue := index.NewAnalysisQueue(1) + i, err := upsidedown.NewUpsideDownCouch( + gtreap.Name, + map[string]interface{}{ + "path": "", + }, + analysisQueue) + if err != nil { + t.Fatal(err) + } + err = i.Open() + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "a", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 0.0015, 0.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "b", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 1.0015, 1.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "c", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 2.0015, 2.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "d", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 3.0015, 3.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "e", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 4.0015, 4.0015), + 
}, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "f", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 5.0015, 5.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "g", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 6.0015, 6.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "h", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 7.0015, 7.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "i", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 8.0015, 8.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "j", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 9.0015, 9.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + + return i +} diff --git a/search/searcher/search_geopointdistance.go b/search/searcher/search_geopointdistance.go new file mode 100644 index 00000000..adb83dc8 --- /dev/null +++ b/search/searcher/search_geopointdistance.go @@ -0,0 +1,139 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package searcher + +import ( + "github.com/blevesearch/bleve/geo" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/numeric" + "github.com/blevesearch/bleve/search" +) + +type GeoPointDistanceSearcher struct { + indexReader index.IndexReader + field string + + centerLon float64 + centerLat float64 + dist float64 + + options search.SearcherOptions + + searcher *FilteringSearcher +} + +func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, centerLat, dist float64, field string, boost float64, options search.SearcherOptions) (*GeoPointDistanceSearcher, error) { + rv := &GeoPointDistanceSearcher{ + indexReader: indexReader, + centerLon: centerLon, + centerLat: centerLat, + dist: dist, + field: field, + options: options, + } + + // compute bounding box containing the circle + topLeftLon, topLeftLat, bottomRightLon, bottomRightLat := geo.ComputeBoundingBox(centerLon, centerLat, dist) + + var boxSearcher search.Searcher + if bottomRightLon < topLeftLon { + // cross date line, rewrite as two parts + + leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader, -180, bottomRightLat, bottomRightLon, topLeftLat, field, boost, options) + if err != nil { + return nil, err + } + rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader, topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options) + if err != nil { + _ = leftSearcher.Close() + return nil, err + } + + boxSearcher, err = NewDisjunctionSearcher(indexReader, []search.Searcher{leftSearcher, rightSearcher}, 0, options) + if err != nil { + _ = leftSearcher.Close() + _ = rightSearcher.Close() + return nil, err + } + } else { + + // build geo bounding box searcher for that bounding box + var err error + boxSearcher, err = NewGeoBoundingBoxSearcher(indexReader, topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost, options) + if err != nil { + return nil, err + } + } + + // wrap it in a filtering searcher which checks the actual distance + rv.searcher = 
NewFilteringSearcher(boxSearcher, func(d *search.DocumentMatch) bool { + var lon, lat float64 + var found bool + err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID, []string{field}, func(field string, term []byte) { + // only consider the values which are shifted 0 + prefixCoded := numeric.PrefixCoded(term) + shift, err := prefixCoded.Shift() + if err == nil && shift == 0 { + i64, err := prefixCoded.Int64() + if err == nil { + lon = geo.MortonUnhashLon(uint64(i64)) + lat = geo.MortonUnhashLat(uint64(i64)) + found = true + } + } + }) + if err == nil && found { + dist := geo.Haversin(lon, lat, rv.centerLon, rv.centerLat) + if dist <= rv.dist/1000 { + return true + } + } + return false + }) + + return rv, nil +} + +func (s *GeoPointDistanceSearcher) Count() uint64 { + return s.searcher.Count() +} + +func (s *GeoPointDistanceSearcher) Weight() float64 { + return s.searcher.Weight() +} + +func (s *GeoPointDistanceSearcher) SetQueryNorm(qnorm float64) { + s.searcher.SetQueryNorm(qnorm) +} + +func (s *GeoPointDistanceSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { + return s.searcher.Next(ctx) +} + +func (s *GeoPointDistanceSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { + return s.searcher.Advance(ctx, ID) +} + +func (s *GeoPointDistanceSearcher) Close() error { + return s.searcher.Close() +} + +func (s *GeoPointDistanceSearcher) Min() int { + return 0 +} + +func (s *GeoPointDistanceSearcher) DocumentMatchPoolSize() int { + return s.searcher.DocumentMatchPoolSize() +} diff --git a/search/sort.go b/search/sort.go index 70d4fbaa..751eec9d 100644 --- a/search/sort.go +++ b/search/sort.go @@ -17,9 +17,11 @@ package search import ( "encoding/json" "fmt" + "math" "sort" "strings" + "github.com/blevesearch/bleve/geo" "github.com/blevesearch/bleve/numeric" ) @@ -51,6 +53,21 @@ func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) { return &SortScore{ Desc: descending, 
}, nil + case "geo_distance": + field, ok := input["field"].(string) + if !ok { + return nil, fmt.Errorf("search sort mode geo_distance must specify field") + } + lon, lat, foundLocation := geo.ExtractGeoPoint(input["location"]) + if !foundLocation { + return nil, fmt.Errorf("unable to parse geo_distance location") + } + return &SortGeoDistance{ + Field: field, + Desc: descending, + lon: lon, + lat: lat, + }, nil case "field": field, ok := input["field"].(string) if !ok { @@ -386,7 +403,7 @@ func (s *SortField) filterTermsByType(terms []string) []string { for _, term := range terms { valid, shift := numeric.ValidPrefixCodedTerm(term) if valid && shift == 0 { - termsWithShiftZero = append(termsWithShiftZero) + termsWithShiftZero = append(termsWithShiftZero, term) } } terms = termsWithShiftZero @@ -521,3 +538,99 @@ func (s *SortScore) MarshalJSON() ([]byte, error) { } return json.Marshal("_score") } + +var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0)) + +// SortGeoDistance will sort results by the distance of an +// indexed geo point, from the provided location. 
+// Field is the name of the field +// Desc reverses the sort order (default false) +type SortGeoDistance struct { + Field string + Desc bool + values []string + lon float64 + lat float64 +} + +// UpdateVisitor notifies this sort field that in this document +// this field has the specified term +func (s *SortGeoDistance) UpdateVisitor(field string, term []byte) { + if field == s.Field { + s.values = append(s.values, string(term)) + } +} + +// Value returns the sort value of the DocumentMatch +// it also resets the state of this SortGeoDistance for +// processing the next document +func (s *SortGeoDistance) Value(i *DocumentMatch) string { + iTerms := s.filterTermsByType(s.values) + iTerm := s.filterTermsByMode(iTerms) + s.values = nil + + if iTerm == "" { + return maxDistance + } + + i64, err := numeric.PrefixCoded(iTerm).Int64() + if err != nil { + return maxDistance + } + docLon := geo.MortonUnhashLon(uint64(i64)) + docLat := geo.MortonUnhashLat(uint64(i64)) + + dist := geo.Haversin(s.lon, s.lat, docLon, docLat) + return string(numeric.MustNewPrefixCodedInt64(int64(dist), 0)) +} + +// Descending determines the order of the sort +func (s *SortGeoDistance) Descending() bool { + return s.Desc +} + +func (s *SortGeoDistance) filterTermsByMode(terms []string) string { + if len(terms) >= 1 { + return terms[0] + } + + return "" +} + +// filterTermsByType attempts to make one pass on the terms +// and returns only valid prefix coded numbers with shift of 0 +func (s *SortGeoDistance) filterTermsByType(terms []string) []string { + var termsWithShiftZero []string + for _, term := range terms { + valid, shift := numeric.ValidPrefixCodedTerm(term) + if valid && shift == 0 { + termsWithShiftZero = append(termsWithShiftZero, term) + } + } + return termsWithShiftZero +} + +// RequiresDocID says this SearchSort does not require the DocID be loaded +func (s *SortGeoDistance) RequiresDocID() bool { return false } + +// RequiresScoring says this SearchSort does not require scoring +func 
(s *SortGeoDistance) RequiresScoring() bool { return false } + +// RequiresFields says this SearchSort requires the specified stored field +func (s *SortGeoDistance) RequiresFields() []string { return []string{s.Field} } + +func (s *SortGeoDistance) MarshalJSON() ([]byte, error) { + sfm := map[string]interface{}{ + "by": "geo_distance", + "field": s.Field, + "location": map[string]interface{}{ + "lon": s.lon, + "lat": s.lat, + }, + } + if s.Desc { + sfm["desc"] = true + } + + return json.Marshal(sfm) +}