diff --git a/document/field_geopoint.go b/document/field_geopoint.go new file mode 100644 index 00000000..f508b362 --- /dev/null +++ b/document/field_geopoint.go @@ -0,0 +1,137 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package document + +import ( + "fmt" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/geo" + "github.com/blevesearch/bleve/numeric" +) + +var GeoPrecisionStep uint = 9 + +type GeoPointField struct { + name string + arrayPositions []uint64 + options IndexingOptions + value numeric.PrefixCoded + numPlainTextBytes uint64 +} + +func (n *GeoPointField) Name() string { + return n.name +} + +func (n *GeoPointField) ArrayPositions() []uint64 { + return n.arrayPositions +} + +func (n *GeoPointField) Options() IndexingOptions { + return n.options +} + +func (n *GeoPointField) Analyze() (int, analysis.TokenFrequencies) { + tokens := make(analysis.TokenStream, 0) + tokens = append(tokens, &analysis.Token{ + Start: 0, + End: len(n.value), + Term: n.value, + Position: 1, + Type: analysis.Numeric, + }) + + original, err := n.value.Int64() + if err == nil { + + shift := GeoPrecisionStep + for shift < 64 { + shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift) + if err != nil { + break + } + token := analysis.Token{ + Start: 0, + End: len(shiftEncoded), + Term: shiftEncoded, + Position: 1, + Type: analysis.Numeric, + } + tokens = append(tokens, &token) + shift += 
GeoPrecisionStep + } + } + + fieldLength := len(tokens) + tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors()) + return fieldLength, tokenFreqs +} + +func (n *GeoPointField) Value() []byte { + return n.value +} + +func (n *GeoPointField) Lon() (float64, error) { + i64, err := n.value.Int64() + if err != nil { + return 0.0, err + } + return geo.MortonUnhashLon(uint64(i64)), nil +} + +func (n *GeoPointField) Lat() (float64, error) { + i64, err := n.value.Int64() + if err != nil { + return 0.0, err + } + return geo.MortonUnhashLat(uint64(i64)), nil +} + +func (n *GeoPointField) GoString() string { + return fmt.Sprintf("&document.GeoPointField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value) +} + +func (n *GeoPointField) NumPlainTextBytes() uint64 { + return n.numPlainTextBytes +} + +func NewGeoPointFieldFromBytes(name string, arrayPositions []uint64, value []byte) *GeoPointField { + return &GeoPointField{ + name: name, + arrayPositions: arrayPositions, + value: value, + options: DefaultNumericIndexingOptions, + numPlainTextBytes: uint64(len(value)), + } +} + +func NewGeoPointField(name string, arrayPositions []uint64, lon, lat float64) *GeoPointField { + return NewGeoPointFieldWithIndexingOptions(name, arrayPositions, lon, lat, DefaultNumericIndexingOptions) +} + +func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, lon, lat float64, options IndexingOptions) *GeoPointField { + mhash := geo.MortonHash(lon, lat) + prefixCoded := numeric.MustNewPrefixCodedInt64(int64(mhash), 0) + return &GeoPointField{ + name: name, + arrayPositions: arrayPositions, + value: prefixCoded, + options: options, + // not correct, just a place holder until we revisit how fields are + // represented and can fix this better + numPlainTextBytes: uint64(8), + } +} diff --git a/document/field_geopoint_test.go b/document/field_geopoint_test.go new file mode 100644 index 00000000..d705ac70 --- /dev/null +++ 
// RectWithin reports whether rectangle a lies inside rectangle b. It returns
// false as soon as one edge of a is found strictly outside the matching edge
// of b; edges that coincide count as inside.
func RectWithin(aMinX, aMinY, aMaxX, aMaxY, bMinX, bMinY, bMaxX, bMaxY float64) bool {
	// a starts before b on either axis
	if aMinX < bMinX || aMinY < bMinY {
		return false
	}
	// a ends after b on either axis
	if aMaxX > bMaxX || aMaxY > bMaxY {
		return false
	}
	return true
}
:= pointFromLonLatBearing(centerLon, centerLat, 270, radius) + return normalizeLon(llon), normalizeLat(tlat), normalizeLon(rlon), normalizeLat(blat) +} + +const degreesToRadian = math.Pi / 180 +const radiansToDegrees = 180 / math.Pi +const flattening = 1.0 / 298.257223563 +const semiMajorAxis = 6378137 +const semiMinorAxis = semiMajorAxis * (1.0 - flattening) +const semiMajorAxis2 = semiMajorAxis * semiMajorAxis +const semiMinorAxis2 = semiMinorAxis * semiMinorAxis + +func DegreesToRadians(d float64) float64 { + return d * degreesToRadian +} + +func RadiansToDegrees(r float64) float64 { + return r * radiansToDegrees +} + +func pointFromLonLatBearing(lon, lat, bearing, dist float64) (float64, float64) { + + alpha1 := DegreesToRadians(bearing) + cosA1 := math.Cos(alpha1) + sinA1 := math.Sin(alpha1) + tanU1 := (1 - flattening) * math.Tan(DegreesToRadians(lat)) + cosU1 := 1 / math.Sqrt(1+tanU1*tanU1) + sinU1 := tanU1 * cosU1 + sig1 := math.Atan2(tanU1, cosA1) + sinAlpha := cosU1 * sinA1 + cosSqAlpha := 1 - sinAlpha*sinAlpha + uSq := cosSqAlpha * (semiMajorAxis2 - semiMinorAxis2) / semiMinorAxis2 + A := 1 + uSq/16384*(4096+uSq*(-768+uSq*(320-175*uSq))) + B := uSq / 1024 * (256 + uSq*(-128+uSq*(74-47*uSq))) + + sigma := dist / (semiMinorAxis * A) + + cos25SigmaM := math.Cos(2*sig1 + sigma) + sinSigma := math.Sin(sigma) + cosSigma := math.Cos(sigma) + deltaSigma := B * sinSigma * (cos25SigmaM + (B/4)*(cosSigma*(-1+2*cos25SigmaM*cos25SigmaM)-(B/6)*cos25SigmaM*(-1+4*sinSigma*sinSigma)*(-3+4*cos25SigmaM*cos25SigmaM))) + sigmaP := sigma + sigma = dist/(semiMinorAxis*A) + deltaSigma + for math.Abs(sigma-sigmaP) > 1E-12 { + cos25SigmaM = math.Cos(2*sig1 + sigma) + sinSigma = math.Sin(sigma) + cosSigma = math.Cos(sigma) + deltaSigma = B * sinSigma * (cos25SigmaM + (B/4)*(cosSigma*(-1+2*cos25SigmaM*cos25SigmaM)-(B/6)*cos25SigmaM*(-1+4*sinSigma*sinSigma)*(-3+4*cos25SigmaM*cos25SigmaM))) + sigmaP = sigma + sigma = dist/(semiMinorAxis*A) + deltaSigma + } + + tmp := sinU1*sinSigma - 
// distanceUnit describes one supported unit of length: conv is the factor
// that converts a value in this unit to meters, and suffixes are the strings
// that select the unit when they end a distance expression.
type distanceUnit struct {
	conv     float64
	suffixes []string
}

var inch = distanceUnit{0.0254, []string{"in", "inch"}}
var yard = distanceUnit{0.9144, []string{"yd", "yards"}}
var feet = distanceUnit{0.3048, []string{"ft", "feet"}}
var kilom = distanceUnit{1000, []string{"km", "kilometers"}}
var nauticalm = distanceUnit{1852.0, []string{"nm", "nauticalmiles"}}
var millim = distanceUnit{0.001, []string{"mm", "millimeters"}}
var centim = distanceUnit{0.01, []string{"cm", "centimeters"}}
var miles = distanceUnit{1609.344, []string{"mi", "miles"}}
var meters = distanceUnit{1, []string{"m", "meters"}}

// distanceUnits is scanned in order and the first matching suffix wins, so
// every unit whose suffix itself ends in "m", "meters" or "miles" must be
// listed before the shorter unit it would otherwise be mistaken for.
var distanceUnits = []*distanceUnit{
	&inch, &yard, &feet, &kilom, &nauticalm, &millim, &centim, &miles, &meters,
}

// ParseDistance attempts to parse a distance and returns it in meters.
//
// The input is a floating point number optionally followed by one of the unit
// suffixes in distanceUnits; a number without a unit is taken to be meters.
// Whitespace around the input and between the number and the unit is ignored
// (strconv.ParseFloat itself rejects it). If the numeric part cannot be
// parsed, the error from strconv.ParseFloat is returned unchanged.
func ParseDistance(d string) (float64, error) {
	d = strings.TrimSpace(d)
	for _, unit := range distanceUnits {
		for _, unitSuffix := range unit.suffixes {
			if !strings.HasSuffix(d, unitSuffix) {
				continue
			}
			number := strings.TrimSpace(strings.TrimSuffix(d, unitSuffix))
			parsedNum, err := strconv.ParseFloat(number, 64)
			if err != nil {
				return 0, err
			}
			return parsedNum * unit.conv, nil
		}
	}
	// no unit matched, assume meters
	parsedNum, err := strconv.ParseFloat(d, 64)
	if err != nil {
		return 0, err
	}
	return parsedNum, nil
}
{1, 1, 1, math.NaN(), math.NaN()}, + {1, math.NaN(), 1, 1, math.NaN()}, + {math.NaN(), 1, 1, 1, math.NaN()}, + + {0, 0, 0, 0, 0}, + {-180, 0, -180, 0, 0}, + {-180, 0, 180, 0, 0}, + {180, 0, 180, 0, 0}, + + {0, 90, 0, 90, 0}, + {-180, 90, -180, 90, 0}, + {-180, 90, 180, 90, 0}, + {180, 90, 180, 90, 0}, + + {0, 0, 180, 0, halfCircle}, + + {-74.0059731, 40.7143528, -74.0059731, 40.7143528, 0}, + {-74.0059731, 40.7143528, -73.9844722, 40.759011, 5.286}, + {-74.0059731, 40.7143528, -74.007819, 40.718266, 0.4621}, + {-74.0059731, 40.7143528, -74.0088305, 40.7051157, 1.055}, + {-74.0059731, 40.7143528, -74, 40.7247222, 1.258}, + {-74.0059731, 40.7143528, -73.9962255, 40.731033, 2.029}, + {-74.0059731, 40.7143528, -73.95, 40.65, 8.572}, + } + + for _, test := range tests { + got := Haversin(test.lon1, test.lat1, test.lon2, test.lat2) + if math.IsNaN(test.want) && !math.IsNaN(got) { + t.Errorf("expected NaN, got %f", got) + } + if !math.IsNaN(test.want) && math.Abs(got-test.want) > 1E-2 { + t.Errorf("expected %f got %f", test.want, got) + } + } +} diff --git a/geo/geo_test.go b/geo/geo_test.go new file mode 100644 index 00000000..2981dedb --- /dev/null +++ b/geo/geo_test.go @@ -0,0 +1,81 @@ +package geo + +import ( + "math" + "testing" +) + +func TestMortonHashMortonUnhash(t *testing.T) { + tests := []struct { + lon float64 + lat float64 + }{ + {-180.0, -90.0}, + {-5, 27.3}, + {0, 0}, + {1.0, 1.0}, + {24.7, -80.4}, + {180.0, 90.0}, + } + + for _, test := range tests { + hash := MortonHash(test.lon, test.lat) + lon := MortonUnhashLon(hash) + lat := MortonUnhashLat(hash) + if compareGeo(test.lon, lon) != 0 { + t.Errorf("expected lon %f, got %f, hash %x", test.lon, lon, hash) + } + if compareGeo(test.lat, lat) != 0 { + t.Errorf("expected lat %f, got %f, hash %x", test.lat, lat, hash) + } + } +} + +func TestScaleLonUnscaleLon(t *testing.T) { + tests := []struct { + lon float64 + }{ + {-180.0}, + {0.0}, + {1.0}, + {180.0}, + } + + for _, test := range tests { + s := 
// ExtractGeoPoint takes an arbitrary interface{} and tries its best to
// interpret it as a geo point.
//
// Three shapes are recognized, in this order:
//   - a map[string]interface{} with a "lon" (or "lng") key and a "lat" key;
//   - a struct value whose exported fields have names starting with "lon",
//     "lng" or "lat" (compared case-insensitively);
//   - a value implementing Lon() (or Lng()) and Lat() methods.
//
// success is true only when both a longitude and a latitude were found. A nil
// thing yields success == false.
func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
	var foundLon, foundLat bool

	// is it a map
	if l, ok := thing.(map[string]interface{}); ok {
		if lval, ok := l["lon"]; ok {
			lon, foundLon = extractNumericVal(lval)
		} else if lval, ok := l["lng"]; ok {
			lon, foundLon = extractNumericVal(lval)
		}
		if lval, ok := l["lat"]; ok {
			lat, foundLat = extractNumericVal(lval)
		}
		return lon, lat, foundLon && foundLat
	}

	// now try reflection on struct fields
	thingVal := reflect.ValueOf(thing)
	// reflect.ValueOf(nil) is the zero Value and calling Type() on it panics,
	// so validity must be established before the type is looked up.
	if thingVal.IsValid() && thingVal.Kind() == reflect.Struct {
		thingTyp := thingVal.Type()
		for i := 0; i < thingVal.NumField(); i++ {
			fieldVal := thingVal.Field(i)
			// unexported fields cannot be read through reflection
			if !fieldVal.CanInterface() {
				continue
			}
			fieldName := strings.ToLower(thingTyp.Field(i).Name)
			switch {
			case strings.HasPrefix(fieldName, "lon"), strings.HasPrefix(fieldName, "lng"):
				lon, foundLon = extractNumericVal(fieldVal.Interface())
			case strings.HasPrefix(fieldName, "lat"):
				lat, foundLat = extractNumericVal(fieldVal.Interface())
			}
		}
	}

	// last hope, some interfaces
	// lon
	if l, ok := thing.(loner); ok {
		lon = l.Lon()
		foundLon = true
	} else if l, ok := thing.(lnger); ok {
		lon = l.Lng()
		foundLon = true
	}
	// lat
	if l, ok := thing.(later); ok {
		lat = l.Lat()
		foundLat = true
	}

	return lon, lat, foundLon && foundLat
}

// extractNumericVal extracts a numeric value (if possible) and returns it as
// a float64. Only float64 and float32 inputs are accepted; anything else
// yields (0, false).
func extractNumericVal(v interface{}) (float64, bool) {
	switch v := v.(type) {
	case float64:
		return v, true
	case float32:
		return float64(v), true
	}
	return 0, false
}

// various support interfaces which can be used to find lat/lon
type loner interface {
	Lon() float64
}

type later interface {
	Lat() float64
}

type lnger interface {
	Lng() float64
}
2.0 +const oneDivF3 = 1 / 6.0 +const oneDivF4 = 1 / 24.0 + +// 1.57079632673412561417e+00 first 33 bits of pi/2 +var pio2Hi = math.Float64frombits(0x3FF921FB54400000) + +// 6.07710050650619224932e-11 pi/2 - PIO2_HI +var pio2Lo = math.Float64frombits(0x3DD0B4611A626331) + +var asinPio2Hi = math.Float64frombits(0x3FF921FB54442D18) // 1.57079632679489655800e+00 +var asinPio2Lo = math.Float64frombits(0x3C91A62633145C07) // 6.12323399573676603587e-17 +var asinPs0 = math.Float64frombits(0x3fc5555555555555) // 1.66666666666666657415e-01 +var asinPs1 = math.Float64frombits(0xbfd4d61203eb6f7d) // -3.25565818622400915405e-01 +var asinPs2 = math.Float64frombits(0x3fc9c1550e884455) // 2.01212532134862925881e-01 +var asinPs3 = math.Float64frombits(0xbfa48228b5688f3b) // -4.00555345006794114027e-02 +var asinPs4 = math.Float64frombits(0x3f49efe07501b288) // 7.91534994289814532176e-04 +var asinPs5 = math.Float64frombits(0x3f023de10dfdf709) // 3.47933107596021167570e-05 +var asinQs1 = math.Float64frombits(0xc0033a271c8a2d4b) // -2.40339491173441421878e+00 +var asinQs2 = math.Float64frombits(0x40002ae59c598ac8) // 2.02094576023350569471e+00 +var asinQs3 = math.Float64frombits(0xbfe6066c1b8d0159) // -6.88283971605453293030e-01 +var asinQs4 = math.Float64frombits(0x3fb3b8c5b12e9282) // 7.70381505559019352791e-02 + +var twoPiHi = 4 * pio2Hi +var twoPiLo = 4 * pio2Lo +var sinCosDeltaHi = twoPiHi/sinCosTabsSize - 1 +var sinCosDeltaLo = twoPiLo/sinCosTabsSize - 1 +var sinCosIndexer = 1 / (sinCosDeltaHi + sinCosDeltaLo) +var sinCosMaxValueForIntModulo = ((math.MaxInt64 >> 9) / sinCosIndexer) * 0.99 +var asinMaxValueForTabs = math.Sin(73.0 * degreesToRadian) + +var asinDelta = asinMaxValueForTabs / (asinTabsSize - 1) +var asinIndexer = 1 / asinDelta + +func init() { + + // sin and cos + sinTab = make([]float64, sinCosTabsSize) + cosTab = make([]float64, sinCosTabsSize) + sinCosPiIndex := (sinCosTabsSize - 1) / 2 + sinCosPiMul2Index := 2 * sinCosPiIndex + sinCosPiMul05Index := sinCosPiIndex 
/ 2 + sinCosPiMul15Index := 3 * sinCosPiIndex / 2 + for i := 0; i < sinCosTabsSize; i++ { + // angle: in [0,2*PI]. + angle := float64(i)*sinCosDeltaHi + float64(i)*sinCosDeltaLo + sinAngle := math.Sin(angle) + cosAngle := math.Cos(angle) + // For indexes corresponding to null cosine or sine, we make sure the value is zero + // and not an epsilon. This allows for a much better accuracy for results close to zero. + if i == sinCosPiIndex { + sinAngle = 0.0 + } else if i == sinCosPiMul2Index { + sinAngle = 0.0 + } else if i == sinCosPiMul05Index { + sinAngle = 0.0 + } else if i == sinCosPiMul15Index { + sinAngle = 0.0 + } + sinTab[i] = sinAngle + cosTab[i] = cosAngle + } + + // asin + asinTab = make([]float64, asinTabsSize) + asinDer1DivF1Tab = make([]float64, asinTabsSize) + asinDer2DivF2Tab = make([]float64, asinTabsSize) + asinDer3DivF3Tab = make([]float64, asinTabsSize) + asinDer4DivF4Tab = make([]float64, asinTabsSize) + for i := 0; i < asinTabsSize; i++ { + // x: in [0,ASIN_MAX_VALUE_FOR_TABS]. 
+ x := float64(i) * asinDelta + asinTab[i] = math.Asin(x) + oneMinusXSqInv := 1.0 / (1 - x*x) + oneMinusXSqInv05 := math.Sqrt(oneMinusXSqInv) + oneMinusXSqInv15 := oneMinusXSqInv05 * oneMinusXSqInv + oneMinusXSqInv25 := oneMinusXSqInv15 * oneMinusXSqInv + oneMinusXSqInv35 := oneMinusXSqInv25 * oneMinusXSqInv + asinDer1DivF1Tab[i] = oneMinusXSqInv05 + asinDer2DivF2Tab[i] = (x * oneMinusXSqInv15) * oneDivF2 + asinDer3DivF3Tab[i] = ((1 + 2*x*x) * oneMinusXSqInv25) * oneDivF3 + asinDer4DivF4Tab[i] = ((5 + 2*x*(2+x*(5-2*x))) * oneMinusXSqInv35) * oneDivF4 + } + + // earth radius + a := 6378137.0 + b := 6356752.31420 + a2 := a * a + b2 := b * b + earthDiameterPerLatitude = make([]float64, radiusTabsSize) + earthDiameterPerLatitude[0] = 2.0 * a / 1000 + earthDiameterPerLatitude[radiusTabsSize-1] = 2.0 * b / 1000 + for i := 1; i < radiusTabsSize-1; i++ { + lat := math.Pi * float64(i) / (2*radiusTabsSize - 1) + one := math.Pow(a2*math.Cos(lat), 2) + two := math.Pow(b2*math.Sin(lat), 2) + three := math.Pow(float64(a)*math.Cos(lat), 2) + four := math.Pow(b*math.Sin(lat), 2) + radius := math.Sqrt((one + two) / (three + four)) + earthDiameterPerLatitude[i] = 2 * radius / 1000 + } +} + +func earthDiameter(lat float64) float64 { + index := math.Mod(math.Abs(lat)*radiusIndexer+0.5, float64(len(earthDiameterPerLatitude))) + if math.IsNaN(index) { + return 0 + } + return earthDiameterPerLatitude[int(index)] +} + +func cos(a float64) float64 { + if a < 0.0 { + a = -a + } + if a > sinCosMaxValueForIntModulo { + return math.Cos(a) + } + // index: possibly outside tables range. + index := int(a*sinCosIndexer + 0.5) + delta := (a - float64(index)*sinCosDeltaHi) - float64(index)*sinCosDeltaLo + // Making sure index is within tables range. + // Last value of each table is the same than first, so we ignore it (tabs size minus one) for modulo. 
+ index &= (sinCosTabsSize - 2) // index % (SIN_COS_TABS_SIZE-1) + indexCos := cosTab[index] + indexSin := sinTab[index] + return indexCos + delta*(-indexSin+delta*(-indexCos*oneDivF2+delta*(indexSin*oneDivF3+delta*indexCos*oneDivF4))) +} + +func asin(a float64) float64 { + var negateResult bool + if a < 0 { + a = -a + negateResult = true + } + if a <= asinMaxValueForTabs { + index := int(a*asinIndexer + 0.5) + delta := a - float64(index)*asinDelta + result := asinTab[index] + delta*(asinDer1DivF1Tab[index]+delta*(asinDer2DivF2Tab[index]+delta*(asinDer3DivF3Tab[index]+delta*asinDer4DivF4Tab[index]))) + if negateResult { + return -result + } + return result + } + // value > ASIN_MAX_VALUE_FOR_TABS, or value is NaN + // This part is derived from fdlibm. + if a < 1 { + t := (1.0 - a) * 0.5 + p := t * (asinPs0 + t*(asinPs1+t*(asinPs2+t*(asinPs3+t*(asinPs4+t+asinPs5))))) + q := 1.0 + t*(asinQs1+t*(asinQs2+t*(asinQs3+t*asinQs4))) + s := math.Sqrt(t) + z := s + s*(p/q) + result := asinPio2Hi - ((z + z) - asinPio2Lo) + if negateResult { + return -result + } + return result + } + // value >= 1.0, or value is NaN + if a == 1.0 { + if negateResult { + return -math.Pi / 2 + } + return math.Pi / 2 + } + return math.NaN() +} diff --git a/geo/sloppy_test.go b/geo/sloppy_test.go new file mode 100644 index 00000000..4ad4ff3b --- /dev/null +++ b/geo/sloppy_test.go @@ -0,0 +1,73 @@ +package geo + +import ( + "math" + "testing" +) + +func TestCos(t *testing.T) { + + cosDelta := 1E-15 + + tests := []struct { + in float64 + want float64 + }{ + {math.NaN(), math.NaN()}, + {math.Inf(-1), math.NaN()}, + {math.Inf(1), math.NaN()}, + {1, math.Cos(1)}, + {0, math.Cos(0)}, + {math.Pi / 2, math.Cos(math.Pi / 2)}, + {-math.Pi / 2, math.Cos(-math.Pi / 2)}, + {math.Pi / 4, math.Cos(math.Pi / 4)}, + {-math.Pi / 4, math.Cos(-math.Pi / 4)}, + {math.Pi * 2 / 3, math.Cos(math.Pi * 2 / 3)}, + {-math.Pi * -2 / 3, math.Cos(-math.Pi * -2 / 3)}, + {math.Pi / 6, math.Cos(math.Pi / 6)}, + {-math.Pi / 6, 
math.Cos(-math.Pi / 6)}, + } + + for _, test := range tests { + got := cos(test.in) + if math.IsNaN(test.want) && !math.IsNaN(got) { + t.Errorf("wanted NaN, got %f for cos(%f)", got, test.in) + } + if !math.IsNaN(test.want) && math.Abs(got-test.want) > cosDelta { + t.Errorf("wanted: %f, got %f for cos(%f) diff %f", test.want, got, test.in, math.Abs(got-test.want)) + } + } +} + +func TestAsin(t *testing.T) { + + asinDelta := 1E-7 + + tests := []struct { + in float64 + want float64 + }{ + {math.NaN(), math.NaN()}, + {2, math.NaN()}, + {-2, math.NaN()}, + {-1, -math.Pi / 2}, + {-0.8660254, -math.Pi / 3}, + {-0.7071068, -math.Pi / 4}, + {-0.5, -math.Pi / 6}, + {0, 0}, + {0.5, math.Pi / 6}, + {0.7071068, math.Pi / 4}, + {0.8660254, math.Pi / 3}, + {1, math.Pi / 2}, + } + + for _, test := range tests { + got := asin(test.in) + if math.IsNaN(test.want) && !math.IsNaN(got) { + t.Errorf("wanted NaN, got %f for asin(%f)", got, test.in) + } + if !math.IsNaN(test.want) && math.Abs(got-test.want) > asinDelta { + t.Errorf("wanted: %f, got %f for asin(%f) diff %f", test.want, got, test.in, math.Abs(got-test.want)) + } + } +} diff --git a/mapping.go b/mapping.go index 2564ea66..76238dc1 100644 --- a/mapping.go +++ b/mapping.go @@ -59,3 +59,7 @@ func NewDateTimeFieldMapping() *mapping.FieldMapping { func NewBooleanFieldMapping() *mapping.FieldMapping { return mapping.NewBooleanFieldMapping() } + +func NewGeoPointFieldMapping() *mapping.FieldMapping { + return mapping.NewGeoPointFieldMapping() +} diff --git a/mapping/document.go b/mapping/document.go index 54bf61b6..9bdb8596 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -75,7 +75,7 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache) error { } } switch field.Type { - case "text", "datetime", "number", "boolean": + case "text", "datetime", "number", "boolean", "geopoint": default: return fmt.Errorf("unknown field type: '%s'", field.Type) } @@ -482,8 +482,24 @@ func (dm *DocumentMapping) 
processProperty(property interface{}, path []string, fieldMapping.processTime(property, pathString, path, indexes, context) } default: + if subDocMapping != nil { + for _, fieldMapping := range subDocMapping.Fields { + if fieldMapping.Type == "geopoint" { + fieldMapping.processGeoPoint(property, pathString, path, indexes, context) + } + } + } dm.walkDocument(property, path, indexes, context) } + case reflect.Map: + if subDocMapping != nil { + for _, fieldMapping := range subDocMapping.Fields { + if fieldMapping.Type == "geopoint" { + fieldMapping.processGeoPoint(property, pathString, path, indexes, context) + } + } + } + dm.walkDocument(property, path, indexes, context) default: dm.walkDocument(property, path, indexes, context) } diff --git a/mapping/field.go b/mapping/field.go index 0976cd6f..9f1928ca 100644 --- a/mapping/field.go +++ b/mapping/field.go @@ -21,6 +21,7 @@ import ( "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/geo" ) // control the default behavior for dynamic fields (those not explicitly mapped) @@ -124,6 +125,16 @@ func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { return rv } +// NewGeoPointFieldMapping returns a default field mapping for geo points +func NewGeoPointFieldMapping() *FieldMapping { + return &FieldMapping{ + Type: "geopoint", + Store: true, + Index: true, + IncludeInAll: true, + } +} + // Options returns the indexing options for this field. 
func (fm *FieldMapping) Options() document.IndexingOptions { var rv document.IndexingOptions @@ -208,6 +219,20 @@ func (fm *FieldMapping) processBoolean(propertyValueBool bool, pathString string } } +func (fm *FieldMapping) processGeoPoint(propertyMightBeGeoPoint interface{}, pathString string, path []string, indexes []uint64, context *walkContext) { + lon, lat, found := geo.ExtractGeoPoint(propertyMightBeGeoPoint) + if found { + fieldName := getFieldName(pathString, path, fm) + options := fm.Options() + field := document.NewGeoPointFieldWithIndexingOptions(fieldName, indexes, lon, lat, options) + context.doc.AddField(field) + + if !fm.IncludeInAll { + context.excludedFromAll = append(context.excludedFromAll, fieldName) + } + } +} + func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) *analysis.Analyzer { analyzerName := fm.Analyzer if analyzerName == "" { diff --git a/mapping/mapping_test.go b/mapping/mapping_test.go index 096219ca..309c0e1d 100644 --- a/mapping/mapping_test.go +++ b/mapping/mapping_test.go @@ -23,6 +23,7 @@ import ( "github.com/blevesearch/bleve/analysis/tokenizer/exception" "github.com/blevesearch/bleve/analysis/tokenizer/regexp" "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/numeric" ) var mappingSource = []byte(`{ @@ -845,3 +846,57 @@ func TestMappingPrimitives(t *testing.T) { } } } + +func TestMappingForGeo(t *testing.T) { + + type Location struct { + Lat float64 + Lon float64 + } + + nameFieldMapping := NewTextFieldMapping() + nameFieldMapping.Name = "name" + nameFieldMapping.Analyzer = "standard" + + locFieldMapping := NewGeoPointFieldMapping() + + thingMapping := NewDocumentMapping() + thingMapping.AddFieldMappingsAt("name", nameFieldMapping) + thingMapping.AddFieldMappingsAt("location", locFieldMapping) + + mapping := NewIndexMapping() + mapping.DefaultMapping = thingMapping + + x := struct { + Name string `json:"name"` + Location *Location `json:"location"` + }{ + Name: "marty", + 
Location: &Location{ + Lon: -180, + Lat: -90, + }, + } + + doc := document.NewDocument("1") + err := mapping.MapDocument(doc, x) + if err != nil { + t.Fatal(err) + } + + var foundGeo bool + for _, f := range doc.Fields { + if f.Name() == "location" { + foundGeo = true + got := f.Value() + expect := []byte(numeric.MustNewPrefixCodedInt64(0, 0)) + if !reflect.DeepEqual(got, expect) { + t.Errorf("expected geo value: %v, got %v", expect, got) + } + } + } + + if !foundGeo { + t.Errorf("expected to find geo point, did not") + } +} diff --git a/numeric/bin.go b/numeric/bin.go new file mode 100644 index 00000000..cd71392d --- /dev/null +++ b/numeric/bin.go @@ -0,0 +1,43 @@ +package numeric + +var interleaveMagic = []uint64{ + 0x5555555555555555, + 0x3333333333333333, + 0x0F0F0F0F0F0F0F0F, + 0x00FF00FF00FF00FF, + 0x0000FFFF0000FFFF, + 0x00000000FFFFFFFF, + 0xAAAAAAAAAAAAAAAA, +} + +var interleaveShift = []uint{1, 2, 4, 8, 16} + +// Interleave the first 32 bits of each uint64 +// apdated from org.apache.lucene.util.BitUtil +// whcih was adapted from: +// http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN +func Interleave(v1, v2 uint64) uint64 { + v1 = (v1 | (v1 << interleaveShift[4])) & interleaveMagic[4] + v1 = (v1 | (v1 << interleaveShift[3])) & interleaveMagic[3] + v1 = (v1 | (v1 << interleaveShift[2])) & interleaveMagic[2] + v1 = (v1 | (v1 << interleaveShift[1])) & interleaveMagic[1] + v1 = (v1 | (v1 << interleaveShift[0])) & interleaveMagic[0] + v2 = (v2 | (v2 << interleaveShift[4])) & interleaveMagic[4] + v2 = (v2 | (v2 << interleaveShift[3])) & interleaveMagic[3] + v2 = (v2 | (v2 << interleaveShift[2])) & interleaveMagic[2] + v2 = (v2 | (v2 << interleaveShift[1])) & interleaveMagic[1] + v2 = (v2 | (v2 << interleaveShift[0])) & interleaveMagic[0] + return (v2 << 1) | v1 +} + +// Deinterleave the 32-bit value starting at position 0 +// to get the other 32-bit value, shift it by 1 first +func Deinterleave(b uint64) uint64 { + b &= interleaveMagic[0] + b = 
(b ^ (b >> interleaveShift[0])) & interleaveMagic[1] + b = (b ^ (b >> interleaveShift[1])) & interleaveMagic[2] + b = (b ^ (b >> interleaveShift[2])) & interleaveMagic[3] + b = (b ^ (b >> interleaveShift[3])) & interleaveMagic[4] + b = (b ^ (b >> interleaveShift[4])) & interleaveMagic[5] + return b +} diff --git a/numeric/bin_test.go b/numeric/bin_test.go new file mode 100644 index 00000000..f6dfb472 --- /dev/null +++ b/numeric/bin_test.go @@ -0,0 +1,27 @@ +package numeric + +import "testing" + +func TestInterleaveDeinterleave(t *testing.T) { + tests := []struct { + v1 uint64 + v2 uint64 + }{ + {0, 0}, + {1, 1}, + {27, 39}, + {1<<32 - 1, 1<<32 - 1}, // largest that should still work + } + + for _, test := range tests { + i := Interleave(test.v1, test.v2) + gotv1 := Deinterleave(i) + gotv2 := Deinterleave(i >> 1) + if gotv1 != test.v1 { + t.Errorf("expected v1: %d, got %d, interleaved was %x", test.v1, gotv1, i) + } + if gotv2 != test.v2 { + t.Errorf("expected v2: %d, got %d, interleaved was %x", test.v2, gotv2, i) + } + } +} diff --git a/search/query/geo_boundingbox.go b/search/query/geo_boundingbox.go new file mode 100644 index 00000000..97807c04 --- /dev/null +++ b/search/query/geo_boundingbox.go @@ -0,0 +1,93 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package query
+
+import (
+	"fmt"
+
+	"github.com/blevesearch/bleve/index"
+	"github.com/blevesearch/bleve/mapping"
+	"github.com/blevesearch/bleve/search"
+	"github.com/blevesearch/bleve/search/searcher"
+)
+
+// GeoPoint is the lon/lat pair used by the geo queries in their JSON form.
+type GeoPoint struct {
+	Lon float64 `json:"lon,omitempty"`
+	Lat float64 `json:"lat,omitempty"`
+}
+
+// GeoBoundingBoxQuery describes a rectangle by its top-left and
+// bottom-right corners.  Searcher turns it into one or two bounding box
+// searchers over FieldVal.
+type GeoBoundingBoxQuery struct {
+	TopLeft     *GeoPoint `json:"top_left,omitempty"`
+	BottomRight *GeoPoint `json:"bottom_right,omitempty"`
+	FieldVal    string    `json:"field,omitempty"`
+	BoostVal    *Boost    `json:"boost,omitempty"`
+}
+
+// NewGeoBoundingBoxQuery builds a query from the two corner coordinates.
+// No field or boost is set.
+func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64) *GeoBoundingBoxQuery {
+	return &GeoBoundingBoxQuery{
+		TopLeft: &GeoPoint{
+			Lon: topLeftLon,
+			Lat: topLeftLat,
+		},
+		BottomRight: &GeoPoint{
+			Lon: bottomRightLon,
+			Lat: bottomRightLat,
+		},
+	}
+}
+
+// SetBoost stores b as the boost of this query.
+func (q *GeoBoundingBoxQuery) SetBoost(b float64) {
+	boost := Boost(b)
+	q.BoostVal = &boost
+}
+
+// Boost returns the value of the stored boost.
+func (q *GeoBoundingBoxQuery) Boost() float64 {
+	return q.BoostVal.Value()
+}
+
+// SetField sets the name of the field to search.
+func (q *GeoBoundingBoxQuery) SetField(f string) {
+	q.FieldVal = f
+}
+
+// Field returns the name of the field to search.
+func (q *GeoBoundingBoxQuery) Field() string {
+	return q.FieldVal
+}
+
+// Searcher builds the searcher for this query.  When no field is set the
+// mapping's default search field is used.  A box whose bottom-right
+// longitude is smaller than its top-left longitude is treated as crossing
+// the date line: it is searched as two boxes, one from -180 to the right
+// edge and one from the left edge to 180, joined by a disjunction.
+//
+// An error is returned when a corner is missing or when any of the
+// underlying searchers cannot be created.
+func (q *GeoBoundingBoxQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
+	// ParseQuery only checks that the top_left/bottom_right keys are
+	// present, so a JSON null corner arrives here as a nil pointer.
+	if err := q.Validate(); err != nil {
+		return nil, err
+	}
+
+	field := q.FieldVal
+	if q.FieldVal == "" {
+		field = m.DefaultSearchField()
+	}
+
+	if q.BottomRight.Lon < q.TopLeft.Lon {
+		// cross date line, rewrite as two parts
+
+		leftSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, -180, q.BottomRight.Lat, q.BottomRight.Lon, q.TopLeft.Lat, field, q.BoostVal.Value(), options)
+		if err != nil {
+			return nil, err
+		}
+		rightSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft.Lon, q.BottomRight.Lat, 180, q.TopLeft.Lat, field, q.BoostVal.Value(), options)
+		if err != nil {
+			_ = leftSearcher.Close()
+			return nil, err
+		}
+
+		rv, err := searcher.NewDisjunctionSearcher(i, []search.Searcher{leftSearcher, rightSearcher}, 0, options)
+		if err != nil {
+			// no disjunction was returned to close them later, so
+			// release both halves here
+			_ = leftSearcher.Close()
+			_ = rightSearcher.Close()
+			return nil, err
+		}
+		return rv, nil
+	}
+
+	return searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft.Lon, q.BottomRight.Lat, q.BottomRight.Lon, q.TopLeft.Lat, field, q.BoostVal.Value(), options)
+}
+
+// Validate reports an error when either corner is missing; Searcher reads
+// both corners unconditionally.
+func (q *GeoBoundingBoxQuery) Validate() error {
+	if q.TopLeft == nil {
+		return fmt.Errorf("geo bounding box query must specify top_left")
+	}
+	if q.BottomRight == nil {
+		return fmt.Errorf("geo bounding box query must specify bottom_right")
+	}
+	return nil
+}
diff --git a/search/query/geo_distance.go b/search/query/geo_distance.go
new file mode 100644
index 00000000..ee8f902a
--- /dev/null
+++ b/search/query/geo_distance.go
@@ -0,0 +1,75 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package query
+
+import (
+	"fmt"
+
+	"github.com/blevesearch/bleve/geo"
+	"github.com/blevesearch/bleve/index"
+	"github.com/blevesearch/bleve/mapping"
+	"github.com/blevesearch/bleve/search"
+	"github.com/blevesearch/bleve/search/searcher"
+)
+
+// GeoDistanceQuery describes a search around Location.  Distance is kept
+// as text and parsed with geo.ParseDistance when the searcher is built.
+type GeoDistanceQuery struct {
+	Location *GeoPoint `json:"location,omitempty"`
+	Distance string    `json:"distance,omitempty"`
+	FieldVal string    `json:"field,omitempty"`
+	BoostVal *Boost    `json:"boost,omitempty"`
+}
+
+// NewGeoDistanceQuery builds a query centered on lon/lat with the given
+// distance text.  No field or boost is set.
+func NewGeoDistanceQuery(lon, lat float64, distance string) *GeoDistanceQuery {
+	return &GeoDistanceQuery{
+		Location: &GeoPoint{
+			Lon: lon,
+			Lat: lat,
+		},
+		Distance: distance,
+	}
+}
+
+// SetBoost stores b as the boost of this query.
+func (q *GeoDistanceQuery) SetBoost(b float64) {
+	boost := Boost(b)
+	q.BoostVal = &boost
+}
+
+// Boost returns the value of the stored boost.
+func (q *GeoDistanceQuery) Boost() float64 {
+	return q.BoostVal.Value()
+}
+
+// SetField sets the name of the field to search.
+func (q *GeoDistanceQuery) SetField(f string) {
+	q.FieldVal = f
+}
+
+// Field returns the name of the field to search.
+func (q *GeoDistanceQuery) Field() string {
+	return q.FieldVal
+}
+
+// Searcher builds the distance searcher for this query.  When no field is
+// set the mapping's default search field is used.  An error is returned
+// when the location is missing or the distance cannot be parsed.
+func (q *GeoDistanceQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
+	// ParseQuery picks this query type on the "distance" key alone, so
+	// Location may never have been populated.
+	if q.Location == nil {
+		return nil, fmt.Errorf("geo distance query must specify location")
+	}
+
+	field := q.FieldVal
+	if q.FieldVal == "" {
+		field = m.DefaultSearchField()
+	}
+
+	dist, err := geo.ParseDistance(q.Distance)
+	if err != nil {
+		return nil, err
+	}
+
+	return searcher.NewGeoPointDistanceSearcher(i, q.Location.Lon, q.Location.Lat, dist, field, q.BoostVal.Value(), options)
+}
+
+// Validate reports an error when the location is missing or when the
+// distance text is rejected by geo.ParseDistance.
+func (q *GeoDistanceQuery) Validate() error {
+	if q.Location == nil {
+		return fmt.Errorf("geo distance query must specify location")
+	}
+	if _, err := geo.ParseDistance(q.Distance); err != nil {
+		return err
+	}
+	return nil
+}
diff --git a/search/query/query.go b/search/query/query.go
index 0ac12515..efac7316 100644
--- a/search/query/query.go
+++ b/search/query/query.go
@@ -244,6 +244,25 @@ func ParseQuery(input []byte) (Query, error) {
 		}
 		return &rv, nil
 	}
+	_, hasTopLeft := tmp["top_left"]
+	_, hasBottomRight := tmp["bottom_right"]
+	if hasTopLeft && hasBottomRight {
+		var rv GeoBoundingBoxQuery
+		err := json.Unmarshal(input, &rv)
+		if err != nil {
+			return nil, err
+		}
+		return &rv, nil
+	}
+	_, hasDistance := tmp["distance"]
+	if hasDistance {
+		var rv GeoDistanceQuery
+		err := json.Unmarshal(input, &rv)
+		if err != nil {
+			return nil, err
+		}
+		return &rv, nil
+	}
 	return nil, fmt.Errorf("unknown query type")
 }
 
diff --git a/search/searcher/search_filter.go b/search/searcher/search_filter.go
new file mode 100644
index 00000000..219f2ee7
--- /dev/null
+++ b/search/searcher/search_filter.go
@@ -0,0 +1,88 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package searcher
+
+import (
+	"github.com/blevesearch/bleve/index"
+	"github.com/blevesearch/bleve/search"
+)
+
+// FilterFunc defines a function which can filter documents
+// returning true means keep the document
+// returning false means do not keep the document
+type FilterFunc func(d *search.DocumentMatch) bool
+
+// FilteringSearcher wraps any other searcher, but checks any Next/Advance
+// call against the supplied FilterFunc
+type FilteringSearcher struct {
+	child  search.Searcher
+	accept FilterFunc
+}
+
+func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearcher {
+	return &FilteringSearcher{
+		child:  s,
+		accept: filter,
+	}
+}
+
+func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
+	next, err := f.child.Next(ctx)
+	for next != nil && err == nil {
+		if f.accept(next) {
+			return next, nil
+		}
+		next, err = f.child.Next(ctx)
+	}
+	return nil, err
+}
+
+func (f *FilteringSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
+	adv, err := f.child.Advance(ctx, ID)
+	if err != nil {
+		return nil, err
+	}
+	if adv == nil {
+		return nil, nil
+	}
+	if f.accept(adv) {
+		return adv, nil
+	}
+	return f.Next(ctx)
+}
+
+func (f *FilteringSearcher) Close() error {
+	return f.child.Close()
+}
+
+func (f *FilteringSearcher) Weight() float64 {
+	return f.child.Weight()
+}
+
+func (f *FilteringSearcher) SetQueryNorm(n float64) {
+	f.child.SetQueryNorm(n)
+}
+
+func (f *FilteringSearcher) Count() uint64 {
+	return f.child.Count()
+}
+
+func (f *FilteringSearcher) Min() int {
+	return f.child.Min()
+}
+
+func (f *FilteringSearcher) DocumentMatchPoolSize() int {
+	return f.child.DocumentMatchPoolSize()
+}
diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go
new file mode 100644
index 00000000..8d3d6249
--- /dev/null
+++ b/search/searcher/search_geoboundingbox.go
@@ -0,0 +1,224 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package searcher + +import ( + "bytes" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/geo" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/numeric" + "github.com/blevesearch/bleve/search" +) + +type GeoBoundingBoxSearcher struct { + indexReader index.IndexReader + field string + minLon float64 + minLat float64 + maxLon float64 + maxLat float64 + options search.SearcherOptions + + rangeBounds []*geoRange + + searcher *DisjunctionSearcher +} + +func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, maxLon, maxLat float64, field string, boost float64, options search.SearcherOptions) (*GeoBoundingBoxSearcher, error) { + rv := &GeoBoundingBoxSearcher{ + indexReader: indexReader, + minLon: minLon, + minLat: minLat, + maxLon: maxLon, + maxLat: maxLat, + field: field, + options: options, + } + rv.computeRange(0, (geo.GeoBits<<1)-1) + + var termsOnBoundary []search.Searcher + var termsNotOnBoundary []search.Searcher + for _, r := range rv.rangeBounds { + ts, err := NewTermSearcher(indexReader, string(r.cell), field, 1.0, options) + if err != nil { + for _, s := range termsOnBoundary { + _ = s.Close() + } + for _, s := range termsNotOnBoundary { + _ = s.Close() + } + return nil, err + } + if r.boundary { + termsOnBoundary = append(termsOnBoundary, ts) + } else { + termsNotOnBoundary = append(termsNotOnBoundary, ts) + } + } + onBoundarySearcher, err := NewDisjunctionSearcher(indexReader, termsOnBoundary, 0, options) + if err != nil { + for _, s := range termsOnBoundary { + _ = s.Close() + } + for _, s := range termsNotOnBoundary { + _ = s.Close() + } + return nil, err + } + filterOnBoundarySearcher := NewFilteringSearcher(onBoundarySearcher, func(d *search.DocumentMatch) bool { + var lon, lat float64 + var found bool + err = indexReader.DocumentVisitFieldTerms(d.IndexInternalID, []string{field}, func(field string, term []byte) { + // only consider the values which are shifted 0 + prefixCoded := 
numeric.PrefixCoded(term) + var shift uint + shift, err = prefixCoded.Shift() + if err == nil && shift == 0 { + var i64 int64 + i64, err = prefixCoded.Int64() + if err == nil { + lon = geo.MortonUnhashLon(uint64(i64)) + lat = geo.MortonUnhashLat(uint64(i64)) + found = true + } + } + }) + if err == nil && found { + return geo.BoundingBoxContains(lon, lat, minLon, minLat, maxLon, maxLat) + } + return false + }) + notOnBoundarySearcher, err := NewDisjunctionSearcher(indexReader, termsNotOnBoundary, 0, options) + if err != nil { + for _, s := range termsOnBoundary { + _ = s.Close() + } + for _, s := range termsNotOnBoundary { + _ = s.Close() + } + _ = filterOnBoundarySearcher.Close() + return nil, err + } + + rv.searcher, err = NewDisjunctionSearcher(indexReader, []search.Searcher{filterOnBoundarySearcher, notOnBoundarySearcher}, 0, options) + if err != nil { + for _, s := range termsOnBoundary { + _ = s.Close() + } + for _, s := range termsNotOnBoundary { + _ = s.Close() + } + _ = filterOnBoundarySearcher.Close() + _ = notOnBoundarySearcher.Close() + return nil, err + } + return rv, nil +} + +func (s *GeoBoundingBoxSearcher) Count() uint64 { + return s.searcher.Count() +} + +func (s *GeoBoundingBoxSearcher) Weight() float64 { + return s.searcher.Weight() +} + +func (s *GeoBoundingBoxSearcher) SetQueryNorm(qnorm float64) { + s.searcher.SetQueryNorm(qnorm) +} + +func (s *GeoBoundingBoxSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { + return s.searcher.Next(ctx) +} + +func (s *GeoBoundingBoxSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { + return s.searcher.Advance(ctx, ID) +} + +func (s *GeoBoundingBoxSearcher) Close() error { + return s.searcher.Close() +} + +func (s *GeoBoundingBoxSearcher) Min() int { + return 0 +} + +func (s *GeoBoundingBoxSearcher) DocumentMatchPoolSize() int { + return s.searcher.DocumentMatchPoolSize() +} + +var geoMaxShift = document.GeoPrecisionStep * 4 +var 
geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 + +func (s *GeoBoundingBoxSearcher) computeRange(term uint64, shift uint) { + split := term | uint64(0x1)<<shift + var upperMax uint64 + if shift < 63 { + upperMax = term | ((uint64(1) << (shift + 1)) - 1) + } else { + upperMax = 0xffffffffffffffff + } + lowerMax := split - 1 + s.relateAndRecurse(term, lowerMax, shift) + s.relateAndRecurse(split, upperMax, shift) +} + +func (s *GeoBoundingBoxSearcher) relateAndRecurse(start, end uint64, res uint) { + minLon := geo.MortonUnhashLon(start) + minLat := geo.MortonUnhashLat(start) + maxLon := geo.MortonUnhashLon(end) + maxLat := geo.MortonUnhashLat(end) + + level := ((geo.GeoBits << 1) - res) >> 1 + + within := res%document.GeoPrecisionStep == 0 && s.cellWithin(minLon, minLat, maxLon, maxLat) + if within || (level == geoDetailLevel && s.cellIntersectShape(minLon, minLat, maxLon, maxLat)) { + s.rangeBounds = append(s.rangeBounds, newGeoRange(start, res, level, !within)) + } else if level < geoDetailLevel && s.cellIntersectsMBR(minLon, minLat, maxLon, maxLat) { + s.computeRange(start, res-1) + } +} + +func (s *GeoBoundingBoxSearcher) cellWithin(minLon, minLat, maxLon, maxLat float64) bool { + return geo.RectWithin(minLon, minLat, maxLon, maxLat, s.minLon, s.minLat, s.maxLon, s.maxLat) +} + +func (s *GeoBoundingBoxSearcher) cellIntersectShape(minLon, minLat, maxLon, maxLat float64) bool { + return s.cellIntersectsMBR(minLon, minLat, maxLon, maxLat) +} + +func (s *GeoBoundingBoxSearcher) cellIntersectsMBR(minLon, minLat, maxLon, maxLat float64) bool { + return geo.RectIntersects(minLon, minLat, maxLon, maxLat, s.minLon, s.minLat, s.maxLon, s.maxLat) +} + +type geoRange struct { + cell []byte + level uint + boundary bool +} + +func newGeoRange(lower uint64, res uint, level uint, boundary bool) *geoRange { + return &geoRange{ + level: level, + boundary: boundary, + cell: numeric.MustNewPrefixCodedInt64(int64(lower), res), + } +} + +func (r *geoRange) Compare(other *geoRange) int { + return bytes.Compare(r.cell, other.cell) +} diff --git a/search/searcher/search_geoboundingbox_test.go b/search/searcher/search_geoboundingbox_test.go new file mode 100644 index 00000000..1839dec2 --- /dev/null +++ b/search/searcher/search_geoboundingbox_test.go @@ -0,0 +1,156 @@ +package searcher + +import ( + "log" + "testing" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/store/gtreap" + "github.com/blevesearch/bleve/index/upsidedown" +
"github.com/blevesearch/bleve/search" +) + +func TestGeoBoundingBox(t *testing.T) { + i := setup(t) + indexReader, err := i.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err = indexReader.Close() + if err != nil { + t.Fatal(err) + } + }() + + gbs, err := NewGeoBoundingBoxSearcher(indexReader, 0.001, 0.001, 0.002, 0.002, "loc", 1.0, search.SearcherOptions{}) + if err != nil { + t.Fatal(err) + } + ctx := &search.SearchContext{ + DocumentMatchPool: search.NewDocumentMatchPool(gbs.DocumentMatchPoolSize(), 0), + } + docMatch, err := gbs.Next(ctx) + for docMatch != nil && err == nil { + if docMatch == nil { + log.Printf("nil docmatch") + } else { + log.Printf("got doc match: %s", docMatch.IndexInternalID) + } + docMatch, err = gbs.Next(ctx) + } + if err != nil { + t.Fatal(err) + } +} + +func setup(t *testing.T) index.Index { + + analysisQueue := index.NewAnalysisQueue(1) + i, err := upsidedown.NewUpsideDownCouch( + gtreap.Name, + map[string]interface{}{ + "path": "", + }, + analysisQueue) + if err != nil { + t.Fatal(err) + } + err = i.Open() + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "a", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 0.0015, 0.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "b", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 1.0015, 1.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "c", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 2.0015, 2.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "d", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 3.0015, 3.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "e", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 4.0015, 4.0015), + 
}, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "f", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 5.0015, 5.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "g", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 6.0015, 6.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "h", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 7.0015, 7.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "i", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 8.0015, 8.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + err = i.Update(&document.Document{ + ID: "j", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 9.0015, 9.0015), + }, + }) + if err != nil { + t.Fatal(err) + } + + return i +} diff --git a/search/searcher/search_geopointdistance.go b/search/searcher/search_geopointdistance.go new file mode 100644 index 00000000..adb83dc8 --- /dev/null +++ b/search/searcher/search_geopointdistance.go @@ -0,0 +1,139 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package searcher
+
+import (
+	"github.com/blevesearch/bleve/geo"
+	"github.com/blevesearch/bleve/index"
+	"github.com/blevesearch/bleve/numeric"
+	"github.com/blevesearch/bleve/search"
+)
+
+// GeoPointDistanceSearcher finds documents whose geo point in field lies
+// within dist of the center.  It searches the bounding box around the
+// circle and filters the candidates by their actual distance.
+type GeoPointDistanceSearcher struct {
+	indexReader index.IndexReader
+	field       string
+
+	centerLon float64
+	centerLat float64
+	dist      float64
+
+	options search.SearcherOptions
+
+	searcher *FilteringSearcher
+}
+
+// NewGeoPointDistanceSearcher builds the searcher.  The bounding box comes
+// from geo.ComputeBoundingBox; a box whose right edge is west of its left
+// edge is split at the date line into two box searchers joined by a
+// disjunction.  The filter keeps a candidate when its geo.Haversin
+// distance to the center is at most dist/1000.
+func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, centerLat, dist float64, field string, boost float64, options search.SearcherOptions) (*GeoPointDistanceSearcher, error) {
+	rv := &GeoPointDistanceSearcher{
+		indexReader: indexReader,
+		centerLon:   centerLon,
+		centerLat:   centerLat,
+		dist:        dist,
+		field:       field,
+		options:     options,
+	}
+
+	// compute bounding box containing the circle
+	topLeftLon, topLeftLat, bottomRightLon, bottomRightLat := geo.ComputeBoundingBox(centerLon, centerLat, dist)
+
+	var boxSearcher search.Searcher
+	if bottomRightLon < topLeftLon {
+		// cross date line, rewrite as two parts
+
+		leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader, -180, bottomRightLat, bottomRightLon, topLeftLat, field, boost, options)
+		if err != nil {
+			return nil, err
+		}
+		rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader, topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options)
+		if err != nil {
+			_ = leftSearcher.Close()
+			return nil, err
+		}
+
+		boxSearcher, err = NewDisjunctionSearcher(indexReader, []search.Searcher{leftSearcher, rightSearcher}, 0, options)
+		if err != nil {
+			_ = leftSearcher.Close()
+			_ = rightSearcher.Close()
+			return nil, err
+		}
+	} else {
+
+		// build geoboundingbox searcher for that bounding box
+		var err error
+		boxSearcher, err = NewGeoBoundingBoxSearcher(indexReader, topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost, options)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	// wrap it in a filtering searcher which checks the actual distance
+	rv.searcher = NewFilteringSearcher(boxSearcher, func(d *search.DocumentMatch) bool {
+		var lon, lat float64
+		var found bool
+		err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID, []string{field}, func(field string, term []byte) {
+			// only consider the values which are shifted 0
+			prefixCoded := numeric.PrefixCoded(term)
+			shift, err := prefixCoded.Shift()
+			if err == nil && shift == 0 {
+				i64, err := prefixCoded.Int64()
+				if err == nil {
+					lon = geo.MortonUnhashLon(uint64(i64))
+					lat = geo.MortonUnhashLat(uint64(i64))
+					found = true
+				}
+			}
+		})
+		if err == nil && found {
+			dist := geo.Haversin(lon, lat, rv.centerLon, rv.centerLat)
+			if dist <= rv.dist/1000 {
+				return true
+			}
+		}
+		return false
+	})
+
+	return rv, nil
+}
+
+func (s *GeoPointDistanceSearcher) Count() uint64 {
+	return s.searcher.Count()
+}
+
+func (s *GeoPointDistanceSearcher) Weight() float64 {
+	return s.searcher.Weight()
+}
+
+func (s *GeoPointDistanceSearcher) SetQueryNorm(qnorm float64) {
+	s.searcher.SetQueryNorm(qnorm)
+}
+
+func (s *GeoPointDistanceSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
+	return s.searcher.Next(ctx)
+}
+
+func (s *GeoPointDistanceSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
+	return s.searcher.Advance(ctx, ID)
+}
+
+func (s *GeoPointDistanceSearcher) Close() error {
+	return s.searcher.Close()
+}
+
+func (s *GeoPointDistanceSearcher) Min() int {
+	return 0
+}
+
+func (s *GeoPointDistanceSearcher) DocumentMatchPoolSize() int {
+	return s.searcher.DocumentMatchPoolSize()
+}
diff --git a/search/sort.go b/search/sort.go
index 70d4fbaa..751eec9d 100644
--- a/search/sort.go
+++ b/search/sort.go
@@ -17,9 +17,11 @@ package search
 import (
 	"encoding/json"
 	"fmt"
+	"math"
 	"sort"
 	"strings"
 
+	"github.com/blevesearch/bleve/geo"
 	"github.com/blevesearch/bleve/numeric"
 )
 
@@ -51,6 +53,21 @@ func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) {
 		return &SortScore{
 			Desc: descending,
 		}, nil
+	case "geo_distance":
+		field, ok := input["field"].(string)
+		if !ok {
+			return nil, fmt.Errorf("search sort mode geo_distance must specify field")
+		}
+		lon, lat, foundLocation := geo.ExtractGeoPoint(input["location"])
+		if !foundLocation {
+			return nil, fmt.Errorf("unable to parse geo_distance location")
+		}
+		return &SortGeoDistance{
+			Field: field,
+			Desc:  descending,
+			lon:   lon,
+			lat:   lat,
+		}, nil
 	case "field":
 		field, ok := input["field"].(string)
 		if !ok {
@@ -386,7 +403,7 @@ func (s *SortField) filterTermsByType(terms []string) []string {
 		for _, term := range terms {
 			valid, shift := numeric.ValidPrefixCodedTerm(term)
 			if valid && shift == 0 {
-				termsWithShiftZero = append(termsWithShiftZero)
+				termsWithShiftZero = append(termsWithShiftZero, term)
 			}
 		}
 		terms = termsWithShiftZero
@@ -521,3 +538,108 @@ func (s *SortScore) MarshalJSON() ([]byte, error) {
 	}
 	return json.Marshal("_score")
 }
+
+var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0))
+
+// SortGeoDistance will sort results by the distance of an
+// indexed geo point, from the provided location.
+// Field is the name of the field
+// Descending reverse the sort order (default false)
+type SortGeoDistance struct {
+	Field  string
+	Desc   bool
+	values []string
+	lon    float64
+	lat    float64
+}
+
+// UpdateVisitor notifies this sort field that in this document
+// this field has the specified term
+func (s *SortGeoDistance) UpdateVisitor(field string, term []byte) {
+	if field == s.Field {
+		s.values = append(s.values, string(term))
+	}
+}
+
+// Value returns the sort value of the DocumentMatch
+// it also resets the state of this SortGeoDistance for
+// processing the next document
+func (s *SortGeoDistance) Value(i *DocumentMatch) string {
+	iTerms := s.filterTermsByType(s.values)
+	iTerm := s.filterTermsByMode(iTerms)
+	s.values = nil
+
+	if iTerm == "" {
+		return maxDistance
+	}
+
+	i64, err := numeric.PrefixCoded(iTerm).Int64()
+	if err != nil {
+		return maxDistance
+	}
+	docLon := geo.MortonUnhashLon(uint64(i64))
+	docLat := geo.MortonUnhashLat(uint64(i64))
+
+	// The distance is fractional; int64(dist) would give every point
+	// inside the same whole unit an identical sort key.  Non-negative
+	// IEEE-754 values order the same way as their bit patterns, so the
+	// bits are encoded instead to keep full precision.
+	dist := geo.Haversin(s.lon, s.lat, docLon, docLat)
+	if math.IsNaN(dist) || dist < 0 {
+		return maxDistance
+	}
+	return string(numeric.MustNewPrefixCodedInt64(int64(math.Float64bits(dist)), 0))
+}
+
+// Descending determines the order of the sort
+func (s *SortGeoDistance) Descending() bool {
+	return s.Desc
+}
+
+// filterTermsByMode picks the first term, or "" when there is none
+func (s *SortGeoDistance) filterTermsByMode(terms []string) string {
+	if len(terms) >= 1 {
+		return terms[0]
+	}
+
+	return ""
+}
+
+// filterTermsByType attempts to make one pass on the terms
+// return only valid prefix coded numbers with shift of 0
+func (s *SortGeoDistance) filterTermsByType(terms []string) []string {
+	var termsWithShiftZero []string
+	for _, term := range terms {
+		valid, shift := numeric.ValidPrefixCodedTerm(term)
+		if valid && shift == 0 {
+			termsWithShiftZero = append(termsWithShiftZero, term)
+		}
+	}
+	return termsWithShiftZero
+}
+
+// RequiresDocID says this SearchSort does not require the DocID be loaded
+func (s *SortGeoDistance) RequiresDocID() bool { return false }
+
+// RequiresScoring says this SearchSort does not require scoring
+func (s *SortGeoDistance) RequiresScoring() bool { return false }
+
+// RequiresFields says this SearchSort requires the specified stored field
+func (s *SortGeoDistance) RequiresFields() []string { return []string{s.Field} }
+
+// MarshalJSON emits the object form accepted by ParseSearchSortObj
+func (s *SortGeoDistance) MarshalJSON() ([]byte, error) {
+	sfm := map[string]interface{}{
+		"by":    "geo_distance",
+		"field": s.Field,
+		"location": map[string]interface{}{
+			"lon": s.lon,
+			"lat": s.lat,
+		},
+	}
+	if s.Desc {
+		sfm["desc"] = true
+	}
+
+	return json.Marshal(sfm)
+}