Browse Source

Merge pull request #1213 from sreekanth-cb/geohash_new

MB-34021 - geo query not returning few docs
Sreekanth Sivasankaran 4 months ago
parent
commit
e0de6a8c9c
5 changed files with 4310 additions and 150 deletions
  1. 43 0
      geo/benchmark_geohash_test.go
  2. 80 143
      geo/geohash.go
  3. 26 6
      geo/geohash_test.go
  4. 1 1
      geo/parse.go
  5. 4160 0
      geo/versus_test.go

+ 43 - 0
geo/benchmark_geohash_test.go

@@ -0,0 +1,43 @@
+//  Copyright (c) 2019 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package geo
+
+import (
+	"testing"
+)
+
+func BenchmarkGeoHashLen5NewDecode(b *testing.B) {
+	b.ResetTimer()
+	hash := "d3hn3"
+	for i := 0; i < b.N; i++ {
+		_, _ = DecodeGeoHash(hash)
+	}
+}
+
+func BenchmarkGeoHashLen6NewDecode(b *testing.B) {
+	b.ResetTimer()
+	hash := "u4pruy"
+	for i := 0; i < b.N; i++ {
+		_, _ = DecodeGeoHash(hash)
+	}
+}
+
+func BenchmarkGeoHashLen7NewDecode(b *testing.B) {
+	b.ResetTimer()
+	hash := "u4pruyd"
+	for i := 0; i < b.N; i++ {
+		_, _ = DecodeGeoHash(hash)
+	}
+}

+ 80 - 143
geo/geohash.go

@@ -1,32 +1,21 @@
-// The code here was obtained from:
-//   https://github.com/mmcloughlin/geohash
-
-// The MIT License (MIT)
-// Copyright (c) 2015 Michael McLoughlin
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
+//  Copyright (c) 2019 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// This implementation is inspired by geohash-js.
+// Ref: https://github.com/davetroy/geohash-js
 
 package geo
 
-import (
-	"math"
-)
-
 // encoding encapsulates an encoding defined by a given base32 alphabet.
 type encoding struct {
 	enc string
@@ -47,128 +36,76 @@ func newEncoding(encoder string) *encoding {
 	return e
 }
 
-// Decode string into bits of a 64-bit word. The string s may be at most 12
-// characters.
-func (e *encoding) decode(s string) uint64 {
-	x := uint64(0)
-	for i := 0; i < len(s); i++ {
-		x = (x << 5) | uint64(e.dec[s[i]])
-	}
-	return x
-}
-
-// Encode bits of 64-bit word into a string.
-func (e *encoding) encode(x uint64) string {
-	b := [12]byte{}
-	for i := 0; i < 12; i++ {
-		b[11-i] = e.enc[x&0x1f]
-		x >>= 5
-	}
-	return string(b[:])
-}
-
-// Base32Encoding with the Geohash alphabet.
+// base32encoding is the base32 encoding that uses the Geohash alphabet.
 var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
 
-// BoundingBox returns the region encoded by the given string geohash.
-func geoBoundingBox(hash string) geoBox {
-	bits := uint(5 * len(hash))
-	inthash := base32encoding.decode(hash)
-	return geoBoundingBoxIntWithPrecision(inthash, bits)
-}
-
-// Box represents a rectangle in latitude/longitude space.
-type geoBox struct {
-	minLat float64
-	maxLat float64
-	minLng float64
-	maxLng float64
-}
-
-// Round returns a point inside the box, making an effort to round to minimal
-// precision.
-func (b geoBox) round() (lat, lng float64) {
-	x := maxDecimalPower(b.maxLat - b.minLat)
-	lat = math.Ceil(b.minLat/x) * x
-	x = maxDecimalPower(b.maxLng - b.minLng)
-	lng = math.Ceil(b.minLng/x) * x
-	return
-}
-
-// precalculated for performance
-var exp232 = math.Exp2(32)
-
-// errorWithPrecision returns the error range in latitude and longitude for in
-// integer geohash with bits of precision.
-func errorWithPrecision(bits uint) (latErr, lngErr float64) {
-	b := int(bits)
-	latBits := b / 2
-	lngBits := b - latBits
-	latErr = math.Ldexp(180.0, -latBits)
-	lngErr = math.Ldexp(360.0, -lngBits)
-	return
-}
-
-// minDecimalPlaces returns the minimum number of decimal places such that
-// there must exist an number with that many places within any range of width
-// r. This is intended for returning minimal precision coordinates inside a
-// box.
-func maxDecimalPower(r float64) float64 {
-	m := int(math.Floor(math.Log10(r)))
-	return math.Pow10(m)
-}
-
-// Encode the position of x within the range -r to +r as a 32-bit integer.
-func encodeRange(x, r float64) uint32 {
-	p := (x + r) / (2 * r)
-	return uint32(p * exp232)
-}
-
-// Decode the 32-bit range encoding X back to a value in the range -r to +r.
-func decodeRange(X uint32, r float64) float64 {
-	p := float64(X) / exp232
-	x := 2*r*p - r
-	return x
-}
-
-// Squash the even bitlevels of X into a 32-bit word. Odd bitlevels of X are
-// ignored, and may take any value.
-func squash(X uint64) uint32 {
-	X &= 0x5555555555555555
-	X = (X | (X >> 1)) & 0x3333333333333333
-	X = (X | (X >> 2)) & 0x0f0f0f0f0f0f0f0f
-	X = (X | (X >> 4)) & 0x00ff00ff00ff00ff
-	X = (X | (X >> 8)) & 0x0000ffff0000ffff
-	X = (X | (X >> 16)) & 0x00000000ffffffff
-	return uint32(X)
-}
+var masks = []uint64{16, 8, 4, 2, 1}
+
+// DecodeGeoHash decodes the given geohash string into a (lat, lon) point,
+// faster and with higher precision. This API is experimental.
+func DecodeGeoHash(geoHash string) (float64, float64) {
+	even := true
+	lat := []float64{-90.0, 90.0}
+	lon := []float64{-180.0, 180.0}
+
+	for i := 0; i < len(geoHash); i++ {
+		cd := uint64(base32encoding.dec[geoHash[i]])
+		for j := 0; j < 5; j++ {
+			if even {
+				if cd&masks[j] > 0 {
+					lon[0] = (lon[0] + lon[1]) / 2
+				} else {
+					lon[1] = (lon[0] + lon[1]) / 2
+				}
+			} else {
+				if cd&masks[j] > 0 {
+					lat[0] = (lat[0] + lat[1]) / 2
+				} else {
+					lat[1] = (lat[0] + lat[1]) / 2
+				}
+			}
+			even = !even
+		}
+	}
 
-// Deinterleave the bits of X into 32-bit words containing the even and odd
-// bitlevels of X, respectively.
-func deinterleave(X uint64) (uint32, uint32) {
-	return squash(X), squash(X >> 1)
+	return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2
 }
 
-// BoundingBoxIntWithPrecision returns the region encoded by the integer
-// geohash with the specified precision.
-func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox {
-	fullHash := hash << (64 - bits)
-	latInt, lngInt := deinterleave(fullHash)
-	lat := decodeRange(latInt, 90)
-	lng := decodeRange(lngInt, 180)
-	latErr, lngErr := errorWithPrecision(bits)
-	return geoBox{
-		minLat: lat,
-		maxLat: lat + latErr,
-		minLng: lng,
-		maxLng: lng + lngErr,
+func EncodeGeoHash(lat, lon float64) string {
+	even := true
+	lats := []float64{-90.0, 90.0}
+	lons := []float64{-180.0, 180.0}
+	precision := 12
+	var ch, bit uint64
+	var geoHash string
+
+	for len(geoHash) < precision {
+		if even {
+			mid := (lons[0] + lons[1]) / 2
+			if lon > mid {
+				ch |= masks[bit]
+				lons[0] = mid
+			} else {
+				lons[1] = mid
+			}
+		} else {
+			mid := (lats[0] + lats[1]) / 2
+			if lat > mid {
+				ch |= masks[bit]
+				lats[0] = mid
+			} else {
+				lats[1] = mid
+			}
+		}
+		even = !even
+		if bit < 4 {
+			bit++
+		} else {
+			geoHash += string(base32encoding.enc[ch])
+			ch = 0
+			bit = 0
+		}
 	}
-}
-
-// ----------------------------------------------------------------------
 
-// Decode the string geohash to a (lat, lng) point.
-func GeoHashDecode(hash string) (lat, lng float64) {
-	box := geoBoundingBox(hash)
-	return box.round()
+	return geoHash
 }

+ 26 - 6
geo/geohash_test.go

@@ -15,24 +15,25 @@
 package geo
 
 import (
+	"strings"
 	"testing"
 )
 
-func TestGeoHash(t *testing.T) {
+func TestDecodeGeoHash(t *testing.T) {
 	tests := []struct {
 		hash string
 		lon  float64
 		lat  float64
 	}{
-		{"d3hn3", -73.080000, 6.730000},     // -73.05908203, 6.74560547 as per http://geohash.co/
-		{"u4pru", 10.380000, 57.620000},     // 10.39306641, 57.63427734
-		{"u4pruy", 10.410000, 57.646000},    // 10.40954590, 57.64801025
-		{"u4pruyd", 10.407000, 57.649000},   // 10.40748596, 57.64869690
+		{"d3hn3", -73.059082, 6.745605},     // -73.05908203, 6.74560547 as per http://geohash.co/
+		{"u4pru", 10.393066, 57.634277},     // 10.39306641, 57.63427734
+		{"u4pruy", 10.409546, 57.648010},    // 10.40954590, 57.64801025
+		{"u4pruyd", 10.407486, 57.648697},   // 10.40748596, 57.64869690
 		{"u4pruydqqvj", 10.40744, 57.64911}, // 10.40743969, 57.64911063
 	}
 
 	for _, test := range tests {
-		lat, lon := GeoHashDecode(test.hash)
+		lat, lon := DecodeGeoHash(test.hash)
 
 		if compareGeo(test.lon, lon) != 0 {
 			t.Errorf("expected lon %f, got %f, hash %s", test.lon, lon, test.hash)
@@ -42,3 +43,22 @@ func TestGeoHash(t *testing.T) {
 		}
 	}
 }
+
+func TestEncodeGeoHash(t *testing.T) {
+	tests := []struct {
+		lon  float64
+		lat  float64
+		hash string
+	}{
+		{2.29449034, 48.85841131, "u09tunquc"},
+		{76.491540, 10.060349, "t9y3hx7my0fp"},
+	}
+
+	for _, test := range tests {
+		hash := EncodeGeoHash(test.lat, test.lon)
+
+		if !strings.HasPrefix(hash, test.hash) {
+			t.Errorf("expected hash %s, got %s", test.hash, hash)
+		}
+	}
+}

+ 1 - 1
geo/parse.go

@@ -85,7 +85,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
 			}
 		} else {
 			// geohash
-			lat, lon = GeoHashDecode(geoStr)
+			lat, lon = DecodeGeoHash(geoStr)
 			foundLat = true
 			foundLon = true
 		}

File diff suppressed because it is too large
+ 4160 - 0
geo/versus_test.go