Browse Source

Add support for GeoSpatial points in string format

+ Follows the geo-point specification used by Elasticsearch:
    https://www.elastic.co/guide/en/elasticsearch/reference/current/geo-point.html
+ String formats allowed:
    - "lat,lon"
    - "  lat, lon   "
    - "geohash"
+ All the necessary code to decode geohashes is obtained from:
    - https://github.com/mmcloughlin/geohash
+ Also see:
    https://issues.couchbase.com/browse/MB-30542
abhinavdangeti 7 months ago
parent
commit
478805bc7f
4 changed files with 283 additions and 7 deletions
  1. 174 0
      geo/geohash.go
  2. 32 0
      geo/parse.go
  3. 5 1
      mapping/document.go
  4. 72 6
      mapping/mapping_test.go

+ 174 - 0
geo/geohash.go

@@ -0,0 +1,174 @@
+// The code here was obtained from:
+//   https://github.com/mmcloughlin/geohash
+
+// The MIT License (MIT)
+// Copyright (c) 2015 Michael McLoughlin
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+package geo
+
+import (
+	"math"
+)
+
+// encoding encapsulates an encoding defined by a given base32 alphabet.
+type encoding struct {
+	// enc is the alphabet; the byte at index i represents the 5-bit value i.
+	enc string
+	// dec is the reverse lookup from a byte to its 5-bit value; newEncoding
+	// sets entries for bytes outside the alphabet to 0xff.
+	dec [256]byte
+}
+
+// newEncoding builds the encoding for the supplied alphabet, which must be a
+// 32-byte string. Every byte value that does not occur in the alphabet is
+// mapped to 0xff in the decode table.
+func newEncoding(encoder string) *encoding {
+	table := &encoding{enc: encoder}
+	// start with every byte marked as "not in the alphabet"
+	for idx := range table.dec {
+		table.dec[idx] = 0xff
+	}
+	// then record the position of each alphabet byte
+	for pos, ch := range []byte(encoder) {
+		table.dec[ch] = byte(pos)
+	}
+	return table
+}
+
+// Decode string into bits of a 64-bit word. The string s may be at most 12
+// characters.
+func (e *encoding) decode(s string) uint64 {
+	x := uint64(0)
+	for i := 0; i < len(s); i++ {
+		x = (x << 5) | uint64(e.dec[s[i]])
+	}
+	return x
+}
+
+// Encode bits of 64-bit word into a string.
+func (e *encoding) encode(x uint64) string {
+	b := [12]byte{}
+	for i := 0; i < 12; i++ {
+		b[11-i] = e.enc[x&0x1f]
+		x >>= 5
+	}
+	return string(b[:])
+}
+
+// base32encoding is the encoding built from the Geohash base32 alphabet
+// (digits and lowercase letters, without a, i, l and o).
+var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
+
+// geoBoundingBox returns the region encoded by the given string geohash.
+// Each character of the hash contributes 5 bits of precision.
+func geoBoundingBox(hash string) geoBox {
+	return geoBoundingBoxIntWithPrecision(
+		base32encoding.decode(hash),
+		uint(5*len(hash)),
+	)
+}
+
+// geoBox represents a rectangle in latitude/longitude space, described by
+// its minimum and maximum latitude and longitude.
+type geoBox struct {
+	minLat float64
+	maxLat float64
+	minLng float64
+	maxLng float64
+}
+
+// round returns a point inside the box, making an effort to round to minimal
+// precision. Each coordinate is the box minimum rounded up to a multiple of
+// the power of ten that maxDecimalPower reports for the box's extent in that
+// dimension.
+func (b geoBox) round() (lat, lng float64) {
+	latStep := maxDecimalPower(b.maxLat - b.minLat)
+	lngStep := maxDecimalPower(b.maxLng - b.minLng)
+	return math.Ceil(b.minLat/latStep) * latStep,
+		math.Ceil(b.minLng/lngStep) * lngStep
+}
+
+// exp232 is 2^32 as a float64, precalculated for performance.
+var exp232 = math.Exp2(32)
+
+// errorWithPrecision returns the error range in latitude and longitude for an
+// integer geohash with bits of precision. Latitude gets half of the bits
+// (rounded down) and longitude gets the rest; the 180 and 360 degree spans
+// are halved once per bit.
+func errorWithPrecision(bits uint) (latErr, lngErr float64) {
+	total := int(bits)
+	latBits := total / 2
+	lngBits := total - latBits
+	return math.Ldexp(180.0, -latBits), math.Ldexp(360.0, -lngBits)
+}
+
+// maxDecimalPower returns ten raised to the floor of log10(r), i.e. for a
+// positive r the largest power of ten that does not exceed r. A range of
+// width r is therefore wide enough to contain a multiple of the returned
+// value, which is what lets callers pick a minimal-precision coordinate
+// inside a box.
+func maxDecimalPower(r float64) float64 {
+	return math.Pow10(int(math.Floor(math.Log10(r))))
+}
+
+// encodeRange encodes the position of x within the range -r to +r as a
+// 32-bit integer: x is first mapped to a fraction of the range and that
+// fraction is then scaled by 2^32.
+func encodeRange(x, r float64) uint32 {
+	return uint32((x + r) / (2 * r) * exp232)
+}
+
+// decodeRange decodes the 32-bit range encoding X back to a value in the
+// range -r to +r: X is turned into a fraction of 2^32, which is then
+// stretched over the 2*r wide range and shifted down by r.
+func decodeRange(X uint32, r float64) float64 {
+	return 2*r*(float64(X)/exp232) - r
+}
+
+// squash compacts the even bitlevels of X into a 32-bit word, so that bit 2k
+// of X becomes bit k of the result. Odd bitlevels of X are ignored, and may
+// take any value.
+func squash(X uint64) uint32 {
+	// discard the odd bitlevels
+	X &= 0x5555555555555555
+	// repeatedly fold the surviving bits together, doubling the size of the
+	// packed groups each time: 2, 4, 8, 16 and finally 32 bits
+	X |= X >> 1
+	X &= 0x3333333333333333
+	X |= X >> 2
+	X &= 0x0f0f0f0f0f0f0f0f
+	X |= X >> 4
+	X &= 0x00ff00ff00ff00ff
+	X |= X >> 8
+	X &= 0x0000ffff0000ffff
+	X |= X >> 16
+	X &= 0x00000000ffffffff
+	return uint32(X)
+}
+
+// deinterleave splits the bits of X into two 32-bit words: the first holds
+// the even bitlevels of X and the second holds the odd bitlevels.
+func deinterleave(X uint64) (uint32, uint32) {
+	even := squash(X)
+	// shifting by one moves the odd bitlevels into the even positions
+	odd := squash(X >> 1)
+	return even, odd
+}
+
+// geoBoundingBoxIntWithPrecision returns the region encoded by the integer
+// geohash with the specified precision. The hash is left-aligned into a
+// 64-bit word, its even bitlevels are decoded as latitude and its odd
+// bitlevels as longitude; those give the minimum corner of the box, and the
+// error range for the precision gives its extent.
+func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox {
+	latInt, lngInt := deinterleave(hash << (64 - bits))
+	latErr, lngErr := errorWithPrecision(bits)
+	box := geoBox{
+		minLat: decodeRange(latInt, 90),
+		minLng: decodeRange(lngInt, 180),
+	}
+	box.maxLat = box.minLat + latErr
+	box.maxLng = box.minLng + lngErr
+	return box
+}
+
+// ----------------------------------------------------------------------
+
+// GeoHashDecode decodes the string geohash to a (lat, lng) point, picking a
+// rounded point inside the bounding box that the hash encodes. The hash is
+// not validated here.
+func GeoHashDecode(hash string) (lat, lng float64) {
+	return geoBoundingBox(hash).round()
+}

+ 32 - 0
geo/parse.go

@@ -16,6 +16,7 @@ package geo
 
 import (
 	"reflect"
+	"strconv"
 	"strings"
 )
 
@@ -24,6 +25,8 @@ import (
 // Container:
 // slice length 2 (GeoJSON)
 //  first element lon, second element lat
+// string (coordinates separated by comma, or a geohash)
+//  first element lat, second element lon
 // map[string]interface{}
 //  exact keys lat and lon or lng
 // struct
@@ -59,6 +62,35 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
 		}
 	}
 
+	// is it a string
+	if thingVal.Kind() == reflect.String {
+		geoStr := thingVal.Interface().(string)
+		if strings.Contains(geoStr, ",") {
+			// geo point with coordinates split by comma
+			points := strings.Split(geoStr, ",")
+			for i, point := range points {
+				// trim any leading or trailing white spaces
+				points[i] = strings.TrimSpace(point)
+			}
+			if len(points) == 2 {
+				var err error
+				lat, err = strconv.ParseFloat(points[0], 64)
+				if err == nil {
+					foundLat = true
+				}
+				lon, err = strconv.ParseFloat(points[1], 64)
+				if err == nil {
+					foundLon = true
+				}
+			}
+		} else {
+			// geohash
+			lat, lon = GeoHashDecode(geoStr)
+			foundLat = true
+			foundLon = true
+		}
+	}
+
 	// is it a map
 	if l, ok := thing.(map[string]interface{}); ok {
 		if lval, ok := l["lon"]; ok {

+ 5 - 1
mapping/document.go

@@ -424,7 +424,11 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
 		if subDocMapping != nil {
 			// index by explicit mapping
 			for _, fieldMapping := range subDocMapping.Fields {
-				fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
+				if fieldMapping.Type == "geopoint" {
+					fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
+				} else {
+					fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
+				}
 			}
 		} else if closestDocMapping.Dynamic {
 			// automatic indexing behavior

+ 72 - 6
mapping/mapping_test.go

@@ -18,13 +18,13 @@ import (
 	"encoding/json"
 	"fmt"
 	"reflect"
+	"strconv"
 	"testing"
 	"time"
 
 	"github.com/blevesearch/bleve/analysis/tokenizer/exception"
 	"github.com/blevesearch/bleve/analysis/tokenizer/regexp"
 	"github.com/blevesearch/bleve/document"
-	"github.com/blevesearch/bleve/numeric"
 )
 
 var mappingSource = []byte(`{
@@ -870,6 +870,7 @@ func TestMappingForGeo(t *testing.T) {
 	mapping.DefaultMapping = thingMapping
 
 	geopoints := []interface{}{}
+	expect := [][]float64{} // to contain expected [lon,lat] for geopoints
 
 	// geopoint as a struct
 	geopoints = append(geopoints, struct {
@@ -882,6 +883,7 @@ func TestMappingForGeo(t *testing.T) {
 			Lat: -90,
 		},
 	})
+	expect = append(expect, []float64{-180, -90})
 
 	// geopoint as a map
 	geopoints = append(geopoints, struct {
@@ -894,8 +896,9 @@ func TestMappingForGeo(t *testing.T) {
 			"lat": -90,
 		},
 	})
+	expect = append(expect, []float64{-180, -90})
 
-	// geopoint as a slice
+	// geopoint as a slice, format: {lon, lat}
 	geopoints = append(geopoints, struct {
 		Name     string        `json:"name"`
 		Location []interface{} `json:"location"`
@@ -905,6 +908,55 @@ func TestMappingForGeo(t *testing.T) {
 			-180, -90,
 		},
 	})
+	expect = append(expect, []float64{-180, -90})
+
+	// geopoint as a string, format: "lat,lon"
+	geopoints = append(geopoints, struct {
+		Name     string        `json:"name"`
+		Location []interface{} `json:"location"`
+	}{
+		Name: "string",
+		Location: []interface{}{
+			"-90,-180",
+		},
+	})
+	expect = append(expect, []float64{-180, -90})
+
+	// geopoint as a string, format: "lat , lon" with leading/trailing whitespaces
+	geopoints = append(geopoints, struct {
+		Name     string        `json:"name"`
+		Location []interface{} `json:"location"`
+	}{
+		Name: "string",
+		Location: []interface{}{
+			"-90    ,    -180",
+		},
+	})
+	expect = append(expect, []float64{-180, -90})
+
+	// geopoint as a string - geohash
+	geopoints = append(geopoints, struct {
+		Name     string        `json:"name"`
+		Location []interface{} `json:"location"`
+	}{
+		Name: "string",
+		Location: []interface{}{
+			"000000000000",
+		},
+	})
+	expect = append(expect, []float64{-180, -90})
+
+	// geopoint as a string - geohash
+	geopoints = append(geopoints, struct {
+		Name     string        `json:"name"`
+		Location []interface{} `json:"location"`
+	}{
+		Name: "string",
+		Location: []interface{}{
+			"drm3btev3e86",
+		},
+	})
+	expect = append(expect, []float64{-71.34, 41.12})
 
 	for i, geopoint := range geopoints {
 		doc := document.NewDocument(string(i))
@@ -917,10 +969,24 @@ func TestMappingForGeo(t *testing.T) {
 		for _, f := range doc.Fields {
 			if f.Name() == "location" {
 				foundGeo = true
-				got := f.Value()
-				expect := []byte(numeric.MustNewPrefixCodedInt64(0, 0))
-				if !reflect.DeepEqual(got, expect) {
-					t.Errorf("expected geo value: %v, got %v", expect, got)
+				geoF, ok := f.(*document.GeoPointField)
+				if !ok {
+					t.Errorf("expected a geopoint field!")
+				}
+				lon, err := geoF.Lon()
+				if err != nil {
+					t.Errorf("error in fetching lon, err: %v", err)
+				}
+				lat, err := geoF.Lat()
+				if err != nil {
+					t.Errorf("error in fetching lat, err: %v", err)
+				}
+				// round obtained lon, lat to 2 decimal places
+				roundLon, _ := strconv.ParseFloat(fmt.Sprintf("%.2f", lon), 64)
+				roundLat, _ := strconv.ParseFloat(fmt.Sprintf("%.2f", lat), 64)
+				if roundLon != expect[i][0] || roundLat != expect[i][1] {
+					t.Errorf("expected geo point: {%v, %v}, got {%v, %v}",
+						expect[i][0], expect[i][1], lon, lat)
 				}
 			}
 		}