Browse Source

Merge pull request #1236 from sreekanth-cb/dict_new

Geo adopts the new field dictionary Exists (random lookup) API
Sreekanth Sivasankaran 5 months ago
parent
commit
3038e74d6f

+ 8 - 0
index/index.go

@@ -121,6 +121,10 @@ type IndexReaderOnly interface {
 	FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
 }
 
+// IndexReaderExists is implemented by index readers that can hand out a
+// FieldDictExists for a field, i.e. a dictionary that answers key-existence
+// queries. Callers discover support with a type assertion on their reader.
+type IndexReaderExists interface {
+	// FieldDictExists returns an existence-lookup dictionary for field.
+	FieldDictExists(field string) (FieldDictExists, error)
+}
+
 // FieldTerms contains the terms used by a document, keyed by field
 type FieldTerms map[string][]string
 
@@ -230,6 +234,10 @@ type FieldDict interface {
 	Close() error
 }
 
+// FieldDictExists is a field dictionary that can be probed for individual
+// keys.
+type FieldDictExists interface {
+	// Exists reports whether key is present in the dictionary.
+	Exists(key []byte) (bool, error)
+}
+
 // DocIDReader is the interface exposing enumeration of documents identifiers.
 // Close the reader to release associated resources.
 type DocIDReader interface {

+ 8 - 0
index/scorch/segment/empty.go

@@ -91,12 +91,20 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte,
 	return &EmptyDictionaryIterator{}
 }
 
+// ExistsIterator returns an iterator over the empty dictionary; its Exists
+// method reports false for every key.
+func (e *EmptyDictionary) ExistsIterator() DictionaryIterator {
+	return new(EmptyDictionaryIterator)
+}
+
 type EmptyDictionaryIterator struct{}
 
 func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
 	return nil, nil
 }
 
+// Exists always reports false with a nil error, whatever key is given.
+func (e *EmptyDictionaryIterator) Exists(key []byte) (bool, error) {
+	const found = false
+	return found, nil
+}
+
 func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
 	return nil, nil
 }

+ 2 - 0
index/scorch/segment/segment.go

@@ -59,10 +59,12 @@ type TermDictionary interface {
 	AutomatonIterator(a vellum.Automaton,
 		startKeyInclusive, endKeyExclusive []byte) DictionaryIterator
 	OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
+	ExistsIterator() DictionaryIterator
 }
 
 type DictionaryIterator interface {
 	Next() (*index.DictEntry, error)
+	Exists(key []byte) (bool, error)
 }
 
 type PostingsList interface {

+ 29 - 0
index/scorch/segment/zap/dict.go

@@ -228,6 +228,25 @@ func (d *Dictionary) OnlyIterator(onlyTerms [][]byte,
 	return rv
 }
 
+// ExistsIterator returns a DictionaryIterator meant for Exists lookups
+// against this dictionary. Counts are omitted. When the dictionary has no
+// FST the returned iterator carries no underlying FST iterator.
+func (d *Dictionary) ExistsIterator() segment.DictionaryIterator {
+	existsItr := &DictionaryIterator{
+		d:         d,
+		omitCount: true,
+	}
+	if d.fst == nil {
+		return existsItr
+	}
+
+	fstItr, err := d.fst.Iterator(nil, nil)
+	switch err {
+	case nil:
+		existsItr.itr = fstItr
+	case vellum.ErrIteratorDone:
+		// Nothing to iterate: leave both itr and err unset.
+	default:
+		existsItr.err = err
+	}
+
+	return existsItr
+}
+
 // DictionaryIterator is an iterator for term dictionary
 type DictionaryIterator struct {
 	d         *Dictionary
@@ -257,3 +276,13 @@ func (i *DictionaryIterator) Next() (*index.DictEntry, error) {
 	i.err = i.itr.Next()
 	return &i.entry, nil
 }
+
+// Exists reports whether key is present, by delegating to the underlying
+// FST iterator. An iterator that is already done, or that has no underlying
+// FST iterator, reports false; any other stored error is returned as-is.
+func (i *DictionaryIterator) Exists(key []byte) (bool, error) {
+	if i.err == vellum.ErrIteratorDone {
+		return false, nil
+	}
+	if i.err != nil {
+		return false, i.err
+	}
+	if i.itr == nil {
+		return false, nil
+	}
+	return i.itr.Exists(key)
+}

+ 33 - 16
index/scorch/snapshot_index.go

@@ -126,7 +126,9 @@ func (i *IndexSnapshot) updateSize() {
 	}
 }
 
-func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
+func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string,
+	makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
+	randomLookup bool) (*IndexSnapshotFieldDict, error) {
 
 	results := make(chan *asynchSegmentResult)
 	for index, segment := range i.segment {
@@ -150,14 +152,20 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
 		if asr.err != nil && err == nil {
 			err = asr.err
 		} else {
-			next, err2 := asr.dictItr.Next()
-			if err2 != nil && err == nil {
-				err = err2
-			}
-			if next != nil {
+			if !randomLookup {
+				next, err2 := asr.dictItr.Next()
+				if err2 != nil && err == nil {
+					err = err2
+				}
+				if next != nil {
+					rv.cursors = append(rv.cursors, &segmentDictCursor{
+						itr:  asr.dictItr,
+						curr: *next,
+					})
+				}
+			} else {
 				rv.cursors = append(rv.cursors, &segmentDictCursor{
-					itr:  asr.dictItr,
-					curr: *next,
+					itr: asr.dictItr,
 				})
 			}
 		}
@@ -166,8 +174,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
 	if err != nil {
 		return nil, err
 	}
-	// prepare heap
-	heap.Init(rv)
+
+	if !randomLookup {
+		// prepare heap
+		heap.Init(rv)
+	}
 
 	return rv, nil
 }
@@ -175,21 +186,21 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
 func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
 	return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
 		return i.Iterator()
-	})
+	}, false)
 }
 
 func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
 	endTerm []byte) (index.FieldDict, error) {
 	return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
 		return i.RangeIterator(string(startTerm), string(endTerm))
-	})
+	}, false)
 }
 
 func (i *IndexSnapshot) FieldDictPrefix(field string,
 	termPrefix []byte) (index.FieldDict, error) {
 	return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
 		return i.PrefixIterator(string(termPrefix))
-	})
+	}, false)
 }
 
 func (i *IndexSnapshot) FieldDictRegexp(field string,
@@ -204,7 +215,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string,
 
 	return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
 		return i.AutomatonIterator(a, prefixBeg, prefixEnd)
-	})
+	}, false)
 }
 
 func (i *IndexSnapshot) getLevAutomaton(term string,
@@ -232,14 +243,20 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string,
 
 	return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
 		return i.AutomatonIterator(a, prefixBeg, prefixEnd)
-	})
+	}, false)
 }
 
 func (i *IndexSnapshot) FieldDictOnly(field string,
 	onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) {
 	return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
 		return i.OnlyIterator(onlyTerms, includeCount)
-	})
+	}, false)
+}
+
+// FieldDictExists builds a field dictionary for field from each segment's
+// ExistsIterator, requesting random-lookup mode from
+// newIndexSnapshotFieldDict.
+func (i *IndexSnapshot) FieldDictExists(field string) (index.FieldDictExists, error) {
+	existsItr := func(dict segment.TermDictionary) segment.DictionaryIterator {
+		return dict.ExistsIterator()
+	}
+	return i.newIndexSnapshotFieldDict(field, existsItr, true)
+}
 
 func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {

+ 14 - 0
index/scorch/snapshot_index_dict.go

@@ -91,3 +91,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
 func (i *IndexSnapshotFieldDict) Close() error {
 	return nil
 }
+
+// Exists reports whether key is present in the dictionary of at least one
+// of the segment cursors held by this field dictionary.
+//
+// Cursors are probed in order and the first match returns (true, nil). A
+// lookup error on one cursor does not stop the scan, since another cursor
+// may still contain the key. If no cursor matches, the first lookup error
+// seen (if any) is returned, so a failed lookup is not mistaken for "absent".
+// With no cursors the result is (false, nil).
+func (i *IndexSnapshotFieldDict) Exists(key []byte) (bool, error) {
+	var firstErr error
+
+	for _, cursor := range i.cursors {
+		found, err := cursor.itr.Exists(key)
+		if found {
+			return true, nil
+		}
+		if err != nil && firstErr == nil {
+			firstErr = err
+		}
+	}
+
+	return false, firstErr
+}

+ 38 - 9
search/searcher/search_geoboundingbox.go

@@ -22,6 +22,8 @@ import (
 	"github.com/blevesearch/bleve/search"
 )
 
+type filterFunc func(key []byte) bool
+
 var GeoBitsShift1 = (geo.GeoBits << 1)
 var GeoBitsShift1Minus1 = GeoBitsShift1 - 1
 
@@ -118,19 +120,46 @@ func ComputeGeoRange(term uint64, shift uint,
 		return rv
 	}
 
-	isIndexed := func(term []byte) bool {
-		if indexReader != nil {
-			reader, err := indexReader.TermFieldReader(term, field, false, false, false)
-			if err != nil || reader == nil {
-				return false
+	var fieldDict index.FieldDictExists
+	var isIndexed filterFunc
+	if irr, ok := indexReader.(index.IndexReaderExists); ok {
+		fieldDict, err = irr.FieldDictExists(field)
+		if err != nil {
+			return nil, nil, err
+		}
+
+		isIndexed = func(term []byte) bool {
+			found, err := fieldDict.Exists(term)
+			return err == nil && found
+		}
+	}
+
+	defer func() {
+		if fieldDict != nil {
+			if fd, ok := fieldDict.(index.FieldDict); ok {
+				cerr := fd.Close()
+				if cerr != nil {
+					err = cerr
+				}
 			}
-			if reader.Count() == 0 {
+		}
+	}()
+
+	if isIndexed == nil {
+		isIndexed = func(term []byte) bool {
+			if indexReader != nil {
+				reader, err := indexReader.TermFieldReader(term, field, false, false, false)
+				if err != nil || reader == nil {
+					return false
+				}
+				if reader.Count() == 0 {
+					_ = reader.Close()
+					return false
+				}
 				_ = reader.Close()
-				return false
 			}
-			_ = reader.Close()
+			return true
 		}
-		return true
 	}
 
 	var computeGeoRange func(term uint64, shift uint) // declare for recursion