Browse Source

index API & scorch use index.Regexp instead of a stringified regexp

In this optimization and bleve "non-porcelain" index API change, the
index.IndexReaderRegexp API is changed to accept an index.Regexp
instance instead of the string representation of a regexp.

This allows scorch to leverage the LiteralPrefix() information of the
regexp instance (which is not implemented by the vellum.regexp API),
so that the FST dictionary searches can be more selective by
invoking...

   d.fst.Search(r, prefixBeg, prefixEnd)

instead of the previous...

   d.fst.Search(r, nil, nil)

See also: https://issues.couchbase.com/browse/MB-30264
Steve Yen 10 months ago
parent
commit
96657413a7

+ 1 - 1
index/index.go

@@ -110,7 +110,7 @@ type Regexp interface {
 }
 
 type IndexReaderRegexp interface {
-	FieldDictRegexp(field string, regex []byte) (FieldDict, error)
+	FieldDictRegexp(field string, regex Regexp) (FieldDict, error)
 }
 
 type IndexReaderFuzzy interface {

+ 1 - 1
index/scorch/segment/empty.go

@@ -80,7 +80,7 @@ func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator {
 	return &EmptyDictionaryIterator{}
 }
 
-func (e *EmptyDictionary) RegexpIterator(start string) DictionaryIterator {
+func (e *EmptyDictionary) RegexpIterator(r index.Regexp) DictionaryIterator {
 	return &EmptyDictionaryIterator{}
 }
 

+ 1 - 1
index/scorch/segment/segment.go

@@ -51,7 +51,7 @@ type TermDictionary interface {
 	Iterator() DictionaryIterator
 	PrefixIterator(prefix string) DictionaryIterator
 	RangeIterator(start, end string) DictionaryIterator
-	RegexpIterator(regex string) DictionaryIterator
+	RegexpIterator(regex index.Regexp) DictionaryIterator
 	FuzzyIterator(term string, fuzziness int) DictionaryIterator
 	OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
 }

+ 14 - 3
index/scorch/segment/zap/dict.go

@@ -178,15 +178,26 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator
 
 // RegexpIterator returns an iterator which only visits terms having the
 // the specified regex
-func (d *Dictionary) RegexpIterator(regex string) segment.DictionaryIterator {
+func (d *Dictionary) RegexpIterator(rIn index.Regexp) segment.DictionaryIterator {
+	prefixTerm, complete := rIn.LiteralPrefix()
+	if complete {
+		return d.PrefixIterator(prefixTerm)
+	}
+
 	rv := &DictionaryIterator{
 		d: d,
 	}
 
 	if d.fst != nil {
-		r, err := regexp.New(regex)
+		r, err := regexp.New(rIn.String())
 		if err == nil {
-			itr, err2 := d.fst.Search(r, nil, nil)
+			var prefixBeg, prefixEnd []byte
+			if prefixTerm != "" {
+				prefixBeg = []byte(prefixTerm)
+				prefixEnd = incrementBytes(prefixEnd)
+			}
+
+			itr, err2 := d.fst.Search(r, prefixBeg, prefixEnd)
 			if err2 == nil {
 				rv.itr = itr
 			} else if err2 != nil && err2 != vellum.ErrIteratorDone {

+ 2 - 2
index/scorch/snapshot_index.go

@@ -179,9 +179,9 @@ func (i *IndexSnapshot) FieldDictPrefix(field string,
 }
 
 func (i *IndexSnapshot) FieldDictRegexp(field string,
-	termRegex []byte) (index.FieldDict, error) {
+	termRegex index.Regexp) (index.FieldDict, error) {
 	return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
-		return i.RegexpIterator(string(termRegex))
+		return i.RegexpIterator(termRegex)
 	})
 }
 

+ 1 - 1
index/scorch/snapshot_segment.go

@@ -52,7 +52,7 @@ func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.Dic
 	return s.d.RangeIterator(start, end)
 }
 
-func (s *SegmentDictionarySnapshot) RegexpIterator(regex string) segment.DictionaryIterator {
+func (s *SegmentDictionarySnapshot) RegexpIterator(regex index.Regexp) segment.DictionaryIterator {
 	return s.d.RegexpIterator(regex)
 }
 

+ 1 - 1
index_meta.go

@@ -19,7 +19,7 @@ import (
 	"io/ioutil"
 	"os"
 	"path/filepath"
-	
+
 	"github.com/blevesearch/bleve/index/upsidedown"
 )
 

+ 1 - 1
search/searcher/search_regexp.go

@@ -29,7 +29,7 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern index.Regexp,
 	search.Searcher, error) {
 	var candidateTerms []string
 	if ir, ok := indexReader.(index.IndexReaderRegexp); ok {
-		fieldDict, err := ir.FieldDictRegexp(field, []byte(pattern.String()))
+		fieldDict, err := ir.FieldDictRegexp(field, pattern)
 		if err != nil {
 			return nil, err
 		}