Browse Source

optimization for LevenshteinDistanceMaxReuseSlice()

This optimization lets the levenshtein-distance-max calculation reuse a
caller-supplied slice, so it no longer has to allocate memory on every
invocation.

This optimization is used in the prefix edge case by the fuzzy
searcher during the findFuzzyCandidateTerms() inner loop.
Steve Yen 1 year ago
parent
commit
640d3c218c
2 changed files with 17 additions and 5 deletions
  1. 13 4
      search/levenshtein.go
  2. 4 1
      search/searcher/search_fuzzy.go

+ 13 - 4
search/levenshtein.go

@@ -57,15 +57,24 @@ func LevenshteinDistance(a, b string) int {
 // in which case the first return val will be the max
 // and the second will be true, indicating max was exceeded
 func LevenshteinDistanceMax(a, b string, max int) (int, bool) {
+	v, wasMax, _ := LevenshteinDistanceMaxReuseSlice(a, b, max, nil)
+	return v, wasMax
+}
+
+func LevenshteinDistanceMaxReuseSlice(a, b string, max int, d []int) (int, bool, []int) {
 	la := len(a)
 	lb := len(b)
 
 	ld := int(math.Abs(float64(la - lb)))
 	if ld > max {
-		return max, true
+		return max, true, d
 	}
 
-	d := make([]int, la+1)
+	if cap(d) < la+1 {
+		d = make([]int, la+1)
+	}
+	d = d[:la+1]
+
 	var lastdiag, olddiag, temp int
 
 	for i := 1; i <= la; i++ {
@@ -98,8 +107,8 @@ func LevenshteinDistanceMax(a, b string, max int) (int, bool) {
 		}
 		// after each row if rowmin isn't less than max stop
 		if rowmin > max {
-			return max, true
+			return max, true, d
 		}
 	}
-	return d[la], false
+	return d[la], false, d
 }

+ 4 - 1
search/searcher/search_fuzzy.go

@@ -72,9 +72,12 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
 	}()
 
 	// enumerate terms and check levenshtein distance
+	var reuse []int
 	tfd, err := fieldDict.Next()
 	for err == nil && tfd != nil {
-		ld, exceeded := search.LevenshteinDistanceMax(term, tfd.Term, fuzziness)
+		var ld int
+		var exceeded bool
+		ld, exceeded, reuse = search.LevenshteinDistanceMaxReuseSlice(term, tfd.Term, fuzziness, reuse)
 		if !exceeded && ld <= fuzziness {
 			rv = append(rv, tfd.Term)
 			if tooManyClauses(len(rv)) {