search_fuzzy.go 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package searcher
  15. import (
  16. "github.com/blevesearch/bleve/index"
  17. "github.com/blevesearch/bleve/search"
  18. )
  19. func NewFuzzySearcher(indexReader index.IndexReader, term string,
  20. prefix, fuzziness int, field string, boost float64,
  21. options search.SearcherOptions) (search.Searcher, error) {
  22. // Note: we don't byte slice the term for a prefix because of runes.
  23. prefixTerm := ""
  24. for i, r := range term {
  25. if i < prefix {
  26. prefixTerm += string(r)
  27. } else {
  28. break
  29. }
  30. }
  31. candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness,
  32. field, prefixTerm)
  33. if err != nil {
  34. return nil, err
  35. }
  36. return NewMultiTermSearcher(indexReader, candidateTerms, field,
  37. boost, options, true)
  38. }
  39. func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
  40. fuzziness int, field, prefixTerm string) (rv []string, err error) {
  41. rv = make([]string, 0)
  42. var fieldDict index.FieldDict
  43. if len(prefixTerm) > 0 {
  44. fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
  45. } else {
  46. // in case of advanced reader implementations directly call
  47. // the levenshtein automaton based iterator to collect the
  48. // candidate terms
  49. if ir, ok := indexReader.(index.IndexReaderFuzzy); ok {
  50. fieldDict, err = ir.FieldDictFuzzy(field, []byte(term), fuzziness)
  51. if err != nil {
  52. return rv, err
  53. }
  54. tfd, err := fieldDict.Next()
  55. for err == nil && tfd != nil {
  56. rv = append(rv, tfd.Term)
  57. tfd, err = fieldDict.Next()
  58. }
  59. return rv, nil
  60. }
  61. fieldDict, err = indexReader.FieldDict(field)
  62. }
  63. defer func() {
  64. if cerr := fieldDict.Close(); cerr != nil && err == nil {
  65. err = cerr
  66. }
  67. }()
  68. // enumerate terms and check levenshtein distance
  69. var reuse []int
  70. tfd, err := fieldDict.Next()
  71. for err == nil && tfd != nil {
  72. var ld int
  73. var exceeded bool
  74. ld, exceeded, reuse = search.LevenshteinDistanceMaxReuseSlice(term, tfd.Term, fuzziness, reuse)
  75. if !exceeded && ld <= fuzziness {
  76. rv = append(rv, tfd.Term)
  77. if tooManyClauses(len(rv)) {
  78. return rv, tooManyClausesErr()
  79. }
  80. }
  81. tfd, err = fieldDict.Next()
  82. }
  83. return rv, err
  84. }