regexp_char_filter.go 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
  3. // except in compliance with the License. You may obtain a copy of the License at
  4. // http://www.apache.org/licenses/LICENSE-2.0
  5. // Unless required by applicable law or agreed to in writing, software distributed under the
  6. // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
  7. // either express or implied. See the License for the specific language governing permissions
  8. // and limitations under the License.
  9. package regexp_char_filter
  10. import (
  11. "bytes"
  12. "fmt"
  13. "regexp"
  14. "github.com/couchbaselabs/bleve/analysis"
  15. "github.com/couchbaselabs/bleve/registry"
  16. )
  17. const Name = "regexp"
  18. type RegexpCharFilter struct {
  19. r *regexp.Regexp
  20. replacement []byte
  21. }
  22. func NewRegexpCharFilter(r *regexp.Regexp, replacement []byte) *RegexpCharFilter {
  23. return &RegexpCharFilter{
  24. r: r,
  25. replacement: replacement,
  26. }
  27. }
  28. func (s *RegexpCharFilter) Filter(input []byte) []byte {
  29. return s.r.ReplaceAllFunc(input, func(in []byte) []byte { return bytes.Repeat(s.replacement, len(in)) })
  30. }
  31. func RegexpCharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
  32. regexpStr, ok := config["regexp"].(string)
  33. if !ok {
  34. return nil, fmt.Errorf("must specify regexp")
  35. }
  36. r, err := regexp.Compile(regexpStr)
  37. if err != nil {
  38. return nil, fmt.Errorf("unable to build regexp char filter: %v", err)
  39. }
  40. replaceBytes := []byte(" ")
  41. replaceStr, ok := config["replace"].(string)
  42. if ok {
  43. replaceBytes = []byte(replaceStr)
  44. }
  45. return NewRegexpCharFilter(r, replaceBytes), nil
  46. }
  47. func init() {
  48. registry.RegisterCharFilter(Name, RegexpCharFilterConstructor)
  49. }