search.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bleve
  15. import (
  16. "encoding/json"
  17. "fmt"
  18. "reflect"
  19. "time"
  20. "github.com/blevesearch/bleve/analysis"
  21. "github.com/blevesearch/bleve/analysis/datetime/optional"
  22. "github.com/blevesearch/bleve/document"
  23. "github.com/blevesearch/bleve/registry"
  24. "github.com/blevesearch/bleve/search"
  25. "github.com/blevesearch/bleve/search/collector"
  26. "github.com/blevesearch/bleve/search/query"
  27. "github.com/blevesearch/bleve/size"
  28. )
  29. var reflectStaticSizeSearchResult int
  30. var reflectStaticSizeSearchStatus int
  31. func init() {
  32. var sr SearchResult
  33. reflectStaticSizeSearchResult = int(reflect.TypeOf(sr).Size())
  34. var ss SearchStatus
  35. reflectStaticSizeSearchStatus = int(reflect.TypeOf(ss).Size())
  36. }
// cache is the package-level registry cache. FacetRequest.Validate uses it
// to look up the date time parser named by defaultDateTimeParser.
var cache = registry.NewCache()

// defaultDateTimeParser is the name of the date time parser used when
// validating date range facets.
const defaultDateTimeParser = optional.Name
// numericRange describes one named bucket of a numeric facet. Min and Max
// are optional bounds; FacetRequest.Validate requires at least one of them
// to be set.
type numericRange struct {
	Name string   `json:"name,omitempty"`
	Min  *float64 `json:"min,omitempty"`
	Max  *float64 `json:"max,omitempty"`
}
// dateTimeRange describes one named bucket of a date facet. Each bound can
// be held either as a time.Time (Start/End) or as an unparsed string
// (startString/endString). ParseDates consults the string form only when
// the corresponding time.Time is zero.
type dateTimeRange struct {
	Name        string    `json:"name,omitempty"`
	Start       time.Time `json:"start,omitempty"`
	End         time.Time `json:"end,omitempty"`
	startString *string   // set by UnmarshalJSON and AddDateTimeRangeString
	endString   *string   // set by UnmarshalJSON and AddDateTimeRangeString
}
  51. func (dr *dateTimeRange) ParseDates(dateTimeParser analysis.DateTimeParser) (start, end time.Time) {
  52. start = dr.Start
  53. if dr.Start.IsZero() && dr.startString != nil {
  54. s, err := dateTimeParser.ParseDateTime(*dr.startString)
  55. if err == nil {
  56. start = s
  57. }
  58. }
  59. end = dr.End
  60. if dr.End.IsZero() && dr.endString != nil {
  61. e, err := dateTimeParser.ParseDateTime(*dr.endString)
  62. if err == nil {
  63. end = e
  64. }
  65. }
  66. return start, end
  67. }
  68. func (dr *dateTimeRange) UnmarshalJSON(input []byte) error {
  69. var temp struct {
  70. Name string `json:"name,omitempty"`
  71. Start *string `json:"start,omitempty"`
  72. End *string `json:"end,omitempty"`
  73. }
  74. err := json.Unmarshal(input, &temp)
  75. if err != nil {
  76. return err
  77. }
  78. dr.Name = temp.Name
  79. if temp.Start != nil {
  80. dr.startString = temp.Start
  81. }
  82. if temp.End != nil {
  83. dr.endString = temp.End
  84. }
  85. return nil
  86. }
  87. func (dr *dateTimeRange) MarshalJSON() ([]byte, error) {
  88. rv := map[string]interface{}{
  89. "name": dr.Name,
  90. "start": dr.Start,
  91. "end": dr.End,
  92. }
  93. if dr.Start.IsZero() && dr.startString != nil {
  94. rv["start"] = dr.startString
  95. }
  96. if dr.End.IsZero() && dr.endString != nil {
  97. rv["end"] = dr.endString
  98. }
  99. return json.Marshal(rv)
  100. }
// A FacetRequest describes a facet or aggregation
// of the result document set you would like to be
// built.
//
// Size limits the number of entries and Field names the field the facet
// is computed on. NumericRanges and DateTimeRanges define optional named
// buckets; Validate rejects a request that sets both.
type FacetRequest struct {
	Size           int              `json:"size"`
	Field          string           `json:"field"`
	NumericRanges  []*numericRange  `json:"numeric_ranges,omitempty"`
	DateTimeRanges []*dateTimeRange `json:"date_ranges,omitempty"`
}
  110. func (fr *FacetRequest) Validate() error {
  111. nrCount := len(fr.NumericRanges)
  112. drCount := len(fr.DateTimeRanges)
  113. if nrCount > 0 && drCount > 0 {
  114. return fmt.Errorf("facet can only conain numeric ranges or date ranges, not both")
  115. }
  116. if nrCount > 0 {
  117. nrNames := map[string]interface{}{}
  118. for _, nr := range fr.NumericRanges {
  119. if _, ok := nrNames[nr.Name]; ok {
  120. return fmt.Errorf("numeric ranges contains duplicate name '%s'", nr.Name)
  121. }
  122. nrNames[nr.Name] = struct{}{}
  123. if nr.Min == nil && nr.Max == nil {
  124. return fmt.Errorf("numeric range query must specify either min, max or both for range name '%s'", nr.Name)
  125. }
  126. }
  127. } else {
  128. dateTimeParser, err := cache.DateTimeParserNamed(defaultDateTimeParser)
  129. if err != nil {
  130. return err
  131. }
  132. drNames := map[string]interface{}{}
  133. for _, dr := range fr.DateTimeRanges {
  134. if _, ok := drNames[dr.Name]; ok {
  135. return fmt.Errorf("date ranges contains duplicate name '%s'", dr.Name)
  136. }
  137. drNames[dr.Name] = struct{}{}
  138. start, end := dr.ParseDates(dateTimeParser)
  139. if start.IsZero() && end.IsZero() {
  140. return fmt.Errorf("date range query must specify either start, end or both for range name '%s'", dr.Name)
  141. }
  142. }
  143. }
  144. return nil
  145. }
  146. // NewFacetRequest creates a facet on the specified
  147. // field that limits the number of entries to the
  148. // specified size.
  149. func NewFacetRequest(field string, size int) *FacetRequest {
  150. return &FacetRequest{
  151. Field: field,
  152. Size: size,
  153. }
  154. }
  155. // AddDateTimeRange adds a bucket to a field
  156. // containing date values. Documents with a
  157. // date value falling into this range are tabulated
  158. // as part of this bucket/range.
  159. func (fr *FacetRequest) AddDateTimeRange(name string, start, end time.Time) {
  160. if fr.DateTimeRanges == nil {
  161. fr.DateTimeRanges = make([]*dateTimeRange, 0, 1)
  162. }
  163. fr.DateTimeRanges = append(fr.DateTimeRanges, &dateTimeRange{Name: name, Start: start, End: end})
  164. }
  165. // AddDateTimeRangeString adds a bucket to a field
  166. // containing date values.
  167. func (fr *FacetRequest) AddDateTimeRangeString(name string, start, end *string) {
  168. if fr.DateTimeRanges == nil {
  169. fr.DateTimeRanges = make([]*dateTimeRange, 0, 1)
  170. }
  171. fr.DateTimeRanges = append(fr.DateTimeRanges,
  172. &dateTimeRange{Name: name, startString: start, endString: end})
  173. }
  174. // AddNumericRange adds a bucket to a field
  175. // containing numeric values. Documents with a
  176. // numeric value falling into this range are
  177. // tabulated as part of this bucket/range.
  178. func (fr *FacetRequest) AddNumericRange(name string, min, max *float64) {
  179. if fr.NumericRanges == nil {
  180. fr.NumericRanges = make([]*numericRange, 0, 1)
  181. }
  182. fr.NumericRanges = append(fr.NumericRanges, &numericRange{Name: name, Min: min, Max: max})
  183. }
  184. // FacetsRequest groups together all the
  185. // FacetRequest objects for a single query.
  186. type FacetsRequest map[string]*FacetRequest
  187. func (fr FacetsRequest) Validate() error {
  188. for _, v := range fr {
  189. err := v.Validate()
  190. if err != nil {
  191. return err
  192. }
  193. }
  194. return nil
  195. }
  196. // HighlightRequest describes how field matches
  197. // should be highlighted.
  198. type HighlightRequest struct {
  199. Style *string `json:"style"`
  200. Fields []string `json:"fields"`
  201. }
  202. // NewHighlight creates a default
  203. // HighlightRequest.
  204. func NewHighlight() *HighlightRequest {
  205. return &HighlightRequest{}
  206. }
  207. // NewHighlightWithStyle creates a HighlightRequest
  208. // with an alternate style.
  209. func NewHighlightWithStyle(style string) *HighlightRequest {
  210. return &HighlightRequest{
  211. Style: &style,
  212. }
  213. }
  214. func (h *HighlightRequest) AddField(field string) {
  215. if h.Fields == nil {
  216. h.Fields = make([]string, 0, 1)
  217. }
  218. h.Fields = append(h.Fields, field)
  219. }
// A SearchRequest describes all the parameters
// needed to search the index.
// Query is required.
// Size/From describe how much and which part of the
// result set to return.
// Highlight describes optional search result
// highlighting.
// Fields describes a list of field values which
// should be retrieved for result documents, provided they
// were stored while indexing.
// Facets describe the set of facets to be computed.
// Explain triggers inclusion of additional search
// result score explanations.
// Sort describes the desired order for the results to be returned.
// Score controls the kind of scoring performed.
//
// A special field named "*" can be used to return all fields.
type SearchRequest struct {
	Query            query.Query       `json:"query"`
	Size             int               `json:"size"`
	From             int               `json:"from"`
	Highlight        *HighlightRequest `json:"highlight"`
	Fields           []string          `json:"fields"`
	Facets           FacetsRequest     `json:"facets"`
	Explain          bool              `json:"explain"`
	Sort             search.SortOrder  `json:"sort"`
	IncludeLocations bool              `json:"includeLocations"`
	Score            string            `json:"score,omitempty"`
}
  249. func (r *SearchRequest) Validate() error {
  250. if srq, ok := r.Query.(query.ValidatableQuery); ok {
  251. err := srq.Validate()
  252. if err != nil {
  253. return err
  254. }
  255. }
  256. return r.Facets.Validate()
  257. }
  258. // AddFacet adds a FacetRequest to this SearchRequest
  259. func (r *SearchRequest) AddFacet(facetName string, f *FacetRequest) {
  260. if r.Facets == nil {
  261. r.Facets = make(FacetsRequest, 1)
  262. }
  263. r.Facets[facetName] = f
  264. }
  265. // SortBy changes the request to use the requested sort order
  266. // this form uses the simplified syntax with an array of strings
  267. // each string can either be a field name
  268. // or the magic value _id and _score which refer to the doc id and search score
  269. // any of these values can optionally be prefixed with - to reverse the order
  270. func (r *SearchRequest) SortBy(order []string) {
  271. so := search.ParseSortOrderStrings(order)
  272. r.Sort = so
  273. }
// SortByCustom changes the request to use the requested sort order.
// Unlike SortBy, the caller supplies an already constructed
// search.SortOrder, which is stored as-is.
func (r *SearchRequest) SortByCustom(order search.SortOrder) {
	r.Sort = order
}
  278. // UnmarshalJSON deserializes a JSON representation of
  279. // a SearchRequest
  280. func (r *SearchRequest) UnmarshalJSON(input []byte) error {
  281. var temp struct {
  282. Q json.RawMessage `json:"query"`
  283. Size *int `json:"size"`
  284. From int `json:"from"`
  285. Highlight *HighlightRequest `json:"highlight"`
  286. Fields []string `json:"fields"`
  287. Facets FacetsRequest `json:"facets"`
  288. Explain bool `json:"explain"`
  289. Sort []json.RawMessage `json:"sort"`
  290. IncludeLocations bool `json:"includeLocations"`
  291. Score string `json:"score"`
  292. }
  293. err := json.Unmarshal(input, &temp)
  294. if err != nil {
  295. return err
  296. }
  297. if temp.Size == nil {
  298. r.Size = 10
  299. } else {
  300. r.Size = *temp.Size
  301. }
  302. if temp.Sort == nil {
  303. r.Sort = search.SortOrder{&search.SortScore{Desc: true}}
  304. } else {
  305. r.Sort, err = search.ParseSortOrderJSON(temp.Sort)
  306. if err != nil {
  307. return err
  308. }
  309. }
  310. r.From = temp.From
  311. r.Explain = temp.Explain
  312. r.Highlight = temp.Highlight
  313. r.Fields = temp.Fields
  314. r.Facets = temp.Facets
  315. r.IncludeLocations = temp.IncludeLocations
  316. r.Score = temp.Score
  317. r.Query, err = query.ParseQuery(temp.Q)
  318. if err != nil {
  319. return err
  320. }
  321. if r.Size < 0 {
  322. r.Size = 10
  323. }
  324. if r.From < 0 {
  325. r.From = 0
  326. }
  327. return nil
  328. }
  329. // NewSearchRequest creates a new SearchRequest
  330. // for the Query, using default values for all
  331. // other search parameters.
  332. func NewSearchRequest(q query.Query) *SearchRequest {
  333. return NewSearchRequestOptions(q, 10, 0, false)
  334. }
  335. // NewSearchRequestOptions creates a new SearchRequest
  336. // for the Query, with the requested size, from
  337. // and explanation search parameters.
  338. // By default results are ordered by score, descending.
  339. func NewSearchRequestOptions(q query.Query, size, from int, explain bool) *SearchRequest {
  340. return &SearchRequest{
  341. Query: q,
  342. Size: size,
  343. From: from,
  344. Explain: explain,
  345. Sort: search.SortOrder{&search.SortScore{Desc: true}},
  346. }
  347. }
  348. // IndexErrMap tracks errors with the name of the index where it occurred
  349. type IndexErrMap map[string]error
  350. // MarshalJSON seralizes the error into a string for JSON consumption
  351. func (iem IndexErrMap) MarshalJSON() ([]byte, error) {
  352. tmp := make(map[string]string, len(iem))
  353. for k, v := range iem {
  354. tmp[k] = v.Error()
  355. }
  356. return json.Marshal(tmp)
  357. }
  358. func (iem IndexErrMap) UnmarshalJSON(data []byte) error {
  359. var tmp map[string]string
  360. err := json.Unmarshal(data, &tmp)
  361. if err != nil {
  362. return err
  363. }
  364. for k, v := range tmp {
  365. iem[k] = fmt.Errorf("%s", v)
  366. }
  367. return nil
  368. }
  369. // SearchStatus is a secion in the SearchResult reporting how many
  370. // underlying indexes were queried, how many were successful/failed
  371. // and a map of any errors that were encountered
  372. type SearchStatus struct {
  373. Total int `json:"total"`
  374. Failed int `json:"failed"`
  375. Successful int `json:"successful"`
  376. Errors IndexErrMap `json:"errors,omitempty"`
  377. }
  378. // Merge will merge together multiple SearchStatuses during a MultiSearch
  379. func (ss *SearchStatus) Merge(other *SearchStatus) {
  380. ss.Total += other.Total
  381. ss.Failed += other.Failed
  382. ss.Successful += other.Successful
  383. if len(other.Errors) > 0 {
  384. if ss.Errors == nil {
  385. ss.Errors = make(map[string]error)
  386. }
  387. for otherIndex, otherError := range other.Errors {
  388. ss.Errors[otherIndex] = otherError
  389. }
  390. }
  391. }
// A SearchResult describes the results of executing
// a SearchRequest.
//
// Status reports per-index success/failure, Request is the request the
// result belongs to, Hits holds the matched documents, Total is the
// number of matches (serialized as "total_hits"), MaxScore is the
// highest score seen, Took is the elapsed time and Facets holds the
// computed facet results.
type SearchResult struct {
	Status   *SearchStatus                  `json:"status"`
	Request  *SearchRequest                 `json:"request"`
	Hits     search.DocumentMatchCollection `json:"hits"`
	Total    uint64                         `json:"total_hits"`
	MaxScore float64                        `json:"max_score"`
	Took     time.Duration                  `json:"took"`
	Facets   search.FacetResults            `json:"facets"`
}
  403. func (sr *SearchResult) Size() int {
  404. sizeInBytes := reflectStaticSizeSearchResult + size.SizeOfPtr +
  405. reflectStaticSizeSearchStatus
  406. for _, entry := range sr.Hits {
  407. if entry != nil {
  408. sizeInBytes += entry.Size()
  409. }
  410. }
  411. for k, v := range sr.Facets {
  412. sizeInBytes += size.SizeOfString + len(k) +
  413. v.Size()
  414. }
  415. return sizeInBytes
  416. }
  417. func (sr *SearchResult) String() string {
  418. rv := ""
  419. if sr.Total > 0 {
  420. if sr.Request.Size > 0 {
  421. rv = fmt.Sprintf("%d matches, showing %d through %d, took %s\n", sr.Total, sr.Request.From+1, sr.Request.From+len(sr.Hits), sr.Took)
  422. for i, hit := range sr.Hits {
  423. rv += fmt.Sprintf("%5d. %s (%f)\n", i+sr.Request.From+1, hit.ID, hit.Score)
  424. for fragmentField, fragments := range hit.Fragments {
  425. rv += fmt.Sprintf("\t%s\n", fragmentField)
  426. for _, fragment := range fragments {
  427. rv += fmt.Sprintf("\t\t%s\n", fragment)
  428. }
  429. }
  430. for otherFieldName, otherFieldValue := range hit.Fields {
  431. if _, ok := hit.Fragments[otherFieldName]; !ok {
  432. rv += fmt.Sprintf("\t%s\n", otherFieldName)
  433. rv += fmt.Sprintf("\t\t%v\n", otherFieldValue)
  434. }
  435. }
  436. }
  437. } else {
  438. rv = fmt.Sprintf("%d matches, took %s\n", sr.Total, sr.Took)
  439. }
  440. } else {
  441. rv = "No matches"
  442. }
  443. if len(sr.Facets) > 0 {
  444. rv += fmt.Sprintf("Facets:\n")
  445. for fn, f := range sr.Facets {
  446. rv += fmt.Sprintf("%s(%d)\n", fn, f.Total)
  447. for _, t := range f.Terms {
  448. rv += fmt.Sprintf("\t%s(%d)\n", t.Term, t.Count)
  449. }
  450. if f.Other != 0 {
  451. rv += fmt.Sprintf("\tOther(%d)\n", f.Other)
  452. }
  453. }
  454. }
  455. return rv
  456. }
  457. // Merge will merge together multiple SearchResults during a MultiSearch
  458. func (sr *SearchResult) Merge(other *SearchResult) {
  459. sr.Status.Merge(other.Status)
  460. sr.Hits = append(sr.Hits, other.Hits...)
  461. sr.Total += other.Total
  462. if other.MaxScore > sr.MaxScore {
  463. sr.MaxScore = other.MaxScore
  464. }
  465. if sr.Facets == nil && len(other.Facets) != 0 {
  466. sr.Facets = other.Facets
  467. return
  468. }
  469. sr.Facets.Merge(other.Facets)
  470. }
  471. // MemoryNeededForSearchResult is an exported helper function to determine the RAM
  472. // needed to accommodate the results for a given search request.
  473. func MemoryNeededForSearchResult(req *SearchRequest) uint64 {
  474. if req == nil {
  475. return 0
  476. }
  477. numDocMatches := req.Size + req.From
  478. if req.Size+req.From > collector.PreAllocSizeSkipCap {
  479. numDocMatches = collector.PreAllocSizeSkipCap
  480. }
  481. estimate := 0
  482. // overhead from the SearchResult structure
  483. var sr SearchResult
  484. estimate += sr.Size()
  485. var dm search.DocumentMatch
  486. sizeOfDocumentMatch := dm.Size()
  487. // overhead from results
  488. estimate += numDocMatches * sizeOfDocumentMatch
  489. // overhead from facet results
  490. if req.Facets != nil {
  491. var fr search.FacetResult
  492. estimate += len(req.Facets) * fr.Size()
  493. }
  494. // highlighting, store
  495. var d document.Document
  496. if len(req.Fields) > 0 || req.Highlight != nil {
  497. for i := 0; i < (req.Size + req.From); i++ {
  498. estimate += (req.Size + req.From) * d.Size()
  499. }
  500. }
  501. return uint64(estimate)
  502. }