index.go 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bleve
  15. import (
  16. "context"
  17. "github.com/blevesearch/bleve/document"
  18. "github.com/blevesearch/bleve/index"
  19. "github.com/blevesearch/bleve/index/store"
  20. "github.com/blevesearch/bleve/mapping"
  21. "github.com/blevesearch/bleve/size"
  22. )
  23. // A Batch groups together multiple Index and Delete
  24. // operations you would like performed at the same
  25. // time. The Batch structure is NOT thread-safe.
  26. // You should only perform operations on a batch
  27. // from a single thread at a time. Once batch
  28. // execution has started, you may not modify it.
  29. type Batch struct {
  30. index Index
  31. internal *index.Batch
  32. lastDocSize uint64
  33. totalSize uint64
  34. }
  35. // Index adds the specified index operation to the
  36. // batch. NOTE: the bleve Index is not updated
  37. // until the batch is executed.
  38. func (b *Batch) Index(id string, data interface{}) error {
  39. if id == "" {
  40. return ErrorEmptyID
  41. }
  42. doc := document.NewDocument(id)
  43. err := b.index.Mapping().MapDocument(doc, data)
  44. if err != nil {
  45. return err
  46. }
  47. b.internal.Update(doc)
  48. b.lastDocSize = uint64(doc.Size() +
  49. len(id) + size.SizeOfString) // overhead from internal
  50. b.totalSize += b.lastDocSize
  51. return nil
  52. }
  53. func (b *Batch) LastDocSize() uint64 {
  54. return b.lastDocSize
  55. }
  56. func (b *Batch) TotalDocsSize() uint64 {
  57. return b.totalSize
  58. }
  59. // IndexAdvanced adds the specified index operation to the
  60. // batch which skips the mapping. NOTE: the bleve Index is not updated
  61. // until the batch is executed.
  62. func (b *Batch) IndexAdvanced(doc *document.Document) (err error) {
  63. if doc.ID == "" {
  64. return ErrorEmptyID
  65. }
  66. b.internal.Update(doc)
  67. return nil
  68. }
  69. // Delete adds the specified delete operation to the
  70. // batch. NOTE: the bleve Index is not updated until
  71. // the batch is executed.
  72. func (b *Batch) Delete(id string) {
  73. if id != "" {
  74. b.internal.Delete(id)
  75. }
  76. }
  77. // SetInternal adds the specified set internal
  78. // operation to the batch. NOTE: the bleve Index is
  79. // not updated until the batch is executed.
  80. func (b *Batch) SetInternal(key, val []byte) {
  81. b.internal.SetInternal(key, val)
  82. }
  83. // DeleteInternal adds the specified delete internal
  84. // operation to the batch. NOTE: the bleve Index is
  85. // not updated until the batch is executed.
  86. func (b *Batch) DeleteInternal(key []byte) {
  87. b.internal.DeleteInternal(key)
  88. }
  89. // Size returns the total number of operations inside the batch
  90. // including normal index operations and internal operations.
  91. func (b *Batch) Size() int {
  92. return len(b.internal.IndexOps) + len(b.internal.InternalOps)
  93. }
  94. // String prints a user friendly string representation of what
  95. // is inside this batch.
  96. func (b *Batch) String() string {
  97. return b.internal.String()
  98. }
  99. // Reset returns a Batch to the empty state so that it can
  100. // be re-used in the future.
  101. func (b *Batch) Reset() {
  102. b.internal.Reset()
  103. b.lastDocSize = 0
  104. b.totalSize = 0
  105. }
  106. func (b *Batch) Merge(o *Batch) {
  107. if o != nil && o.internal != nil {
  108. b.internal.Merge(o.internal)
  109. if o.LastDocSize() > 0 {
  110. b.lastDocSize = o.LastDocSize()
  111. }
  112. b.totalSize = uint64(b.internal.TotalDocSize())
  113. }
  114. }
  115. func (b *Batch) SetPersistedCallback(f index.BatchCallback) {
  116. b.internal.SetPersistedCallback(f)
  117. }
  118. func (b *Batch) PersistedCallback() index.BatchCallback {
  119. return b.internal.PersistedCallback()
  120. }
  121. // An Index implements all the indexing and searching
  122. // capabilities of bleve. An Index can be created
  123. // using the New() and Open() methods.
  124. //
  125. // Index() takes an input value, deduces a DocumentMapping for its type,
  126. // assigns string paths to its fields or values then applies field mappings on
  127. // them.
  128. //
  129. // The DocumentMapping used to index a value is deduced by the following rules:
  130. // 1) If value implements mapping.bleveClassifier interface, resolve the mapping
  131. // from BleveType().
  132. // 2) If value implements mapping.Classifier interface, resolve the mapping
  133. // from Type().
  134. // 3) If value has a string field or value at IndexMapping.TypeField.
  135. // (defaulting to "_type"), use it to resolve the mapping. Fields addressing
  136. // is described below.
  137. // 4) If IndexMapping.DefaultType is registered, return it.
  138. // 5) Return IndexMapping.DefaultMapping.
  139. //
  140. // Each field or nested field of the value is identified by a string path, then
  141. // mapped to one or several FieldMappings which extract the result for analysis.
  142. //
  143. // Struct values fields are identified by their "json:" tag, or by their name.
  144. // Nested fields are identified by prefixing with their parent identifier,
  145. // separated by a dot.
  146. //
  147. // Map values entries are identified by their string key. Entries not indexed
  148. // by strings are ignored. Entry values are identified recursively like struct
  149. // fields.
  150. //
  151. // Slice and array values are identified by their field name. Their elements
  152. // are processed sequentially with the same FieldMapping.
  153. //
  154. // String, float64 and time.Time values are identified by their field name.
  155. // Other types are ignored.
  156. //
  157. // Each value identifier is decomposed in its parts and recursively address
  158. // SubDocumentMappings in the tree starting at the root DocumentMapping. If a
  159. // mapping is found, all its FieldMappings are applied to the value. If no
  160. // mapping is found and the root DocumentMapping is dynamic, default mappings
  161. // are used based on value type and IndexMapping default configurations.
  162. //
  163. // Finally, mapped values are analyzed, indexed or stored. See
  164. // FieldMapping.Analyzer to know how an analyzer is resolved for a given field.
  165. //
  166. // Examples:
  167. //
  168. // type Date struct {
  169. // Day string `json:"day"`
  170. // Month string
  171. // Year string
  172. // }
  173. //
  174. // type Person struct {
  175. // FirstName string `json:"first_name"`
  176. // LastName string
  177. // BirthDate Date `json:"birth_date"`
  178. // }
  179. //
  180. // A Person value FirstName is mapped by the SubDocumentMapping at
  181. // "first_name". Its LastName is mapped by the one at "LastName". The day of
  182. // BirthDate is mapped to the SubDocumentMapping "day" of the root
  183. // SubDocumentMapping "birth_date". It will appear as the "birth_date.day"
  184. // field in the index. The month is mapped to "birth_date.Month".
  185. type Index interface {
  186. // Index analyzes, indexes or stores mapped data fields. Supplied
  187. // identifier is bound to analyzed data and will be retrieved by search
  188. // requests. See Index interface documentation for details about mapping
  189. // rules.
  190. Index(id string, data interface{}) error
  191. Delete(id string) error
  192. NewBatch() *Batch
  193. Batch(b *Batch) error
  194. // Document returns specified document or nil if the document is not
  195. // indexed or stored.
  196. Document(id string) (*document.Document, error)
  197. // DocCount returns the number of documents in the index.
  198. DocCount() (uint64, error)
  199. Search(req *SearchRequest) (*SearchResult, error)
  200. SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error)
  201. Fields() ([]string, error)
  202. FieldDict(field string) (index.FieldDict, error)
  203. FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error)
  204. FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error)
  205. Close() error
  206. Mapping() mapping.IndexMapping
  207. Stats() *IndexStat
  208. StatsMap() map[string]interface{}
  209. GetInternal(key []byte) ([]byte, error)
  210. SetInternal(key, val []byte) error
  211. DeleteInternal(key []byte) error
  212. // Name returns the name of the index (by default this is the path)
  213. Name() string
  214. // SetName lets you assign your own logical name to this index
  215. SetName(string)
  216. // Advanced returns the indexer and data store, exposing lower level
  217. // methods to enumerate records and access data.
  218. Advanced() (index.Index, store.KVStore, error)
  219. }
  220. // New index at the specified path, must not exist.
  221. // The provided mapping will be used for all
  222. // Index/Search operations.
  223. func New(path string, mapping mapping.IndexMapping) (Index, error) {
  224. return newIndexUsing(path, mapping, Config.DefaultIndexType, Config.DefaultKVStore, nil)
  225. }
  226. // NewMemOnly creates a memory-only index.
  227. // The contents of the index is NOT persisted,
  228. // and will be lost once closed.
  229. // The provided mapping will be used for all
  230. // Index/Search operations.
  231. func NewMemOnly(mapping mapping.IndexMapping) (Index, error) {
  232. return newIndexUsing("", mapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil)
  233. }
  234. // NewUsing creates index at the specified path,
  235. // which must not already exist.
  236. // The provided mapping will be used for all
  237. // Index/Search operations.
  238. // The specified index type will be used.
  239. // The specified kvstore implementation will be used
  240. // and the provided kvconfig will be passed to its
  241. // constructor. Note that currently the values of kvconfig must
  242. // be able to be marshaled and unmarshaled using the encoding/json library (used
  243. // when reading/writing the index metadata file).
  244. func NewUsing(path string, mapping mapping.IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (Index, error) {
  245. return newIndexUsing(path, mapping, indexType, kvstore, kvconfig)
  246. }
  247. // Open index at the specified path, must exist.
  248. // The mapping used when it was created will be used for all Index/Search operations.
  249. func Open(path string) (Index, error) {
  250. return openIndexUsing(path, nil)
  251. }
  252. // OpenUsing opens index at the specified path, must exist.
  253. // The mapping used when it was created will be used for all Index/Search operations.
  254. // The provided runtimeConfig can override settings
  255. // persisted when the kvstore was created.
  256. func OpenUsing(path string, runtimeConfig map[string]interface{}) (Index, error) {
  257. return openIndexUsing(path, runtimeConfig)
  258. }