versus_test.go 14 KB


  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package test
  15. import (
  16. "bytes"
  17. "encoding/json"
  18. "fmt"
  19. "math"
  20. "math/rand"
  21. "os"
  22. "reflect"
  23. "strconv"
  24. "strings"
  25. "testing"
  26. "text/template"
  27. "github.com/blevesearch/bleve"
  28. "github.com/blevesearch/bleve/index/scorch"
  29. "github.com/blevesearch/bleve/index/store/boltdb"
  30. "github.com/blevesearch/bleve/index/upsidedown"
  31. "github.com/blevesearch/bleve/mapping"
  32. "github.com/blevesearch/bleve/search"
  33. )
  34. // Tests scorch indexer versus upsidedown/bolt indexer against various
  35. // templated queries. Example usage from the bleve top-level directory...
  36. //
  37. // go test -v -run TestScorchVersusUpsideDownBolt ./test
  38. // VERBOSE=1 FOCUS=Trista go test -v -run TestScorchVersusUpsideDownBolt ./test
  39. //
  40. func init() {
  41. // override for tests
  42. scorch.DefaultPersisterNapTimeMSec = 1
  43. }
  44. func TestScorchVersusUpsideDownBoltAll(t *testing.T) {
  45. (&VersusTest{
  46. t: t,
  47. NumDocs: 1000,
  48. MaxWordsPerDoc: 20,
  49. NumWords: 10,
  50. BatchSize: 10,
  51. NumAttemptsPerSearch: 100,
  52. }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil)
  53. }
  54. func TestScorchVersusUpsideDownBoltSmallMNSAM(t *testing.T) {
  55. (&VersusTest{
  56. t: t,
  57. Focus: "must-not-same-as-must",
  58. NumDocs: 5,
  59. MaxWordsPerDoc: 2,
  60. NumWords: 1,
  61. BatchSize: 1,
  62. NumAttemptsPerSearch: 1,
  63. }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil)
  64. }
  65. func TestScorchVersusUpsideDownBoltSmallCMP11(t *testing.T) {
  66. (&VersusTest{
  67. t: t,
  68. Focus: "conjuncts-match-phrase-1-1",
  69. NumDocs: 30,
  70. MaxWordsPerDoc: 8,
  71. NumWords: 2,
  72. BatchSize: 1,
  73. NumAttemptsPerSearch: 1,
  74. }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil)
  75. }
  76. // -------------------------------------------------------
  77. // Templates used to compare search results in the "versus" tests.
  78. var testVersusSearchTemplates = []string{
  79. `{
  80. "about": "expected to return zero hits",
  81. "query": {
  82. "query": "title:notARealTitle"
  83. }
  84. }`,
  85. `{
  86. "about": "try straight word()'s",
  87. "query": {
  88. "query": "body:{{word}}"
  89. }
  90. }`,
  91. `{
  92. "about": "conjuncts on same term",
  93. "query": {
  94. "conjuncts": [
  95. { "field": "body", "term": "{{word}}", "boost": 1.0 },
  96. { "field": "body", "term": "{{word}}", "boost": 1.0 }
  97. ]
  98. }
  99. }`,
  100. `{
  101. "about": "disjuncts on same term",
  102. "query": {
  103. "disjuncts": [
  104. { "field": "body", "term": "{{word}}", "boost": 1.0 },
  105. { "field": "body", "term": "{{word}}", "boost": 1.0 }
  106. ]
  107. }
  108. }`,
  109. `{
  110. "about": "never-matching-title-conjuncts",
  111. "query": {
  112. "conjuncts": [
  113. {"field": "body", "match": "{{word}}"},
  114. {"field": "body", "match": "{{word}}"},
  115. {"field": "title", "match": "notAnActualTitle"}
  116. ]
  117. }
  118. }`,
  119. `{
  120. "about": "never-matching-title-disjuncts",
  121. "query": {
  122. "disjuncts": [
  123. {"field": "body", "match": "{{word}}"},
  124. {"field": "body", "match": "{{word}}"},
  125. {"field": "title", "match": "notAnActualTitle"}
  126. ]
  127. }
  128. }`,
  129. `{
  130. "about": "must-not-never-matches",
  131. "query": {
  132. "must_not": {"disjuncts": [
  133. {"field": "title", "match": "notAnActualTitle"}
  134. ]},
  135. "should": {"disjuncts": [
  136. {"field": "body", "match": "{{word}}"}
  137. ]}
  138. }
  139. }`,
  140. `{
  141. "about": "must-not-only",
  142. "query": {
  143. "must_not": {"disjuncts": [
  144. {"field": "body", "term": "{{word}}"}
  145. ]}
  146. }
  147. }`,
  148. `{
  149. "about": "must-not-same-as-must -- see: MB-27291",
  150. "query": {
  151. "must_not": {"disjuncts": [
  152. {"field": "body", "match": "{{word}}"}
  153. ]},
  154. "must": {"conjuncts": [
  155. {"field": "body", "match": "{{word}}"}
  156. ]}
  157. }
  158. }`,
  159. `{
  160. "about": "must-not-same-as-should",
  161. "query": {
  162. "must_not": {"disjuncts": [
  163. {"field": "body", "match": "{{word}}"}
  164. ]},
  165. "should": {"disjuncts": [
  166. {"field": "body", "match": "{{word}}"}
  167. ]}
  168. }
  169. }`,
  170. `{
  171. "about": "inspired by testrunner RQG issue -- see: MB-27291",
  172. "query": {
  173. "must_not": {"disjuncts": [
  174. {"field": "title", "match": "Trista Allen"},
  175. {"field": "body", "match": "{{word}}"}
  176. ]},
  177. "should": {"disjuncts": [
  178. {"field": "title", "match": "Kallie Safiya Amara"},
  179. {"field": "body", "match": "{{word}}"}
  180. ]}
  181. }
  182. }`,
  183. `{
  184. "about": "conjuncts-match-phrase-1-1 inspired by testrunner RQG issue -- see: MB-27291",
  185. "query": {
  186. "conjuncts": [
  187. {"field": "body", "match": "{{bodyWord 0}}"},
  188. {"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 1}}"}
  189. ]
  190. }
  191. }`,
  192. `{
  193. "about": "conjuncts-match-phrase-1-2 inspired by testrunner RQG issue -- see: MB-27291 -- FAILS!!",
  194. "query": {
  195. "conjuncts": [
  196. {"field": "body", "match": "{{bodyWord 0}}"},
  197. {"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 2}}"}
  198. ]
  199. }
  200. }`,
  201. }
  202. // -------------------------------------------------------
  203. type VersusTest struct {
  204. t *testing.T
  205. // Use environment variable VERBOSE=<integer> that's > 0 for more
  206. // verbose output.
  207. Verbose int
  208. // Allow user to focus on particular search templates, where
  209. // where the search template must contain the Focus string.
  210. Focus string
  211. NumDocs int // Number of docs to insert.
  212. MaxWordsPerDoc int // Max number words in each doc's Body field.
  213. NumWords int // Total number of words in the dictionary.
  214. BatchSize int // Batch size when inserting docs.
  215. NumAttemptsPerSearch int // For each search template, number of searches to try.
  216. // The Bodies is an array with length NumDocs, where each entry
  217. // is the words in a doc's Body field.
  218. Bodies [][]string
  219. CurAttempt int
  220. TotAttempts int
  221. }
  222. // -------------------------------------------------------
  223. func testVersusSearches(vt *VersusTest, searchTemplates []string, idxA, idxB bleve.Index) {
  224. t := vt.t
  225. funcMap := template.FuncMap{
  226. // Returns a word. The word may or may not be in any
  227. // document's body.
  228. "word": func() string {
  229. return vt.genWord(vt.CurAttempt % vt.NumWords)
  230. },
  231. // Picks a document and returns the i'th word in that
  232. // document's body. You can use this in searches to
  233. // definitely find at least one document.
  234. "bodyWord": func(i int) string {
  235. body := vt.Bodies[vt.CurAttempt%len(vt.Bodies)]
  236. if len(body) == 0 {
  237. return ""
  238. }
  239. return body[i%len(body)]
  240. },
  241. }
  242. // Optionally allow call to focus on a particular search templates,
  243. // where the search template must contain the vt.Focus string.
  244. if vt.Focus == "" {
  245. vt.Focus = os.Getenv("FOCUS")
  246. }
  247. for i, searchTemplate := range searchTemplates {
  248. if vt.Focus != "" && !strings.Contains(searchTemplate, vt.Focus) {
  249. continue
  250. }
  251. tmpl, err := template.New("search").Funcs(funcMap).Parse(searchTemplate)
  252. if err != nil {
  253. t.Fatalf("could not parse search template: %s, err: %v", searchTemplate, err)
  254. }
  255. for j := 0; j < vt.NumAttemptsPerSearch; j++ {
  256. vt.CurAttempt = j
  257. var buf bytes.Buffer
  258. err = tmpl.Execute(&buf, vt)
  259. if err != nil {
  260. t.Fatalf("could not execute search template: %s, err: %v", searchTemplate, err)
  261. }
  262. bufBytes := buf.Bytes()
  263. if vt.Verbose > 0 {
  264. fmt.Printf(" %s\n", bufBytes)
  265. }
  266. var search bleve.SearchRequest
  267. err = json.Unmarshal(bufBytes, &search)
  268. if err != nil {
  269. t.Fatalf("could not unmarshal search: %s, err: %v", bufBytes, err)
  270. }
  271. search.Size = vt.NumDocs * 10 // Crank up limit to get all results.
  272. searchA := search
  273. searchB := search
  274. resA, errA := idxA.Search(&searchA)
  275. resB, errB := idxB.Search(&searchB)
  276. if errA != errB {
  277. t.Errorf("search: (%d) %s,\n err mismatch, errA: %v, errB: %v",
  278. i, bufBytes, errA, errB)
  279. }
  280. // Scores might have float64 vs float32 wobbles, so truncate precision.
  281. resA.MaxScore = math.Trunc(resA.MaxScore*1000.0) / 1000.0
  282. resB.MaxScore = math.Trunc(resB.MaxScore*1000.0) / 1000.0
  283. // Timings may be different between A & B, so force equality.
  284. resA.Took = resB.Took
  285. // Hits might have different ordering since some indexers
  286. // (like upsidedown) have a natural secondary sort on id
  287. // while others (like scorch) don't. So, we compare by
  288. // putting the hits from A & B into maps.
  289. hitsA := hitsById(resA)
  290. hitsB := hitsById(resB)
  291. for id, hitA := range hitsA {
  292. hitB := hitsB[id]
  293. if len(hitA.FieldTermLocations) == 0 {
  294. hitA.FieldTermLocations = nil
  295. }
  296. if len(hitB.FieldTermLocations) == 0 {
  297. hitB.FieldTermLocations = nil
  298. }
  299. if !reflect.DeepEqual(hitA, hitB) {
  300. t.Errorf("\n driving from hitsA\n hitA: %#v,\n hitB: %#v", hitA, hitB)
  301. idx, _ := strconv.Atoi(id)
  302. t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " "))
  303. }
  304. }
  305. for id, hitB := range hitsB {
  306. hitA := hitsA[id]
  307. if len(hitA.FieldTermLocations) == 0 {
  308. hitA.FieldTermLocations = nil
  309. }
  310. if len(hitB.FieldTermLocations) == 0 {
  311. hitB.FieldTermLocations = nil
  312. }
  313. if !reflect.DeepEqual(hitA, hitB) {
  314. t.Errorf("\n driving from hitsB\n hitA: %#v,\n hitB: %#v", hitA, hitB)
  315. idx, _ := strconv.Atoi(id)
  316. t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " "))
  317. }
  318. }
  319. if !reflect.DeepEqual(hitsA, hitsB) {
  320. t.Errorf("=========\nsearch: (%d) %s,\n res hits mismatch,\n len(hitsA): %d,\n len(hitsB): %d",
  321. i, bufBytes, len(hitsA), len(hitsB))
  322. t.Errorf("\n hitsA: %#v,\n hitsB: %#v",
  323. hitsA, hitsB)
  324. }
  325. resA.Hits = nil
  326. resB.Hits = nil
  327. if !reflect.DeepEqual(resA, resB) {
  328. resAj, _ := json.Marshal(resA)
  329. resBj, _ := json.Marshal(resB)
  330. t.Errorf("search: (%d) %s,\n res mismatch,\n resA: %s,\n resB: %s",
  331. i, bufBytes, resAj, resBj)
  332. }
  333. if vt.Verbose > 0 {
  334. fmt.Printf(" Total: (%t) %d\n", resA.Total == resB.Total, resA.Total)
  335. }
  336. vt.TotAttempts++
  337. }
  338. }
  339. }
  340. // Organizes the hits into a map keyed by id.
  341. func hitsById(res *bleve.SearchResult) map[string]*search.DocumentMatch {
  342. rv := make(map[string]*search.DocumentMatch, len(res.Hits))
  343. for _, hit := range res.Hits {
  344. // Clear out or truncate precision of hit fields that might be
  345. // different across different indexer implementations.
  346. hit.Index = ""
  347. hit.Score = math.Trunc(hit.Score*1000.0) / 1000.0
  348. hit.IndexInternalID = nil
  349. hit.HitNumber = 0
  350. rv[hit.ID] = hit
  351. }
  352. return rv
  353. }
  354. // -------------------------------------------------------
  355. func (vt *VersusTest) run(indexTypeA, kvStoreA, indexTypeB, kvStoreB string,
  356. cb func(versusTest *VersusTest, searchTemplates []string, idxA, idxB bleve.Index),
  357. searchTemplates []string) {
  358. if cb == nil {
  359. cb = testVersusSearches
  360. }
  361. if searchTemplates == nil {
  362. searchTemplates = testVersusSearchTemplates
  363. }
  364. if vt.Verbose <= 0 {
  365. vt.Verbose, _ = strconv.Atoi(os.Getenv("VERBOSE"))
  366. }
  367. dirA := "/tmp/bleve-versus-test-a"
  368. dirB := "/tmp/bleve-versus-test-b"
  369. defer func() {
  370. _ = os.RemoveAll(dirA)
  371. _ = os.RemoveAll(dirB)
  372. }()
  373. _ = os.RemoveAll(dirA)
  374. _ = os.RemoveAll(dirB)
  375. imA := vt.makeIndexMapping()
  376. imB := vt.makeIndexMapping()
  377. kvConfigA := map[string]interface{}{}
  378. kvConfigB := map[string]interface{}{}
  379. idxA, err := bleve.NewUsing(dirA, imA, indexTypeA, kvStoreA, kvConfigA)
  380. if err != nil || idxA == nil {
  381. vt.t.Fatalf("new using err: %v", err)
  382. }
  383. defer func() { _ = idxA.Close() }()
  384. idxB, err := bleve.NewUsing(dirB, imB, indexTypeB, kvStoreB, kvConfigB)
  385. if err != nil || idxB == nil {
  386. vt.t.Fatalf("new using err: %v", err)
  387. }
  388. defer func() { _ = idxB.Close() }()
  389. rand.Seed(0)
  390. if vt.Bodies == nil {
  391. vt.Bodies = vt.genBodies()
  392. }
  393. vt.insertBodies(idxA)
  394. vt.insertBodies(idxB)
  395. cb(vt, searchTemplates, idxA, idxB)
  396. }
  397. // -------------------------------------------------------
  398. func (vt *VersusTest) makeIndexMapping() mapping.IndexMapping {
  399. standardFM := bleve.NewTextFieldMapping()
  400. standardFM.Store = false
  401. standardFM.IncludeInAll = false
  402. standardFM.IncludeTermVectors = true
  403. standardFM.Analyzer = "standard"
  404. dm := bleve.NewDocumentMapping()
  405. dm.AddFieldMappingsAt("title", standardFM)
  406. dm.AddFieldMappingsAt("body", standardFM)
  407. im := bleve.NewIndexMapping()
  408. im.DefaultMapping = dm
  409. im.DefaultAnalyzer = "standard"
  410. return im
  411. }
  412. func (vt *VersusTest) insertBodies(idx bleve.Index) {
  413. batch := idx.NewBatch()
  414. for i, bodyWords := range vt.Bodies {
  415. title := fmt.Sprintf("%d", i)
  416. body := strings.Join(bodyWords, " ")
  417. err := batch.Index(title, map[string]interface{}{"title": title, "body": body})
  418. if err != nil {
  419. vt.t.Fatalf("batch.Index err: %v", err)
  420. }
  421. if i%vt.BatchSize == 0 {
  422. err = idx.Batch(batch)
  423. if err != nil {
  424. vt.t.Fatalf("batch err: %v", err)
  425. }
  426. batch.Reset()
  427. }
  428. }
  429. err := idx.Batch(batch)
  430. if err != nil {
  431. vt.t.Fatalf("last batch err: %v", err)
  432. }
  433. }
  434. func (vt *VersusTest) genBodies() (rv [][]string) {
  435. for i := 0; i < vt.NumDocs; i++ {
  436. rv = append(rv, vt.genBody())
  437. }
  438. return rv
  439. }
  440. func (vt *VersusTest) genBody() (rv []string) {
  441. m := rand.Intn(vt.MaxWordsPerDoc)
  442. for j := 0; j < m; j++ {
  443. rv = append(rv, vt.genWord(rand.Intn(vt.NumWords)))
  444. }
  445. return rv
  446. }
  447. func (vt *VersusTest) genWord(i int) string {
  448. return fmt.Sprintf("%x", i)
  449. }