search.go 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. //
  2. // Copyright (C) 2018-2019 Marcus Rohrmoser, http://purl.mro.name/ShaarliGo
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU General Public License
  15. // along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. //
  17. package main
  18. import (
  19. "encoding/xml"
  20. "net/http"
  21. "net/url"
  22. "path"
  23. "sort"
  24. "strconv"
  25. "strings"
  26. "time"
  27. "golang.org/x/text/language"
  28. "golang.org/x/text/search"
  29. )
  30. // better: https://stackoverflow.com/questions/24836044/case-insensitive-string-search-in-golang
  31. func rankEntryTerms(entry *Entry, terms []string, matcher *search.Matcher) int {
  32. // defer un(trace("ranker"))
  33. parts := [2]string{"", ""}
  34. if nil != entry {
  35. if nil != entry.Content {
  36. parts[0] = entry.Content.Body
  37. }
  38. parts[1] = entry.Title.Body
  39. }
  40. rank := 0
  41. for _, term := range terms {
  42. if strings.HasPrefix(term, "#") {
  43. t := term[1:]
  44. for _, cat := range entry.Categories {
  45. if idx, _ := matcher.IndexString(cat.Term, t); idx >= 0 {
  46. rank += 5
  47. }
  48. }
  49. }
  50. for weight, txt := range parts {
  51. if idx, _ := matcher.IndexString(txt, term); idx >= 0 {
  52. rank += 1 + weight
  53. }
  54. }
  55. }
  56. return rank
  57. }
  58. func (app *Server) handleSearch() http.HandlerFunc {
  59. return func(w http.ResponseWriter, r *http.Request) {
  60. now := time.Now()
  61. // evtl. check permission to search (non-logged-in visitor?)
  62. if !app.cfg.IsConfigured() {
  63. http.Redirect(w, r, cgiName+"/config", http.StatusPreconditionFailed)
  64. return
  65. }
  66. switch r.Method {
  67. case http.MethodGet:
  68. app.KeepAlive(w, r, now)
  69. // pull out parameters q, offset, limit
  70. query := r.URL.Query()
  71. if q := query["q"]; q != nil && 0 < len(q) {
  72. terms := strings.Fields(strings.TrimSpace(strings.Join(q, " ")))
  73. if 0 == len(terms) {
  74. http.Redirect(w, r, path.Join("..", "..", uriPub, uriPosts)+"/", http.StatusFound)
  75. return
  76. }
  77. limit := max(1, app.cfg.LinksPerPage)
  78. offset := 0
  79. if o := query["offset"]; o != nil {
  80. offset, _ = strconv.Atoi(o[0]) // just ignore conversion errors. 0 is a fine fallback
  81. }
  82. qu := cgiName + "/search/" + "?" + "q" + "=" + url.QueryEscape(strings.Join(terms, " "))
  83. catScheme := Iri(app.url.ResolveReference(mustParseURL(path.Join(uriPub, uriTags))).String() + "/")
  84. feed, _ := LoadFeed()
  85. lang := language.Make("de") // todo: should come from the entry, feed, settings, default (in that order)
  86. matcher := search.New(lang, search.IgnoreDiacritics, search.IgnoreCase)
  87. ret := feed.Search(func(entry *Entry) int { return rankEntryTerms(entry, terms, matcher) })
  88. ret.XmlBase = Iri(app.url.String())
  89. ret.Id = Id(app.url.ResolveReference(mustParseURL(qu)).String())
  90. ret.Generator = &Generator{Uri: myselfNamespace, Version: version + "+" + GitSHA1, Body: "🌺 ShaarliGo"}
  91. ret.XmlNSShaarliGo = myselfNamespace
  92. ret.SearchTerms = strings.Join(q, " ") // rather use http://www.opensearch.org/Specifications/OpenSearch/1.1#Example_of_OpenSearch_response_elements_in_Atom_1.0
  93. ret.XmlNSOpenSearch = "http://a9.com/-/spec/opensearch/1.1/"
  94. // paging / RFC5005
  95. clamp := func(x int) int { return min(len(ret.Entries), x) }
  96. offset = clamp(max(0, offset))
  97. count := len(ret.Entries)
  98. ret.Links = append(ret.Links, Link{Rel: relSelf, Href: qu + "&" + "offset" + "=" + strconv.Itoa(offset), Title: strconv.Itoa(1 + offset/limit)})
  99. if count > limit {
  100. ret.Links = append(ret.Links, Link{Rel: relFirst, Href: qu, Title: strconv.Itoa(1 + 0)})
  101. ret.Links = append(ret.Links, Link{Rel: relLast, Href: qu + "&" + "offset" + "=" + strconv.Itoa(count-(count%limit)), Title: strconv.Itoa(1 + count/limit)})
  102. if intPrev := offset - limit; intPrev >= 0 {
  103. ret.Links = append(ret.Links, Link{Rel: relPrevious, Href: qu + "&" + "offset" + "=" + strconv.Itoa(intPrev), Title: strconv.Itoa(1 + intPrev/limit)})
  104. }
  105. if intNext := offset + limit; intNext < count {
  106. ret.Links = append(ret.Links, Link{Rel: relNext, Href: qu + "&" + "offset" + "=" + strconv.Itoa(intNext), Title: strconv.Itoa(1 + intNext/limit)})
  107. }
  108. ret.Entries = ret.Entries[offset:clamp(offset+limit)]
  109. }
  110. // prepare entries for Atom publication
  111. for _, item := range ret.Entries {
  112. // change entries for output but don't save the change:
  113. selfURL := mustParseURL(path.Join(uriPub, uriPosts, string(item.Id)) + "/")
  114. editURL := strings.Join([]string{cgiName, "?post=", selfURL.String()}, "")
  115. item.Id = Id(app.url.ResolveReference(selfURL).String()) // expand XmlBase as required by https://validator.w3.org/feed/check.cgi?url=
  116. item.Links = append(item.Links,
  117. Link{Rel: relSelf, Href: selfURL.String()},
  118. Link{Rel: relEdit, Href: editURL},
  119. )
  120. for i := range item.Categories {
  121. item.Categories[i].Scheme = catScheme
  122. }
  123. if item.Updated.IsZero() {
  124. item.Updated = item.Published
  125. }
  126. if item.Updated.After(ret.Updated) {
  127. ret.Updated = item.Updated
  128. }
  129. }
  130. ret.Categories = AggregateCategories(ret.Entries)
  131. if ret.Updated.IsZero() {
  132. ret.Updated = iso8601(now)
  133. }
  134. w.Header().Set("Content-Type", "text/xml; charset=utf-8")
  135. enc := xml.NewEncoder(w)
  136. enc.Indent("", " ")
  137. if err := xmlEncodeWithXslt(ret, "../../assets/"+app.cfg.Skin+"/posts.xslt", enc); err == nil {
  138. if err := enc.Flush(); err == nil {
  139. return
  140. }
  141. }
  142. }
  143. }
  144. }
  145. }
  146. func min(x, y int) int {
  147. if x < y {
  148. return x
  149. }
  150. return y
  151. }
  152. func max(x, y int) int {
  153. if x > y {
  154. return x
  155. }
  156. return y
  157. }
  158. func (feed Feed) Search(ranker func(*Entry) int) Feed {
  159. defer un(trace("Feed.Search"))
  160. feed.Entries = searchEntries(feed.Entries, ranker)
  161. return feed
  162. }
// search_results keeps entries and their ranks in two parallel slices so both
// can be reordered together through sort.Interface (see Len, Less and Swap).
type search_results struct {
	Ranks   []int    // Ranks[i] is the rank computed for Entries[i]
	Entries []*Entry // the entries being ranked
}
  167. func (r search_results) Len() int { return len(r.Ranks) }
  168. func (r search_results) Less(i, j int) bool {
  169. if r.Ranks[i] == r.Ranks[j] {
  170. return ByPublishedDesc(r.Entries).Less(i, j)
  171. }
  172. return r.Ranks[i] > r.Ranks[j]
  173. }
  174. func (r search_results) Swap(i, j int) {
  175. r.Ranks[i], r.Ranks[j] = r.Ranks[j], r.Ranks[i]
  176. r.Entries[i], r.Entries[j] = r.Entries[j], r.Entries[i]
  177. }
  178. func searchEntries(entries []*Entry, ranker func(*Entry) int) []*Entry {
  179. r := search_results{
  180. Ranks: make([]int, len(entries)),
  181. Entries: entries,
  182. }
  183. // could be concurrent:
  184. for idx, ent := range entries {
  185. r.Ranks[idx] = ranker(ent)
  186. }
  187. // sort entries according to rank
  188. sort.Sort(r)
  189. cut := sort.Search(len(r.Ranks), func(idx int) bool { return r.Ranks[idx] <= 0 })
  190. return r.Entries[0:cut]
  191. }
  192. //