http.go 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. //
  2. // Copyright (C) 2017-2019 Marcus Rohrmoser, http://purl.mro.name/ShaarliGo
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU General Public License
  15. // along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. //
  17. package main
  import (
  	"compress/gzip"
  	"compress/zlib"
  	"io"
  	"log"
  	"net/http"
  	"net/url"
  	"strings"
  	"time"

  	"github.com/yhat/scrape"
  	"golang.org/x/net/html"
  	"golang.org/x/net/html/atom"
  )
  // contains reports whether needle is exactly equal to at least one element
  // of haystack. The comparison is case-sensitive; a nil or empty haystack
  // yields false.
  func contains(haystack []string, needle string) bool {
  	for i := 0; i < len(haystack); i++ {
  		if haystack[i] == needle {
  			return true
  		}
  	}
  	return false
  }
  38. func HttpGetBody(url *url.URL, timeout time.Duration) (io.Reader, error) {
  39. defer un(trace(strings.Join([]string{"HttpGetBody", url.String()}, " ")))
  40. client := &http.Client{Timeout: timeout}
  41. req, _ := http.NewRequest("GET", url.String(), nil)
  42. req.Header.Set("Accept-Encoding", "gzip, deflate")
  43. req.Header.Set("User-Agent", strings.Join([]string{myselfNamespace, version}, ""))
  44. if resp, err := client.Do(req); nil == resp && nil != err {
  45. return nil, err
  46. } else {
  47. encs := resp.Header["Content-Encoding"]
  48. switch {
  49. case contains(encs, "gzip"), contains(encs, "deflate"):
  50. return gzip.NewReader(resp.Body)
  51. case 0 == len(encs):
  52. // NOP
  53. default:
  54. log.Printf("Strange compression: %s\n", encs)
  55. }
  56. return resp.Body, err
  57. }
  58. }
  59. func formValuesFromReader(r io.Reader, name string) (ret url.Values, err error) {
  60. root, err := html.Parse(r) // assumes r is UTF8
  61. if err != nil {
  62. return ret, err
  63. }
  64. for _, form := range scrape.FindAll(root, func(n *html.Node) bool {
  65. return atom.Form == n.DataAtom &&
  66. (name == scrape.Attr(n, "name") || name == scrape.Attr(n, "id"))
  67. }) {
  68. ret := url.Values{}
  69. for _, inp := range scrape.FindAll(form, func(n *html.Node) bool {
  70. return atom.Input == n.DataAtom || atom.Textarea == n.DataAtom
  71. }) {
  72. n := scrape.Attr(inp, "name")
  73. if n == "" {
  74. n = scrape.Attr(inp, "id")
  75. }
  76. ty := scrape.Attr(inp, "type")
  77. v := scrape.Attr(inp, "value")
  78. if atom.Textarea == inp.DataAtom {
  79. v = scrape.Text(inp)
  80. } else if v == "" && ty == "checkbox" {
  81. v = scrape.Attr(inp, "checked")
  82. }
  83. ret.Set(n, v)
  84. }
  85. return ret, err // return on first occurrence
  86. }
  87. return ret, err
  88. }