pull.sh

#!/bin/sh
cd "$(dirname "${0}")" || exit 1
#
# See
# - https://events.ccc.de/congress/2018/wiki/Static:Crawling
# - https://code.mro.name/mro/35c3/
#
USER_AGENT="https://mro.github.io/35c3"
dir=Fahrplan
year=2018
# url="https://${dir}.events.ccc.de/congress/2015/${dir}/version"
url="https://${dir}.events.ccc.de/congress/${year}/${dir}/version"
dst="${dir}.version"
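# Fetch the published schedule.xml, prepend an XSLT stylesheet processing
# instruction, validate it against the RelaxNG schema and pretty-print it as
# schedule2.xml, then rewrite the absolute event URLs to relative ones.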
{
  curl --output "${dir}/schedule.xml" --location "https://fahrplan.events.ccc.de/congress/${year}/${dir}/schedule.xml"
  {
    echo '<?xml-stylesheet type="text/xsl" href="../assets/schedule2html.xslt"?>'
    grep -vF "<?xml version=" "${dir}/schedule.xml"
  } | xmllint --output "${dir}"/schedule2.xml --relaxng assets/schedule.rng --format --encode utf-8 -
  sed -i -e "s|<url>https://fahrplan.events.ccc.de/congress/${year}/${dir}/events/|<url>./events/|g" "${dir}"/schedule2.xml
}
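# Mirror every referenced event page. The event IDs are scraped from the
# <url> elements of schedule2.xml; --time-cond makes curl re-download only
# when the remote copy is newer than the local file.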
for evt in $(grep -F '<url>' "${dir}"/schedule2.xml | grep -hoE '[0-9]+' | sort -n)
do
  dst_evt="${dir}/events/${evt}.html"
  url_evt="https://fahrplan.events.ccc.de/congress/${year}/${dst_evt}"
  echo "${url_evt}"
  curl --silent --max-time 3 --create-dirs --location --remote-time --time-cond "${dst_evt}" --output "${dst_evt}" "${url_evt}"
done
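# Also fetch schedule.json and convert it to YAML; JSON parses as YAML, so
# Ruby's YAML loader can read it directly.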
{
  curl --output "${dir}/schedule.json" --location "https://fahrplan.events.ccc.de/congress/${year}/${dir}/schedule.json"
  ruby -ryaml -e "puts YAML::dump(YAML::load(STDIN.read))" < "${dir}/schedule.json" > "${dir}/schedule.yaml"
}
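# The congress publishes a version file pointing at a tarball of the whole
# Fahrplan export. Download both only when changed, unpack the tarball over
# ${dir}, and rewrite absolute /congress/... paths so the pages work offline.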
curl --silent --location --remote-time --output "${dst}" --time-cond "${dst}" --user-agent "${USER_AGENT}" "${url}" && {
  url="$(grep -F "URL: " < "${dst}" | cut -d ' ' -f 2)"
  dst="${dir}.tar.gz"
  curl --silent --location --remote-time --output "${dst}" --time-cond "${dst}" --user-agent "${USER_AGENT}" "${url}" && {
    rm -rf "${dir}"
    tar -xzf "${dst}" && mv 35c3 "${dir}"
    sed -i -e "s:/congress/${year}/${dir}/:./:g" "${dir}"/*.html
    sed -i -e "s:/congress/${year}/${dir}/:../:g" "${dir}"/*/*.html
    {
      echo '<?xml-stylesheet type="text/xsl" href="../assets/schedule2html.xslt"?>'
      grep -vF "<?xml version=" "${dir}/schedule.xml"
    } | xmllint --output "${dir}"/schedule2.xml --relaxng assets/schedule.rng --format --encode utf-8 -
    # xsltproc --output "${dir}"/schedule2.html~ assets/schedule2html.xslt "${dir}"/schedule.xml
    # xmllint --output "${dir}"/schedule2.html --format --encode utf-8 "${dir}"/schedule2.html~
    # rm "${dir}"/schedule2.html~
    # add a manifest for offline caching?
    touch "${dir}/index.manifest"
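    # A populated HTML5 AppCache manifest would start roughly like this
    # (sketch only; the entries are illustrative, the script just creates an
    # empty file):
    #   CACHE MANIFEST
    #   # version 2018-12-27
    #   schedule.html
    #   events/<id>.html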
  }
}
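# Fetch the alternative "everything" and "workshops" schedules and run them
# through the same stylesheet pipeline, validated against the more lenient
# assets/schedule.sloppy.rng.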
for part in everything workshops
do
  dst="${dir}/${part}.schedule.xml"
  url="https://${dir}.events.ccc.de/congress/${year}/${dst}"
  curl --silent --location --remote-time --output "${dst}" --time-cond "${dst}" --user-agent "${USER_AGENT}" "${url}" && {
    {
      echo '<?xml-stylesheet type="text/xsl" href="../assets/schedule2html.xslt"?>'
      grep -vF "<?xml version=" "${dst}"
    } | xmllint --output "${dir}"/"${part}".schedule2.xml --relaxng assets/schedule.sloppy.rng --format --encode utf-8 -
  }
done
# purge images and other binaries with little benefit but a large footprint
find . \( -name "*.gif" -o -name "*.jpeg" -o -name "*.jpg" -o -name "*.JPG" -o -name "*.mp4" -o -name "*.odp" -o -name "*.pdf" -o -name "*.png" -o -name "*.PNG" \) -exec rm "{}" \;
git add . && git commit -a -m '🚀'
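# Mirror the congress wiki the same way: fetch its version file, then the
# bzip2 tarball it points at, and unpack it into ./wiki.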
dir="wiki"
# url="https://events.ccc.de/congress/2015/${dir}/version"
url="https://events.ccc.de/congress/${year}/${dir}/version"
dst="${dir}.version"
curl --silent --location --remote-time --output "${dst}" --time-cond "${dst}" --user-agent "${USER_AGENT}" "${url}" && {
  url="$(grep -F "URL: " < "${dst}" | cut -d ' ' -f 2)"
  dst="${dir}.tbz"
  curl --silent --location --remote-time --output "${dst}" --time-cond "${dst}" --user-agent "${USER_AGENT}" "${url}" && {
    rm -rf "${dir}"
    tar -xjf "${dst}"
  }
}
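# run wiki.sh over the unpacked dump, then commit everything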
sh wiki.sh
git add . && git commit -a -m '🐳'