#!/bin/sh
# comments-clean.sh
#
# For every content/posts/*/feed/index.raw (paths relative to the directory
# holding this script) build a cleaned-up content/posts/*/feed/index.xml that
# references the comments.xslt stylesheet, then strip whitespace in front of
# every "]]>" and give each generated file the modification time of its
# source .raw file.
#
# Needs: xmllint, tail, sed, grep, perl.
# Exit status: 0 when every feed was converted, 1 otherwise.

# ls -d public/????/??/*

# Print the prolog placed in front of every generated feed: the stylesheet
# processing instruction plus a long comment. The delimiter is quoted so the
# text is emitted verbatim.
feed_prolog() {
  cat <<'EOF'
<?xml-stylesheet type='text/xsl' href='../../../../assets/comments.xslt'?>
<!--
https://developer.mozilla.org/en/docs/XSL_Transformations_in_Mozilla_FAQ#Why_isn.27t_my_stylesheet_applied.3F
Caution! Firefox ignores your XSLT stylesheet if your XML looks like an RSS or Atom feed. A typical workaround is to insert an XML comment at the beginning of your XML file to move the <fEEd or <rsS tag out of the first 512 bytes used by Firefox to guess whether it is a feed or not.
See also the discussion at https://bugzilla.mozilla.org/show_bug.cgi?id=338621#c72.
For best results, serve both atom feed and xslt as 'text/xml' or 'application/xml' without charset specified.
-->
EOF
}

# Filter stdin (a pretty-printed feed) to stdout:
#   - drop the first line (the XML declaration emitted by xmllint),
#   - apply the substitutions below strictly in the listed order,
#   - drop every line containing '<title>' or '/comment-page-1/#comment-'.
#
# The </p> and <br /> rules first insert a literal "&#xA;" which the next
# rule removes again, so the net effect is that all three simply vanish.
# Literal <p>/</p>/<br /> are handled BEFORE &lt;/&gt; are unescaped, so
# escaped markup survives as plain tags.
# NOTE(review): &#8211; is an EN dash but is replaced by an EM dash;
# kept as found - confirm this is intended.
clean_feed_body() {
  tail -n +2 \
    | sed -E \
      -e 's|<p>||g' \
      -e 's|</p>|\&#xA;|g' \
      -e 's|<br />|\&#xA;|g' \
      -e 's|&#xA;||g' \
      -e 's|&gt;|>|g' \
      -e 's|&lt;|<|g' \
      -e 's|<content[^>]+>|<content type="text">|g' \
      -e 's|&#8211;|—|g' \
      -e 's|&#8216;|´|g' \
      -e "s|&#8217;|'|g" \
      -e 's|&#8218;|`|g' \
      -e 's|&#8230;|…|g' \
      -e 's|&#8222;|„|g' \
      -e 's|&#8220;|“|g' \
      -e 's|http://old.blog.mro.name/|https://mro.name/blog/|g' \
      -e 's|/feed/atom/|/feed/index.xml|g' \
      -e 's|<a href="([^"]+)" rel="nofollow">[^<]+</a>|\1|g' \
    | grep -vF -e '<title>' -e '/comment-page-1/#comment-'
}

# Convert one feed.
#   $1 - source .raw file
#   $2 - destination .xml file
# Returns the exit status of the final xmllint run.
build_feed() {
  {
    feed_prolog
    xmllint --format --encode utf-8 "${1}" | clean_feed_body
  } | xmllint --format --encode utf-8 --output "${2}" -
}

main() {
  cd "$(dirname "${0}")" || exit 1

  failed=0
  for raw in content/posts/*/feed/index.raw; do
    # An unmatched glob stays literal; skip it instead of processing garbage.
    [ -e "${raw}" ] || continue
    printf '%s\n' "${raw}"
    if ! build_feed "${raw}" "${raw%.raw}.xml"; then
      printf 'failed to convert %s\n' "${raw}" >&2
      failed=1
    fi
    # if [ 0 -eq "$(grep -cF '<entry' "${raw%.raw}.xml")" ] ; then
    # rm "${raw%.raw}.xml"
    # fi
  done

  set -- content/posts/*/feed/index.xml
  [ -e "${1}" ] || return "${failed}"

  # Rewrites every index.xml in place, leaving index.xml.original backups.
  perl -0777 -i.original -pe 's/\s+\]\]>/]]>/gs' "$@"

  # Copy the timestamps only now: the in-place edit above recreates each
  # file, so a timestamp set earlier would be lost.
  for raw in content/posts/*/feed/index.raw; do
    [ -e "${raw}" ] || continue
    [ -e "${raw%.raw}.xml" ] || continue
    touch -r "${raw}" "${raw%.raw}.xml"
  done

  ls -l "$@"
  return "${failed}"
}

main "$@"