votes-pdf2xml.sh 862 B

123456789101112131415161718192021222324
  1. #!/bin/sh
  2. cd "$(dirname "${0}")"
  3. rapper -v >/dev/null || { echo "Install raptor-utils" 1>&2 && exit 1; }
  4. ruby -v >/dev/null || { echo "Install ruby" 1>&2 && exit 1; }
  5. pdftotext -v 2>/dev/null || { echo "Install poppler" 1>&2 && exit 1; }
  6. xmllint --version 2>/dev/null || { echo "Install libxml2" 1>&2 && exit 1; }
  7. build="build"
  8. for pdf in "${build}/"*.pdf
  9. do
  10. day="${build}/$(basename "${pdf}" .pdf)"
  11. echo "${day}"
  12. pdftotext "${pdf}"
  13. ruby pdftotext-postprocess.rb < "${day}.txt" \
  14. | xmllint --output "${day}.xml" --format --encode utf-8 -
  15. ruby votes-xml-to-ttl.rb < "${day}.xml" > "${day}.ttl~"
  16. rapper --input turtle --output turtle "${day}.ttl~" "http://www.europarl.europa.eu/" > "${day}.ttl"
  17. rapper --input turtle --output rdfxml-abbrev "${day}.ttl~" "http://www.europarl.europa.eu/" > "${day}.rdf"
  18. done