Browse Source

Port to Dune

Thibaut Mattio 9 months ago
parent
commit
4dc23fd3bd
37 changed files with 815 additions and 9441 deletions
  1. 52 0
      .github/workflows/ci.yml
  2. 9 7
      .gitignore
  3. 0 10
      .merlin
  4. 4 0
      .ocamlformat
  5. 0 1
      LICENSE
  6. 32 30
      Makefile
  7. 54 7
      README.md
  8. 0 35
      _oasis
  9. 0 44
      _tags
  10. 0 27
      configure
  11. 36 0
      dune-project
  12. 31 0
      example/aggregate_feeds.ml
  13. 3 0
      example/dune
  14. 0 9
      examples/data_blog.txt
  15. 0 304
      examples/ocl_planet.ml
  16. 0 37
      examples/write_atom.ml
  17. 0 12
      lib/META
  18. 4 0
      lib/dune
  19. 40 0
      lib/feed.ml
  20. 73 0
      lib/http.ml
  21. 9 3
      lib/http.mli
  22. 80 0
      lib/meta.ml
  23. 273 0
      lib/post.ml
  24. 0 81
      lib/ri_feeds.ml
  25. 0 37
      lib/ri_feeds.mli
  26. 0 101
      lib/ri_http.ml
  27. 0 263
      lib/ri_posts.ml
  28. 0 35
      lib/ri_posts.mli
  29. 18 7
      lib/river.ml
  30. 0 8
      lib/river.mldylib
  31. 58 53
      lib/river.mli
  32. 0 8
      lib/river.mllib
  33. 3 16
      lib/util.ml
  34. 0 895
      myocamlbuild.ml
  35. 0 26
      opam
  36. 36 0
      river.opam
  37. 0 7385
      setup.ml

+ 52 - 0
.github/workflows/ci.yml

@@ -0,0 +1,52 @@
+name: CI
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  build-and-test:
+    strategy:
+      fail-fast: false
+
+      matrix:
+        os:
+          - macos-latest
+          - ubuntu-latest
+          - windows-latest
+
+        ocaml-compiler:
+          - 4.12.x
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Use OCaml ${{ matrix.ocaml-compiler }}
+        uses: ocaml/setup-ocaml@v2
+        with:
+          ocaml-compiler: ${{ matrix.ocaml-compiler }}
+          dune-cache: ${{ matrix.os != 'macos-latest' }}
+
+      - name: Install ocamlformat
+        run: opam install ocamlformat.0.18.0
+        if: ${{ matrix.os == 'ubuntu-latest' }}
+
+      - name: Install opam packages
+        run: opam install . --with-test
+
+      - name: Check formatting
+        run: make fmt
+        if: ${{ matrix.os == 'ubuntu-latest' && always() }}
+
+      - name: Run build
+        run: make build
+
+      - name: Run the unit tests
+        run: make test
+        timeout-minutes: 1

+ 9 - 7
.gitignore

@@ -1,7 +1,9 @@
-*.native
-_build
-*~
-*.swp
-setup.data
-setup.log
-index.html
+# Dune generated files
+_build/
+*.install
+
+# Merlin configuring file for Vim and Emacs
+.merlin
+
+# Local OPAM switch
+_opam/

+ 0 - 10
.merlin

@@ -1,10 +0,0 @@
-S lib
-
-B _build/lib
-
-PKG lwt
-PKG ptime.clock.os
-PKG syndic
-PKG cohttp
-PKG cow
-PKG netstring

+ 4 - 0
.ocamlformat

@@ -0,0 +1,4 @@
+version = 0.20.1
+profile = conventional
+parse-docstrings = true
+wrap-comments = true

+ 0 - 1
LICENSE

@@ -11,4 +11,3 @@ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-

+ 32 - 30
Makefile

@@ -1,41 +1,43 @@
-# OASIS_START
-# DO NOT EDIT (digest: a3c674b4239234cbbe53afe090018954)
-
-SETUP = ocaml setup.ml
-
-build: setup.data
-	$(SETUP) -build $(BUILDFLAGS)
-
-doc: setup.data build
-	$(SETUP) -doc $(DOCFLAGS)
-
-test: setup.data build
-	$(SETUP) -test $(TESTFLAGS)
+.DEFAULT_GOAL := all
 
+.PHONY: all
 all:
-	$(SETUP) -all $(ALLFLAGS)
+	opam exec -- dune build --root . @install
 
-install: setup.data
-	$(SETUP) -install $(INSTALLFLAGS)
+.PHONY: deps
+deps: ## Install development dependencies
+	opam install -y dune-release ocamlformat utop ocaml-lsp-server
+	opam install --deps-only --with-test --with-doc -y .
 
-uninstall: setup.data
-	$(SETUP) -uninstall $(UNINSTALLFLAGS)
+.PHONY: create_switch
+create_switch: ## Create an opam switch without any dependency
+	opam switch create . --no-install -y
 
-reinstall: setup.data
-	$(SETUP) -reinstall $(REINSTALLFLAGS)
+.PHONY: switch
+switch: ## Create an opam switch and install development dependencies
+	opam install . --deps-only --with-doc --with-test
+	opam install -y dune-release ocamlformat utop ocaml-lsp-server
 
-clean:
-	$(SETUP) -clean $(CLEANFLAGS)
+.PHONY: build
+build: ## Build the project, including non installable libraries and executables
+	opam exec -- dune build --root .
 
-distclean:
-	$(SETUP) -distclean $(DISTCLEANFLAGS)
+.PHONY: test
+test: ## Run the unit tests
+	opam exec -- dune runtest --root .
 
-setup.data:
-	$(SETUP) -configure $(CONFIGUREFLAGS)
+.PHONY: clean
+clean: ## Clean build artifacts and other generated files
+	opam exec -- dune clean --root .
 
-configure:
-	$(SETUP) -configure $(CONFIGUREFLAGS)
+.PHONY: doc
+doc: ## Generate odoc documentation
+	opam exec -- dune build --root . @doc
 
-.PHONY: build doc test all install uninstall reinstall clean distclean configure
+.PHONY: fmt
+fmt: ## Format the codebase with ocamlformat
+	opam exec -- dune build --root . --auto-promote @fmt
 
-# OASIS_STOP
+.PHONY: watch
+watch: ## Watch for the filesystem and rebuild on every change
+	opam exec -- dune build --root . --watch

+ 54 - 7
README.md

@@ -1,11 +1,58 @@
 # River
 
-A library for aggregating RSS2 and Atom feeds in OCaml.
+[![Actions Status](https://github.com/tmattio/river/workflows/CI/badge.svg)](https://github.com/tmattio/river/actions)
 
-Features:
+RSS2 and Atom feed aggregator for OCaml
 
-* Performs deduplication.
-* Supports pagination and generating well-formed html prefix snippets.
-* Support for generating aggregate feeds.
-* Sorts the posts from most recent to oldest.
-* Depends on ocamlnet for html parsing.
+
+## Features
+
+- Performs deduplication.
+- Supports pagination and generating well-formed html prefix snippets.
+- Support for generating aggregate feeds.
+- Sorts the posts from most recent to oldest.
+- Depends on ocamlnet for html parsing.
+
+## Installation
+
+```bash
+opam install river
+```
+
+## Usage
+
+Here's an example program that aggregates the feeds from different sources:
+
+```ocaml
+let sources =
+  River.
+    [
+      { name = "KC Sivaramakrishnan"; url = "http://kcsrk.info/atom-ocaml.xml" };
+      {
+        name = "Amir Chaudhry";
+        url = "http://amirchaudhry.com/tags/ocamllabs-atom.xml";
+      };
+    ]
+
+let () =
+  let feeds = List.map River.fetch sources in
+  let posts = River.posts feeds in
+  let entries = River.create_atom_entries posts in
+  let feed =
+    let authors = [ Syndic.Atom.author "OCaml Blog" ] in
+    let id = Uri.of_string "https://ocaml.org/atom.xml" in
+    let links = [ Syndic.Atom.link ~rel:Self id ] in
+    let title : Syndic.Atom.text_construct =
+      Text "OCaml Blog: Read the latest OCaml news from the community."
+    in
+    let updated = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get in
+    Syndic.Atom.feed ~authors ~links ~id ~title ~updated entries
+  in
+  let out_channel = open_out "example/atom.xml" in
+  Syndic.Atom.output feed (`Channel out_channel);
+  close_out out_channel
+```
+
+## Contributing
+
+Take a look at our [Contributing Guide](CONTRIBUTING.md).

+ 0 - 35
_oasis

@@ -1,35 +0,0 @@
-Name:         river
-Version:      0.1.3
-Synopsis:     A RSS/Atom feed aggregator
-Authors:      KC Sivaramakrishnan
-License:      ISC
-BuildTools:   ocamlbuild
-OASISFormat:  0.4
-Plugins:      Meta (0.4), DevFiles (0.4)
-
-Flag examples
-  Description: build the examples
-  Default: false
-
-Library "river"
-  Path:             lib/
-  Findlibname:      river
-  InternalModules:  Ri_http, Ri_feeds, Ri_posts, Ri_utils
-  Modules:          River
-  BuildDepends:     lwt, syndic, netstring, cohttp.lwt, ptime.clock.os
-
-Executable "ocl-planet"
-  Path:             examples/
-  MainIs:           ocl_planet.ml
-  Build$:           flag(examples)
-  CompiledObject:   best
-  Install:          false
-  BuildDepends:     netstring, syndic, river
-
-Executable "write-atom"
-  Path:             examples/
-  MainIs:           write_atom.ml
-  Build$:           flag(examples)
-  CompiledObject:   best
-  Install:          false
-  BuildDepends:     netstring, syndic, river

+ 0 - 44
_tags

@@ -1,44 +0,0 @@
-# OASIS_START
-# DO NOT EDIT (digest: 4199c91b63001772b6357e13b2e1221e)
-# Ignore VCS directories, you can use the same kind of rule outside
-# OASIS_START/STOP if you want to exclude directories that contains
-# useless stuff for the build process
-true: annot, bin_annot
-<**/.svn>: -traverse
-<**/.svn>: not_hygienic
-".bzr": -traverse
-".bzr": not_hygienic
-".hg": -traverse
-".hg": not_hygienic
-".git": -traverse
-".git": not_hygienic
-"_darcs": -traverse
-"_darcs": not_hygienic
-# Library river
-"lib/river.cmxs": use_river
-<lib/*.ml{,i,y}>: pkg_cohttp.lwt
-<lib/*.ml{,i,y}>: pkg_lwt
-<lib/*.ml{,i,y}>: pkg_netstring
-<lib/*.ml{,i,y}>: pkg_ptime.clock.os
-<lib/*.ml{,i,y}>: pkg_syndic
-# Executable ocl-planet
-<examples/ocl_planet.{native,byte}>: pkg_cohttp.lwt
-<examples/ocl_planet.{native,byte}>: pkg_lwt
-<examples/ocl_planet.{native,byte}>: pkg_netstring
-<examples/ocl_planet.{native,byte}>: pkg_ptime.clock.os
-<examples/ocl_planet.{native,byte}>: pkg_syndic
-<examples/ocl_planet.{native,byte}>: use_river
-# Executable write-atom
-<examples/write_atom.{native,byte}>: pkg_cohttp.lwt
-<examples/write_atom.{native,byte}>: pkg_lwt
-<examples/write_atom.{native,byte}>: pkg_netstring
-<examples/write_atom.{native,byte}>: pkg_ptime.clock.os
-<examples/write_atom.{native,byte}>: pkg_syndic
-<examples/write_atom.{native,byte}>: use_river
-<examples/*.ml{,i,y}>: pkg_cohttp.lwt
-<examples/*.ml{,i,y}>: pkg_lwt
-<examples/*.ml{,i,y}>: pkg_netstring
-<examples/*.ml{,i,y}>: pkg_ptime.clock.os
-<examples/*.ml{,i,y}>: pkg_syndic
-<examples/*.ml{,i,y}>: use_river
-# OASIS_STOP

+ 0 - 27
configure

@@ -1,27 +0,0 @@
-#!/bin/sh
-
-# OASIS_START
-# DO NOT EDIT (digest: dc86c2ad450f91ca10c931b6045d0499)
-set -e
-
-FST=true
-for i in "$@"; do
-  if $FST; then
-    set --
-    FST=false
-  fi
-
-  case $i in
-    --*=*)
-      ARG=${i%%=*}
-      VAL=${i##*=}
-      set -- "$@" "$ARG" "$VAL"
-      ;;
-    *)
-      set -- "$@" "$i"
-      ;;
-  esac
-done
-
-ocaml setup.ml -configure "$@"
-# OASIS_STOP

+ 36 - 0
dune-project

@@ -0,0 +1,36 @@
+(lang dune 2.0)
+
+(name river)
+
+(documentation "https://tmattio.github.io/river/")
+
+(source
+ (github kayceesrk/river))
+
+(license MIT)
+
+(authors "KC Sivaramakrishnan <sk826@cl.cam.ac.uk>")
+
+(maintainers "KC Sivaramakrishnan <sk826@cl.cam.ac.uk>")
+
+(generate_opam_files true)
+
+(package
+ (name river)
+ (synopsis "RSS2 and Atom feed aggregator for OCaml")
+ (description "RSS2 and Atom feed aggregator for OCaml")
+ (depends
+  (ocaml
+   (>= 4.08.0))
+  dune
+  (syndic
+   (>= 1.5))
+  (cohttp
+   (>= 5.0.0))
+  (cohttp-lwt
+   (>= 5.0.0))
+  (cohttp-lwt-unix
+   (>= 5.0.0))
+  ptime
+  lwt
+  (odoc :with-doc)))

+ 31 - 0
example/aggregate_feeds.ml

@@ -0,0 +1,31 @@
+let sources =
+  River.
+    [
+      { name = "KC Sivaramakrishnan"; url = "http://kcsrk.info/atom-ocaml.xml" };
+      {
+        name = "Amir Chaudhry";
+        url = "http://amirchaudhry.com/tags/ocamllabs-atom.xml";
+      };
+    ]
+
+let main () =
+  let feeds = List.map River.fetch sources in
+  let posts = River.posts feeds in
+  let entries = River.create_atom_entries posts in
+  let feed =
+    let authors = [ Syndic.Atom.author "OCaml Blog" ] in
+    let id = Uri.of_string "https://ocaml.org/atom.xml" in
+    let links = [ Syndic.Atom.link ~rel:Self id ] in
+    let title : Syndic.Atom.text_construct =
+      Text "OCaml Blog: Read the latest OCaml news from the community."
+    in
+    let updated = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get in
+    Syndic.Atom.feed ~authors ~links ~id ~title ~updated entries
+  in
+  let out_channel = open_out "example/atom.xml" in
+  Syndic.Atom.output feed (`Channel out_channel);
+  close_out out_channel
+
+let () =
+  Printexc.record_backtrace true;
+  main ()

+ 3 - 0
example/dune

@@ -0,0 +1,3 @@
+(executable
+ (name aggregate_feeds)
+ (libraries river))

+ 0 - 9
examples/data_blog.txt

@@ -1,9 +0,0 @@
-KC Sivaramakrishnan|http://kcsrk.info/atom-ocaml.xml
-Anil Madhavapeddy|http://anil.recoil.org/feeds/atom-ocaml.xml
-Amir Chaudhry|http://amirchaudhry.com/tags/ocamllabs-atom.xml
-Compiler Hacking|http://ocamllabs.github.io/compiler-hacking/rss.xml
-OCL Monthly News|http://www.cl.cam.ac.uk/projects/ocamllabs/news/atom.xml
-Heidi Howard|http://hh360.user.srcf.net/blog/category/pl/ocaml/feed/
-Thomas Leonard|http://roscidus.com/blog/atom.xml
-Mirage OS|http://openmirage.org/blog/atom.xml
-SRG Syslog|http://www.syslog.cl.cam.ac.uk/tag/ocamllabs/feed/atom

+ 0 - 304
examples/ocl_planet.ml

@@ -1,304 +0,0 @@
-(*
- * Copyright (c) 2014, OCaml.org project
- * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*)
-
-open River
-open Nethtml
-open Printf
-
-let mk_recent ~date ~url ~author ~title : string =
-  sprintf
-"<tr>
-    <td><i> %s </i></td>
-    <td><a href=\"#%s\">%s</a></td>
-    <td>%s</td>
- </tr>
-" date url title author
-
-let mk_post ~url ~title ~blog_url ~blog_title ~blog_name ~author
-            ~date ~content : string =
-  sprintf
-"<div class=\"channelgroup\">
-  <div class=\"entrygroup\" id=\"%s\">
-    <a name=\"#%s\"> </a>
-    <h1 class=\"posttitle\">
-      <a href=\"%s\">%s</a>
-      (<a href=\"%s\" title=\"%s\">%s</a>)
-    </h1>
-    <hr/>
-    <div class=\"entry\">
-      <div class=\"content\">
-        <div>
-          %s
-        </div>
-      </div>
-      <div>
-      <p class=\"date\">
-        <a href=\"%s\">by %s at %s </a>
-      </p>
-      </div>
-    </div>
-  </div>
-</div>
-" url url url title blog_url blog_title blog_name content url author date
-
-let mk_post_with_face ~url ~title ~blog_url ~blog_title ~blog_name ~author
-                      ~date ~content ~face ~face_height : string =
-  sprintf
-"<div class=\"channelgroup\">
-  <div class=\"entrygroup\" id=\"%s\">
-    <a name=\"#%s\"> </a>
-    <div>
-    <img style=\"float:right; padding-left: 20px;\" class=\"face\" src=\"%s\" width=\"\" height=\"%d\" alt=\"\" />
-    <h1 class=\"posttitle\">
-      <a href=\"%s\">%s</a>
-      (<a href=\"%s\" title=\"%s\">%s</a>)
-    </h1>
-    </div>
-    <hr/>
-    <div class=\"entry\">
-      <div class=\"content\">
-        <div>
-          %s
-        </div>
-      </div>
-      <div>
-      <p class=\"date\">
-        <a href=\"%s\">by %s at %s </a>
-      </p>
-      </div>
-    </div>
-  </div>
-</div>
-" url url face face_height url title blog_url blog_title blog_name content url author date
-
-let mk_body ~recentList ~postList : string =
-"<head> <title>Blogs</title>
-  <link rel=\"alternate\" href=\"http://www.cl.cam.ac.uk/projects/ocamllabs/blogs/rss10.xml\" title=\"\" type=\"application/rss+xml\" />
-  <style>
-      a.icon-github {
-    background: url(../github.png) no-repeat 0 0;
-          background: url(../github.png) no-repeat 0 0;
-    padding: 0 0 2px 2em;
-      }
-      a.icon-cloud {
-    background: url(../cloud.png) no-repeat 0 0;
-          background-size: 17px;
-    padding: 0 0 2px 2em;
-      }
-      a.icon-bullhorn {
-    background: url(../bullhorn.png) no-repeat 0 0;
-          background-size: 17px;
-    padding: 0 0 2px 2em;
-      }
-      a.icon-wrench {
-    background: url(../wrench.png) no-repeat 0 0;
-          background-size: 17px;
-    padding: 0 0 2px 2em;
-      }
-      h2.posttitle {
-          font-size: 120%;
-      }
-  div.toc {
-      background-color: rgb(239, 239, 239);
-      margin: 0.5em 0em 1.5em 1px;
-      border: 1px solid black;
-      font-size: 0.7em;
-      padding: 0px 0px 1ex;
-      font-size: 100%;
-  }
-    a.planet-toggle {
-      font-size: 90%;
-      padding: 5px 10px;
-      margin-bottom: 2ex;
-      color: #4b4b4b;
-      background: #e6e6e6;
-      border: 1px solid #dedede;
-    }
-
-    a.planet-toggle:hover, a.planet-toggle:focus {
-      color: #ffffff;
-      background: #c77a27;
-    }
-
-    .btn {
-      display: inline-block;
-      color: #ffffff;
-      *display: inline;
-      /* IE7 inline-block hack */
-
-      *zoom: 1;
-      padding: 10px 20px;
-      margin-bottom: 0;
-      font-family: Lato, sans-serif;
-      font-weight: bold;
-      font-size: 18px;
-      line-height: 28px;
-      text-align: center;
-      vertical-align: middle;
-      cursor: pointer;
-      background: #8eaf20;
-      border: 1px solid #8eaf20;
-      *border: 0;
-      -webkit-border-radius: 4px;
-      -moz-border-radius: 4px;
-      border-radius: 4px;
-      *margin-left: .3em;
-      text-shadow: rgba(0, 0, 0, 0.34) 1px 1px 2px;
-      -webkit-box-shadow: rgba(0, 0, 0, 0.46) 0 2px 2px;
-      -moz-box-shadow: rgba(0, 0, 0, 0.46) 0 2px 2px;
-      box-shadow: rgba(0, 0, 0, 0.46) 0 2px 2px;
-    }
-    .btn:first-child {
-      *margin-left: 0;
-    }
-    .btn:hover,
-    .btn:focus {
-      color: #ffffff;
-      text-decoration: none;
-      background-position: 0 -15px;
-      -webkit-transition: background-position 0.1s linear;
-      -moz-transition: background-position 0.1s linear;
-      -o-transition: background-position 0.1s linear;
-      transition: background-position 0.1s linear;
-    }
-    .btn:focus {
-      outline: none;
-    }
-    .btn.active,
-    .btn:active {
-      background-image: none;
-      outline: 0;
-      -webkit-box-shadow: inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);
-      -moz-box-shadow: inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);
-      box-shadow: inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);
-    }
-    .btn.disabled,
-    .btn[disabled] {
-      cursor: default;
-      background-image: none;
-      opacity: 0.65;
-      filter: alpha(opacity=65);
-      -webkit-box-shadow: none;
-      -moz-box-shadow: none;
-      box-shadow: none;
-    }
-
-  div#content-primary p img, div#content-primary img.right { float: none; }
-
-  </style>
-  <script type = \"text/javascript\">
-    function switchContent(id1,id2) {
-     // Get the DOM reference
-     var contentId1 = document.getElementById(id1);
-     var contentId2 = document.getElementById(id2);
-     // Toggle
-     contentId1.style.display = \"none\";
-     contentId2.style.display = \"block\";
-     }
-  </script>
-  </head>
-
-  <body>
-
-  <div id=\"container\">
-
-  <h4>Recent Posts</h4>
-  <table width=\"90%\">\n" ^ recentList ^
-"</table>
-" ^ postList ^
-" </div>
-  </body>"
-
-let encode_document html = Nethtml.encode ~enc:`Enc_utf8 html
-
-let date_of_post p =
-  match p.date with
-  | None -> "<Date Unknown>"
-  | Some d ->
-       let open Syndic.Date in
-       sprintf "%s %02d, %d" (string_of_month(month d)) (day d) (year d)
-
-let rec length_html html =
-  List.fold_left (fun l h -> l + length_html_el h) 0 html
-and length_html_el = function
-  | Element(_, _, content) -> length_html content
-  | Data d -> String.length d
-
-let new_id =
-  let id = ref 0 in
-  fun () -> incr id; sprintf "ocamlorg-post%i" !id
-
-(* [toggle html1 html2] return some piece of html with buttons to pass
-   from [html1] to [html2] and vice versa. *)
-let toggle ?(anchor="") html1 html2 =
-  let button id1 id2 text =
-    Element("a", ["onclick", sprintf "switchContent('%s','%s')" id1 id2;
-                  "class", "btn planet-toggle";
-                  "href", "#" ^ anchor],
-            [Data text])
-  in
-  let id1 = new_id() and id2 = new_id() in
-  [Element("div", ["id", id1],
-           html1 @ [button id1 id2 "Read more..."]);
-   Element("div", ["id", id2; "style", "display: none"],
-           html2 @ [button id2 id1 "Hide"])]
-
-
-let write_posts ?num_posts ?ofs ~out_file in_file =
-  let posts = get_posts ?n:num_posts ?ofs in_file in
-  let recentList =
-    List.map (fun p ->
-      let date = date_of_post p in
-      let title = p.title in
-      let url = match p.link with
-        | Some u -> Uri.to_string u
-        | None -> Digest.to_hex (Digest.string (p.title)) in
-      let author = p.author in
-      mk_recent date url author title) posts in
-  let postList =
-    List.map (fun p ->
-      let title = p.title in
-      let date = date_of_post p in
-      let url = match p.link with
-        | Some u -> Uri.to_string u
-        | None -> Digest.to_hex (Digest.string (p.title)) in
-      let author = p.author in
-      let blog_name = p.contributor.name in
-      let blog_title = p.contributor.title in
-      let blog_url = p.contributor.url in
-      (* Write contents *)
-      let buffer = Buffer.create 0 in
-      let channel = new Netchannels.output_buffer buffer in
-      let desc = if length_html p.desc < 1000 then p.desc
-                else toggle (prefix_of_html p.desc 1000) p.desc ~anchor:url in
-      let _ = Nethtml.write channel @@ encode_document desc in
-      let content = Buffer.contents buffer in
-      mk_post url title blog_url blog_title blog_name author date content)
-    posts in
-  let body = mk_body (String.concat "\n" recentList)
-                     (String.concat "\n<br/><br/><br/>\n" postList) in
-  (* write to file *)
-  let f = open_out out_file in
-  let () = output_string f body in
-  close_out f
-
-let main () =
-  let in_file = "examples/data_blog.txt" in
-  let out_file = "index.html" in
-  write_posts ?num_posts:(Some 50) ~out_file:out_file in_file
-
-let () = main ()

+ 0 - 37
examples/write_atom.ml

@@ -1,37 +0,0 @@
-(*
- * Copyright (c) 2014, OCaml.org project
- * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*)
-
-open River
-open Syndic.Atom
-
-let main () =
-  let in_file = "examples/data_blog.txt" in
-  let out_file = "atom.xml" in
-  let posts = get_posts in_file in
-  let entries = mk_entries posts in
-  let feed =
-    let authors = [ author "OCaml Labs" ] in
-    let id = Uri.of_string "http://ocaml.io/blogs/atom.xml" in
-    let links = [ link ~rel:Self id ] in
-    let title : text_construct = Text "OCaml Labs: Real World Functional Programming" in
-    let updated = Ptime_clock.now () in
-    feed ~authors ~links ~id ~title ~updated entries in
-  let out_channel = open_out out_file in
-  output feed (`Channel out_channel);
-  close_out out_channel
-
-let () = main ()

+ 0 - 12
lib/META

@@ -1,12 +0,0 @@
-# OASIS_START
-# DO NOT EDIT (digest: f1a4ef818c8cdcff4a535ce0c7cc752f)
-version = "0.1.3"
-description = "A RSS/Atom feed aggregator"
-requires = "lwt syndic netstring cohttp.lwt ptime.clock.os"
-archive(byte) = "river.cma"
-archive(byte, plugin) = "river.cma"
-archive(native) = "river.cmxa"
-archive(native, plugin) = "river.cmxs"
-exists_if = "river.cma"
-# OASIS_STOP
-

+ 4 - 0
lib/dune

@@ -0,0 +1,4 @@
+(library
+ (name river)
+ (public_name river)
+ (libraries cohttp cohttp-lwt cohttp-lwt-unix syndic netstring lambdasoup))

+ 40 - 0
lib/feed.ml

@@ -0,0 +1,40 @@
+(*
+ * Copyright (c) 2014, OCaml.org project
+ * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *)
+
+type source = { name : string; url : string }
+type content = Atom of Syndic.Atom.feed | Rss2 of Syndic.Rss2.channel
+
+let string_of_feed = function Atom _ -> "Atom" | Rss2 _ -> "Rss2"
+
+type t = { name : string; title : string; url : string; content : content }
+
+let classify_feed ~xmlbase (xml : string) =
+  try Atom (Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, xml))))
+  with Syndic.Atom.Error.Error _ -> (
+    try Rss2 (Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, xml))))
+    with Syndic.Rss2.Error.Error _ -> failwith "Neither Atom nor RSS2 feed")
+
+let fetch (source : source) =
+  let xmlbase = Uri.of_string @@ source.url in
+  let response = Http.get source.url in
+  let content = classify_feed ~xmlbase response in
+  let title =
+    match content with
+    | Atom atom -> Util.string_of_text_construct atom.Syndic.Atom.title
+    | Rss2 ch -> ch.Syndic.Rss2.title
+  in
+  { name = source.name; title; content; url = source.url }

+ 73 - 0
lib/http.ml

@@ -0,0 +1,73 @@
+(*
+ * Copyright (c) 2014, OCaml.org project
+ * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *)
+
+(* Download urls and cache them — especially during development, it slows down
+   the rendering to download over and over again the same URL. *)
+
+open Printf
+open Lwt
+open Cohttp
+open Cohttp.Response
+open Cohttp.Code
+
+exception Status_unhandled of string
+exception Timeout
+
+let max_num_redirects = 5
+
+let get_location_exn headers =
+  match Header.get headers "location" with
+  | Some x -> x
+  | None -> raise @@ Status_unhandled "Location HTTP header not found"
+
+let rec get_uri uri = function
+  | 0 -> raise (Status_unhandled "Too many redirects")
+  | n ->
+      let main =
+        Cohttp_lwt_unix.Client.get uri >>= fun (resp, body) ->
+        match resp.status with
+        | `OK -> Cohttp_lwt.Body.to_string body
+        | `Found | `See_other | `Moved_permanently | `Temporary_redirect
+        | `Permanent_redirect -> (
+            let l = Uri.of_string @@ get_location_exn resp.headers in
+            match Uri.host l with
+            | Some _ -> get_uri l (n - 1)
+            | None ->
+                let host = Uri.host uri in
+                let scheme = Uri.scheme uri in
+                let new_uri = Uri.with_scheme (Uri.with_host l host) scheme in
+                get_uri new_uri (n - 1))
+        | _ -> raise @@ Status_unhandled (string_of_status resp.status)
+      in
+      let timeout =
+        Lwt_unix.sleep (float_of_int 3) >>= fun () -> Lwt.fail Timeout
+      in
+      Lwt.pick [ main; timeout ]
+
+let get url =
+  eprintf "Downloading %s ... %!" url;
+  try
+    let data = Lwt_main.run @@ get_uri (Uri.of_string url) max_num_redirects in
+    eprintf "done %!\n";
+    data
+  with
+  | (Status_unhandled s | Failure s) as e ->
+      eprintf "Failed: %s\n" s;
+      raise e
+  | Timeout as e ->
+      eprintf "Failed: Timeout\n";
+      raise e

+ 9 - 3
lib/ri_http.mli → lib/http.mli

@@ -13,10 +13,16 @@
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*)
+ *)
 
 exception Status_unhandled of string
-
 exception Timeout
 
-val get : ?cache_secs:float -> string -> string
+val get : string -> string
+(** [get uri] returns the body of the response of the HTTP GET request on [uri].
+
+    If the answer of is a redirection, it will follow the redirections up to 5
+    redirects.
+
+    The answer is cached for [cache_secs] seconds, where [cache_secs] is 3600
+    seconds (1 hour) by default. *)

+ 80 - 0
lib/meta.ml

@@ -0,0 +1,80 @@
+(*
+ * Copyright (c) 2014, OCaml.org project
+ * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *)
+
+(** This module determines an image to be used as preview of a website.
+
+    It does this by following the same logic Google+ and other websites use, and
+    described in this article:
+    https://www.raymondcamden.com/2011/07/26/How-are-Facebook-and-Google-creating-link-previews *)
+
+let og_image html =
+  let open Soup in
+  let soup = parse html in
+  try soup $ "meta[property=og:image]" |> R.attribute "content" |> Option.some
+  with Failure _ -> None
+
+let image_src html =
+  let open Soup in
+  let soup = parse html in
+  try soup $ "link[rel=\"image_src\"]" |> R.attribute "href" |> Option.some
+  with Failure _ -> None
+
+let twitter_image html =
+  let open Soup in
+  let soup = parse html in
+  try
+    soup $ "meta[name=\"twitter:image\"]" |> R.attribute "content"
+    |> Option.some
+  with Failure _ -> None
+
+let og_description html =
+  let open Soup in
+  let soup = parse html in
+  try
+    soup $ "meta[property=og:description]" |> R.attribute "content"
+    |> Option.some
+  with Failure _ -> None
+
+let description html =
+  let open Soup in
+  let soup = parse html in
+  try
+    soup $ "meta[property=description]" |> R.attribute "content" |> Option.some
+  with Failure _ -> None
+
+let preview_image html =
+  let preview_image =
+    match og_image html with
+    | None -> (
+        match image_src html with
+        | None -> twitter_image html
+        | Some x -> Some x)
+    | Some x -> Some x
+  in
+  match Option.map String.trim preview_image with
+  | Some "" -> None
+  | Some x -> Some x
+  | None -> None
+
+let description html =
+  let preview_image =
+    match og_description html with None -> description html | Some x -> Some x
+  in
+  match Option.map String.trim preview_image with
+  | Some "" -> None
+  | Some x -> Some x
+  | None -> None

+ 273 - 0
lib/post.ml

@@ -0,0 +1,273 @@
+(*
+ * Copyright (c) 2014, OCaml.org project
+ * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *)
+
+type t = {
+  title : string;
+  link : Uri.t option;
+  date : Syndic.Date.t option;
+  feed : Feed.t;
+  author : string;
+  email : string;
+  description : Nethtml.document list;
+}
+
+let rec len_prefix_of_html html len =
+  if len <= 0 then (0, [])
+  else
+    match html with
+    | [] -> (len, [])
+    | el :: tl ->
+        let len, prefix_el = len_prefix_of_el el len in
+        let len, prefix_tl = len_prefix_of_html tl len in
+        (len, prefix_el :: prefix_tl)
+
+and len_prefix_of_el el len =
+  match el with
+  | Nethtml.Data d ->
+      let len' = len - String.length d in
+      (len', if len' >= 0 then el else Data (String.sub d 0 len ^ "…"))
+  | Nethtml.Element (tag, args, content) ->
+      (* Remove "id" and "name" to avoid duplicate anchors with the whole
+         post. *)
+      let args = List.filter (fun (n, _) -> n <> "id" && n <> "name") args in
+      let len, prefix_content = len_prefix_of_html content len in
+      (len, Element (tag, args, prefix_content))
+
+let prefix_of_html html len = snd (len_prefix_of_html html len)
+
+let rec filter_map l f =
+  match l with
+  | [] -> []
+  | a :: tl -> (
+      match f a with None -> filter_map tl f | Some a -> a :: filter_map tl f)
+
+let encode_html =
+  Netencoding.Html.encode ~prefer_name:false ~in_enc:`Enc_utf8 ()
+
+let decode_document html = Nethtml.decode ~enc:`Enc_utf8 html
+let encode_document html = Nethtml.encode ~enc:`Enc_utf8 html
+
+let rec resolve ?xmlbase html = List.map (resolve_links_el ~xmlbase) html
+
+and resolve_links_el ~xmlbase = function
+  | Nethtml.Element ("a", attrs, sub) ->
+      let attrs =
+        match List.partition (fun (t, _) -> t = "href") attrs with
+        | [], _ -> attrs
+        | (_, h) :: _, attrs ->
+            let src =
+              Uri.to_string (Syndic.XML.resolve ~xmlbase (Uri.of_string h))
+            in
+            ("href", src) :: attrs
+      in
+      Nethtml.Element ("a", attrs, resolve ?xmlbase sub)
+  | Nethtml.Element ("img", attrs, sub) ->
+      let attrs =
+        match List.partition (fun (t, _) -> t = "src") attrs with
+        | [], _ -> attrs
+        | (_, src) :: _, attrs ->
+            let src =
+              Uri.to_string (Syndic.XML.resolve ~xmlbase (Uri.of_string src))
+            in
+            ("src", src) :: attrs
+      in
+      Nethtml.Element ("img", attrs, sub)
+  | Nethtml.Element (e, attrs, sub) ->
+      Nethtml.Element (e, attrs, resolve ?xmlbase sub)
+  | Data _ as d -> d
+
+(* Things that posts should not contain *)
+let undesired_tags = [ "style"; "script" ]
+let undesired_attr = [ "id" ]
+
+let remove_undesired_attr =
+  List.filter (fun (a, _) -> not (List.mem a undesired_attr))
+
+let rec remove_undesired_tags html = filter_map html remove_undesired_tags_el
+
+and remove_undesired_tags_el = function
+  | Nethtml.Element (t, a, sub) ->
+      if List.mem t undesired_tags then None
+      else
+        Some
+          (Nethtml.Element
+             (t, remove_undesired_attr a, remove_undesired_tags sub))
+  | Data _ as d -> Some d
+
+let relaxed_html40_dtd =
+  (* Allow <font> inside <pre> because blogspot uses it! :-( *)
+  let constr =
+    `Sub_exclusions
+      ( [ "img"; "object"; "applet"; "big"; "small"; "sub"; "sup"; "basefont" ],
+        `Inline )
+  in
+  let dtd = Nethtml.relaxed_html40_dtd in
+  ("pre", (`Block, constr)) :: List.remove_assoc "pre" dtd
+
+let html_of_text ?xmlbase s =
+  try
+    Nethtml.parse (new Netchannels.input_string s) ~dtd:relaxed_html40_dtd
+    |> decode_document |> resolve ?xmlbase |> remove_undesired_tags
+  with _ -> [ Nethtml.Data (encode_html s) ]
+
+(* Do not trust sites using XML for HTML content. Convert to string and parse
+   back. (Does not always fix bad HTML unfortunately.) *)
+let html_of_syndic =
+  let ns_prefix _ = Some "" in
+  fun ?xmlbase h ->
+    html_of_text ?xmlbase
+      (String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) h))
+
+let string_of_option = function None -> "" | Some s -> s
+
+(* Email on the forge contain the name in parenthesis *)
+let forge_name_re = Str.regexp ".*(\\([^()]*\\))"
+
+let post_compare p1 p2 =
+  (* Most recent posts first. Posts with no date are always last *)
+  match (p1.date, p2.date) with
+  | Some d1, Some d2 -> Syndic.Date.compare d2 d1
+  | None, Some _ -> 1
+  | Some _, None -> -1
+  | None, None -> 1
+
+let rec remove n l =
+  if n <= 0 then l else match l with [] -> [] | _ :: tl -> remove (n - 1) tl
+
+let rec take n = function
+  | [] -> []
+  | e :: tl -> if n > 0 then e :: take (n - 1) tl else []
+
+(* Blog feed
+ ***********************************************************************)
+
+let post_of_atom ~(feed : Feed.t) (e : Syndic.Atom.entry) =
+  let link =
+    try
+      Some
+        (List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) e.links)
+          .href
+    with Not_found -> (
+      match e.links with l :: _ -> Some l.href | [] -> None)
+  in
+  let date =
+    match e.published with Some _ -> e.published | None -> Some e.updated
+  in
+  let description =
+    match e.content with
+    | Some (Text s) -> html_of_text s
+    | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
+    | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
+    | Some (Mime _) | Some (Src _) | None -> (
+        match e.summary with
+        | Some (Text s) -> html_of_text s
+        | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
+        | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
+        | None -> [])
+  in
+  let author, _ = e.authors in
+  {
+    title = Util.string_of_text_construct e.title;
+    link;
+    date;
+    feed;
+    author = author.name;
+    email = "";
+    description;
+  }
+
+let post_of_rss2 ~(feed : Feed.t) it =
+  let title, description =
+    match it.Syndic.Rss2.story with
+    | All (t, xmlbase, d) -> (
+        ( t,
+          match it.content with
+          | _, "" -> html_of_text ?xmlbase d
+          | xmlbase, c -> html_of_text ?xmlbase c ))
+    | Title t -> (t, [])
+    | Description (xmlbase, d) -> (
+        ( "",
+          match it.content with
+          | _, "" -> html_of_text ?xmlbase d
+          | xmlbase, c -> html_of_text ?xmlbase c ))
+  in
+  let link =
+    match (it.guid, it.link) with
+    | Some u, _ when u.permalink -> Some u.data
+    | _, Some _ -> it.link
+    | Some u, _ ->
+        (* Sometimes the guid is indicated with isPermaLink="false" but is
+           nonetheless the only URL we get (e.g. ocamlpro). *)
+        Some u.data
+    | None, None -> None
+  in
+  {
+    title;
+    link;
+    feed;
+    author = feed.name;
+    email = string_of_option it.author;
+    description;
+    date = it.pubDate;
+  }
+
+let posts_of_feed c =
+  match c.Feed.content with
+  | Feed.Atom f -> List.map (post_of_atom ~feed:c) f.Syndic.Atom.entries
+  | Feed.Rss2 ch -> List.map (post_of_rss2 ~feed:c) ch.Syndic.Rss2.items
+
+let string_of_html html =
+  let buffer = Buffer.create 1024 in
+  let channel = new Netchannels.output_buffer buffer in
+  let () = Nethtml.write channel @@ encode_document html in
+  Buffer.contents buffer
+
+let mk_entry post =
+  let content = Syndic.Atom.Html (None, string_of_html post.description) in
+  let contributors =
+    [ Syndic.Atom.author ~uri:(Uri.of_string post.feed.url) post.feed.name ]
+  in
+  let links =
+    match post.link with
+    | Some l -> [ Syndic.Atom.link ~rel:Syndic.Atom.Alternate l ]
+    | None -> []
+  in
+  (* TODO: include source *)
+  let id =
+    match post.link with
+    | Some l -> l
+    | None -> Uri.of_string (Digest.to_hex (Digest.string post.title))
+  in
+  let authors = (Syndic.Atom.author ~email:post.email post.author, []) in
+  let title : Syndic.Atom.text_construct = Syndic.Atom.Text post.title in
+  let updated =
+    match post.date with
+    (* Atom entry requires a date but RSS2 does not. So if a date
+     * is not available, just capture the current date. *)
+    | None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get
+    | Some d -> d
+  in
+  Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated
+    ()
+
+let mk_entries posts = List.map mk_entry posts
+
+let get_posts ?n ?(ofs = 0) planet_feeds =
+  let posts = List.concat @@ List.map posts_of_feed planet_feeds in
+  let posts = List.sort post_compare posts in
+  let posts = remove ofs posts in
+  match n with None -> posts | Some n -> take n posts

+ 0 - 81
lib/ri_feeds.ml

@@ -1,81 +0,0 @@
-(*
- * Copyright (c) 2014, OCaml.org project
- * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*)
-
-open Syndic
-open Ri_http
-open Printf
-
-(* Feeds
-***********************************************************************)
-
-type source = {
-  name : string;
-  url  : string
-}
-
-type feed =
-  | Atom of Atom.feed
-  | Rss2 of Rss2.channel
-  | Broken of string (* the argument gives the reason *)
-
-let string_of_feed = function
-  | Atom _ -> "Atom"
-  | Rss2 _ -> "Rss2"
-  | Broken s -> "Broken: " ^ s
-
-type contributor = {
-  name  : string;
-  title : string;
-  url   : string;
-  feed  : feed;
-}
-
-let gather_sources file_name =
-  let add_feed acc line =
-    try
-      let i = String.index line '|' in
-      let name = String.sub line 0 i in
-      let url = String.sub line (i+1)
-          (String.length line - i - 1) in
-      {name;url} :: acc
-    with Not_found -> acc in
-  List.fold_left add_feed [] (Ri_utils.lines_of_file file_name)
-
-let classify_feed ~xmlbase (xml: string) =
-  try Atom(Atom.parse ~xmlbase (Xmlm.make_input (`String(0, xml))))
-  with Atom.Error.Error _ ->
-          try Rss2(Rss2.parse ~xmlbase (Xmlm.make_input (`String(0, xml))))
-          with Rss2.Error.Error _ ->
-                Broken "Neither Atom nor RSS2 feed"
-
-let contributor_of_source (source : source) =
-  try
-    let xmlbase = Uri.of_string @@ source.url in
-    let feed = classify_feed ~xmlbase (Ri_http.get source.url) in
-    let title = match feed with
-    | Atom atom -> Ri_utils.string_of_text_construct atom.Atom.title
-    | Rss2 ch -> ch.Rss2.title
-    | Broken _ -> "" in
-    { name = source.name; title; feed; url = source.url}
-  with
-  | Status_unhandled s | Failure s ->
-      { name = source.name; title=""; feed = Broken s;
-        url = source.url }
-  | Timeout ->
-      { name = source.name; title=""; feed = Broken "Timeout";
-        url = source.url }
-

+ 0 - 37
lib/ri_feeds.mli

@@ -1,37 +0,0 @@
-(*
- * Copyright (c) 2014, OCaml.org project
- * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*)
-
-type source = {
-  name : string;
-  url  : string
-}
-
-type feed =
-  | Atom of Syndic.Atom.feed
-  | Rss2 of Syndic.Rss2.channel
-  | Broken of string (* the argument gives the reason *)
-
-type contributor = {
-  name  : string;
-  title : string;
-  url   : string;
-  feed  : feed;
-}
-
-val gather_sources : string -> source list
-
-val contributor_of_source : source -> contributor

+ 0 - 101
lib/ri_http.ml

@@ -1,101 +0,0 @@
-(*
- * Copyright (c) 2014, OCaml.org project
- * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*)
-
-(* Download urls and cache them — especially during development, it
-   slows down the rendering to download over and over again the same
-   URL. *)
-
-open Printf
-open Lwt
-open Cohttp
-open Cohttp_lwt_unix
-open Cohttp.Response
-open Cohttp.Header
-open Cohttp.Code
-
-let age fn =
-  let now = Unix.time () in (* in sec *)
-  let modif = (Unix.stat fn).Unix.st_mtime in
-  now -. modif
-
-let time_of_secs s =
-  let s = truncate s in
-  let m = s / 60 and s = s mod 60 in
-  let h = m / 60 and m = m mod 60 in
-  sprintf "%i:%02im%is" h m s
-
-exception Status_unhandled of string
-
-exception Timeout
-
-let max_num_redirects = 5
-
-let get_location headers =
-  let (k,v) = List.find (fun (k,v) -> k = "location") @@ Header.to_list headers
-  in v
-
-let rec get_uri uri = function
-  | 0 -> raise (Status_unhandled "Too many redirects")
-  | n ->
-      let main =
-        Cohttp_lwt_unix.Client.get uri >>= fun (resp, body) ->
-        match resp.status with
-        | `OK -> Cohttp_lwt_body.to_string body
-        | `Found | `See_other | `Moved_permanently ->
-            (let l = Uri.of_string @@ get_location resp.headers in
-            match Uri.host l with
-            | Some _ -> get_uri l (n-1)
-            | None ->
-                let host = Uri.host uri in
-                let scheme = Uri.scheme uri in
-                let new_uri = Uri.with_scheme (Uri.with_host l host) scheme in
-                get_uri new_uri (n-1))
-        | _ -> raise @@ Status_unhandled (string_of_status resp.status)
-      in
-      let timeout = Lwt_unix.sleep (float_of_int 3) >>= fun () ->
-                    Lwt.fail Timeout
-      in
-      Lwt.pick [main; timeout]
-
-
-let cache_secs = 3600. (* 1h *)
-
-let get ?(cache_secs=cache_secs) url =
-  let md5 = Digest.to_hex(Digest.string url) in
-  let fn = Filename.concat (Filename.get_temp_dir_name ()) ("ocamlorg-" ^ md5) in
-  eprintf "Downloading %s ... %!" url;
-  let get_from_cache () =
-    let fh = open_in fn in
-    let data = input_value fh in
-    close_in fh;
-    eprintf "done.\n  (using cache %s, updated %s ago).\n%!"
-            fn (time_of_secs(age fn));
-    data in
-  if Sys.file_exists fn && age fn <= cache_secs then get_from_cache()
-  else (
-    try
-      let data = Lwt_main.run @@ get_uri (Uri.of_string url) max_num_redirects in
-      eprintf "done %!";
-      let fh = open_out fn in
-      output_value fh data;
-      close_out fh;
-      eprintf "(cached).\n%!";
-      data
-    with
-    | (Status_unhandled s | Failure s) as e -> (eprintf "Failed: %s\n" s; raise e)
-    | Timeout as e -> (eprintf "Failed: Timeout\n"; raise e)
-  )

+ 0 - 263
lib/ri_posts.ml

@@ -1,263 +0,0 @@
-(*
- * Copyright (c) 2014, OCaml.org project
- * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*)
-
-open Ri_feeds
-open Nethtml
-open Printf
-open Syndic
-open Syndic.Atom
-
-type html = Nethtml.document list
-
-(** Our representation of a "post". *)
-type post = {
-  title : string;
-  link  : Uri.t option;
-  date  : Syndic.Date.t option;
-  contributor : contributor;
-  author : string;
-  email : string;
-  desc  : html;
-}
-
-(* Utils
- ***********************************************************************)
-
-let rec length_html html =
-  List.fold_left (fun l h -> l + length_html_el h) 0 html
-and length_html_el = function
-  | Element(_, _, content) -> length_html content
-  | Data d -> String.length d
-
-let rec len_prefix_of_html html len =
-  if len <= 0 then 0, []
-  else match html with
-       | [] -> len, []
-       | el :: tl -> let len, prefix_el = len_prefix_of_el el len in
-                    let len, prefix_tl = len_prefix_of_html tl len in
-                    len, prefix_el :: prefix_tl
-and len_prefix_of_el el len =
-  match el with
-  | Data d ->
-     let len' = len - String.length d in
-     len', (if len' >= 0 then el else Data(String.sub d 0 len ^ "…"))
-  | Element(tag, args, content) ->
-     (* Remove "id" and "name" to avoid duplicate anchors with the
-        whole post. *)
-     let args = List.filter (fun (n,_) -> n <> "id" && n <> "name") args in
-     let len, prefix_content = len_prefix_of_html content len in
-     len, Element(tag, args, prefix_content)
-
-let rec prefix_of_html html len =
-  snd(len_prefix_of_html html len)
-
-let rec filter_map l f =
-  match l with
-  | [] -> []
-  | a :: tl -> match f a with
-              | None -> filter_map tl f
-              | Some a -> a :: filter_map tl f
-
-let encode_html =
-  Netencoding.Html.encode ~prefer_name:false ~in_enc:`Enc_utf8 ()
-
-let decode_document html = Nethtml.decode ~enc:`Enc_utf8 html
-
-let encode_document html = Nethtml.encode ~enc:`Enc_utf8 html
-
-let rec resolve ?xmlbase html =
-  List.map (resolve_links_el ~xmlbase) html
-and resolve_links_el ~xmlbase = function
-  | Nethtml.Element("a", attrs, sub) ->
-     let attrs = match List.partition (fun (t,_) -> t = "href") attrs with
-       | [], _ -> attrs
-       | (_, h) :: _, attrs ->
-          let src = Uri.to_string(XML.resolve xmlbase (Uri.of_string h)) in
-          ("href", src) :: attrs in
-     Nethtml.Element("a", attrs, resolve ?xmlbase sub)
-  | Nethtml.Element("img", attrs, sub) ->
-     let attrs = match List.partition (fun (t,_) -> t = "src") attrs with
-       | [], _ -> attrs
-       | (_, src) :: _, attrs ->
-          let src = Uri.to_string(XML.resolve xmlbase (Uri.of_string src)) in
-          ("src", src) :: attrs in
-     Nethtml.Element("img", attrs, sub)
-  | Nethtml.Element(e, attrs, sub) ->
-     Nethtml.Element(e, attrs, resolve ?xmlbase sub)
-  | Data _ as d -> d
-
-
-(* Things that posts should not contain *)
-let undesired_tags = ["style"; "script"]
-let undesired_attr = ["id"]
-
-let remove_undesired_attr =
-  List.filter (fun (a,_) -> not(List.mem a undesired_attr))
-
-let rec remove_undesired_tags html =
-  filter_map html remove_undesired_tags_el
-and remove_undesired_tags_el = function
-  | Nethtml.Element(t, a, sub) ->
-     if List.mem t undesired_tags then None
-     else Some(Nethtml.Element(t, remove_undesired_attr a,
-                               remove_undesired_tags sub))
-  | Data _ as d -> Some d
-
-let relaxed_html40_dtd =
-  (* Allow <font> inside <pre> because blogspot uses it! :-( *)
-  let constr = `Sub_exclusions([ "img"; "object"; "applet"; "big"; "small";
-                                 "sub"; "sup"; "basefont"],
-                               `Inline) in
-  let dtd = Nethtml.relaxed_html40_dtd in
-  ("pre", (`Block, constr)) :: List.remove_assoc "pre" dtd
-
-
-let html_of_text ?xmlbase s =
-  try Nethtml.parse (new Netchannels.input_string s)
-                    ~dtd:relaxed_html40_dtd
-      |> decode_document
-      |> resolve ?xmlbase
-      |> remove_undesired_tags
-  with _ ->
-    [Nethtml.Data(encode_html s)]
-
-(* Do not trust sites using XML for HTML content.  Convert to string
-   and parse back.  (Does not always fix bad HTML unfortunately.) *)
-let rec html_of_syndic =
-  let ns_prefix _ = Some "" in
-  fun ?xmlbase h ->
-  html_of_text ?xmlbase
-               (String.concat "" (List.map (XML.to_string ~ns_prefix) h))
-
-
-
-let string_of_option = function None -> "" | Some s -> s
-
-(* Email on the forge contain the name in parenthesis *)
-let forge_name_re =
-  Str.regexp ".*(\\([^()]*\\))"
-
-let post_compare p1 p2 =
-  (* Most recent posts first.  Posts with no date are always last *)
-  match p1.date, p2.date with
-  | Some d1, Some d2 -> Syndic.Date.compare d2 d1
-  | None, Some _ -> 1
-  | Some _, None -> -1
-  | None, None -> 1
-
-let rec remove n l =
-  if n <= 0 then l
-  else match l with [] -> []
-                  | _ :: tl -> remove (n - 1) tl
-
-let rec take n = function
-  | [] -> []
-  | e :: tl -> if n > 0 then e :: take (n-1) tl else []
-
-(* Blog feed
- ***********************************************************************)
-
-let post_of_atom ~contributor (e: Atom.entry) =
-  let open Atom in
-  let link = try Some (List.find (fun l -> l.rel = Alternate) e.links).href
-             with Not_found -> match e.links with
-                              | l :: _ -> Some l.href
-                              | [] -> None in
-  let date = match e.published with
-    | Some _ -> e.published
-    | None -> Some e.updated in
-  let desc = match e.content with
-    | Some(Text s) -> html_of_text s
-    | Some(Html(xmlbase, s)) -> html_of_text ?xmlbase s
-    | Some(Xhtml(xmlbase, h)) -> html_of_syndic ?xmlbase h
-    | Some(Mime _) | Some(Src _)
-    | None ->
-       match e.summary with
-       | Some(Text s) -> html_of_text s
-       | Some(Html(xmlbase, s)) -> html_of_text ?xmlbase s
-       | Some(Xhtml(xmlbase, h)) -> html_of_syndic ?xmlbase h
-       | None -> [] in
-  let author, _ = e.authors in
-  { title = Ri_utils.string_of_text_construct e.title;
-    link;  date; contributor; author = author.name;
-    email = ""; desc }
-
-let post_of_rss2 ~(contributor: contributor) it =
-  let open Syndic.Rss2 in
-  let title, desc = match it.story with
-    | All (t, xmlbase, d) ->
-       t, (match it.content with
-           | (_, "") -> html_of_text ?xmlbase d
-           | (xmlbase, c) -> html_of_text ?xmlbase c)
-    | Title t -> t, []
-    | Description(xmlbase, d) ->
-       "", (match it.content with
-            | (_, "") -> html_of_text ?xmlbase d
-            | (xmlbase, c) -> html_of_text ?xmlbase c) in
-  let link = match it.guid, it.link with
-    | Some u, _ when u.permalink -> Some u.data
-    | _, Some _ -> it.link
-    | Some u, _ ->
-       (* Sometimes the guid is indicated with isPermaLink="false" but
-          is nonetheless the only URL we get (e.g. ocamlpro). *)
-       Some u.data
-    | None, None -> None in
-  { title; link; contributor; author = contributor.name;
-    email = string_of_option it.author; desc; date = it.pubDate }
-
-let posts_of_contributor c =
-  match c.feed with
-  | Atom f -> List.map (post_of_atom ~contributor:c) f.Atom.entries
-  | Rss2 ch -> List.map (post_of_rss2 ~contributor:c) ch.Rss2.items
-  | Broken _ -> []
-
-
-let string_of_html html =
-  let buffer = Buffer.create 1024 in
-  let channel = new Netchannels.output_buffer buffer in
-  let () = Nethtml.write channel @@ encode_document html in
-  Buffer.contents buffer
-
-let mk_entry post : entry =
-  let content = Html (None, string_of_html post.desc) in
-  let contributors = [ author ~uri:(Uri.of_string post.contributor.url)
-                              post.contributor.name ] in
-  let links = match post.link with
-              | Some l -> [ link ~rel:Syndic.Atom.Alternate l ]
-              | None -> [] in
-  (* TODO: include source *)
-  let id = match post.link with
-           | Some l -> l
-           | None -> Uri.of_string (Digest.to_hex (Digest.string (post.title))) in
-  let authors = (author ~email:post.email post.author, []) in
-  let title : text_construct = Text post.title in
-  let updated = match post.date with
-                  (* Atom entry requires a date but RSS2 does not. So if a date
-                   * is not available, just capture the current date. *)
-                  | None -> Ptime_clock.now ()
-                  | Some d -> d in
-  entry ~content ~contributors ~links ~id ~authors ~title ~updated ()
-
-let mk_entries posts = List.map mk_entry posts
-
-let get_posts ?n ?(ofs=0) planet_feeds =
-  let posts = List.concat @@ List.map posts_of_contributor planet_feeds in
-  let posts = List.sort post_compare posts in
-  let posts = remove ofs posts in
-  match n with
-  | None -> posts
-  | Some n -> take n posts

+ 0 - 35
lib/ri_posts.mli

@@ -1,35 +0,0 @@
-(*
- * Copyright (c) 2014, OCaml.org project
- * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*)
-
-type html = Nethtml.document list
-
-(** Our representation of a "post". *)
-type post = {
-  title : string;
-  link  : Uri.t option;
-  date  : Syndic.Date.t option;
-  contributor : Ri_feeds.contributor;
-  author : string;
-  email : string;
-  desc  : html;
-}
-
-val get_posts: ?n:int -> ?ofs:int -> Ri_feeds.contributor list -> post list
-
-val prefix_of_html: html -> int -> html
-
-val mk_entries : post list -> Syndic.Atom.entry list

+ 18 - 7
lib/river.ml

@@ -13,12 +13,23 @@
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*)
+ *)
 
-include Ri_posts
-include Ri_feeds
+type source = Feed.source = { name : string; url : string }
+type feed = Feed.t
+type post = Post.t
 
-let get_posts ?n ?ofs file_name =
-  let sources = gather_sources file_name in
-  let contributors = List.map contributor_of_source sources in
-  get_posts ?n ?ofs contributors
+let fetch = Feed.fetch
+let name feed = feed.Feed.name
+let url feed = feed.Feed.url
+let posts feeds = Post.get_posts feeds
+let title post = post.Post.title
+let link post = post.Post.link
+let date post = post.Post.date
+let feed post = post.Post.feed
+let author post = post.Post.author
+let email post = post.Post.email
+let content post = Post.string_of_html post.Post.description
+let meta_description post = Meta.description (content post)
+let seo_image post = Meta.preview_image (content post)
+let create_atom_entries = Post.mk_entries

+ 0 - 8
lib/river.mldylib

@@ -1,8 +0,0 @@
-# OASIS_START
-# DO NOT EDIT (digest: 558c72f6c1f2c6efcd1456ab263a58b1)
-River
-Ri_http
-Ri_feeds
-Ri_posts
-Ri_utils
-# OASIS_STOP

+ 58 - 53
lib/river.mli

@@ -13,56 +13,61 @@
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*)
-
-
-type html = Nethtml.document list
-
-type feed =
-    Atom of Syndic.Atom.feed
-  | Rss2 of Syndic.Rss2.channel
-  | Broken of string
-(** The feed is either an Atom or Rss2. If the feed is Broken [message], then
-    the [message] gives the reason. *)
-
-type contributor = {
-  name  : string;
-  title : string;
-  url   : string;
-  feed  : feed;
-}
-(** Feed information. *)
-
-type post = {
-  title : string;
-  link  : Uri.t option;
-  date  : Syndic.Date.t option;
-  contributor : contributor;
-  author : string;
-  email : string;
-  desc  : html;
-}
-(** Each post has a title, author, email and content (desc). The link, if
-    available, points to the location of the original post. *)
-
-val get_posts: ?n:int -> ?ofs:int -> string -> post list
-(** [get_posts n ofs fname] fetches a deduplicated list of posts, sorted based
-    on the date, with the lastest post appearing first. The optional argument [n]
-    fetches the first [n] posts. By default, all the posts are fetched. [ofs]
-    represents the offset into the post list. For example, [get_posts 10 10]
-    fetches the posts 10 to 20.
-
-    [fname] is the input file with the list of feeds. The format is:
-
-      <feed_name>|<feed_url>
-      <feed_name>|<feed_url>
-      ...
-  *)
-
-val prefix_of_html: html -> int -> html
-(** [prefix_of_html html n] truncates the given document to [n] characters.
-    The truncated document is ensured to be a well-formed docuemnt. *)
-
-val mk_entries: post list -> Syndic.Atom.entry list
-(** [mk_entries posts] creates a list of atom entries, which can then be used to
-    create an atom feed that is an aggregate of the posts. *)
+ *)
+
+type source = { name : string; url : string }
+(** The source of a feed. *)
+
+type feed
+type post
+
+val fetch : source -> feed
+(** [fetch source] returns an Atom or RSS feed from a source. *)
+
+val name : feed -> string
+(** [name feed] is the name of the feed source passed to [fetch]. *)
+
+val url : feed -> string
+(** [url feed] is the url of the feed source passed to [fetch]. *)
+
+val posts : feed list -> post list
+(** [posts feeds] is the list of deduplicated posts of the given feeds. *)
+
+val feed : post -> feed
+(** [feed post] is the feed the post originates from. *)
+
+val title : post -> string
+(** [title post] is the title of the post. *)
+
+val link : post -> Uri.t option
+(** [link post] is the link of the post. *)
+
+val date : post -> Syndic.Date.t option
+(** [date post] is the date of the post. *)
+
+val author : post -> string
+(** [author post] is the author of the post. *)
+
+val email : post -> string
+(** [email post] is the email of the post. *)
+
+val content : post -> string
+(** [content post] is the content of the post. *)
+
+val meta_description : post -> string option
+(** [meta_description post] is the meta description of the post on the origin
+    site.
+
+    To get the meta description, we make get the content of [link post] and look
+    for an HTML meta tag with the name "description" or "og:description".*)
+
+val seo_image : post -> string option
+(** [seo_image post] is the image to be used by social networks and links to the
+    post.
+
+    To get the seo image, we make get the content of [link post] and look for an
+    HTML meta tag with the name "og:image" or "twitter:image". *)
+
+val create_atom_entries : post list -> Syndic.Atom.entry list
+(** [create_atom_feed posts] creates a list of atom entries, which can then be
+    used to create an atom feed that is an aggregate of the posts. *)

+ 0 - 8
lib/river.mllib

@@ -1,8 +0,0 @@
-# OASIS_START
-# DO NOT EDIT (digest: 558c72f6c1f2c6efcd1456ab263a58b1)
-River
-Ri_http
-Ri_feeds
-Ri_posts
-Ri_utils
-# OASIS_STOP

+ 3 - 16
lib/ri_utils.ml → lib/util.ml

@@ -13,7 +13,7 @@
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*)
+ *)
 
 open Syndic
 
@@ -29,18 +29,5 @@ let syndic_to_string x =
 
 let string_of_text_construct : Atom.text_construct -> string = function
   (* FIXME: we probably would like to parse the HTML and remove the tags *)
-  | Atom.Text s | Atom.Html(_,s) -> s
-  | Atom.Xhtml(_, x) -> syndic_to_string x
-
-let lines_of_file fname =
-  let lines = ref [] in
-  let fh = open_in fname in
-  try
-    while true do
-      lines := input_line fh :: !lines
-    done;
-    assert false
-  with End_of_file ->
-    close_in fh;
-    List.rev !lines
-
+  | Atom.Text s | Atom.Html (_, s) -> s
+  | Atom.Xhtml (_, x) -> syndic_to_string x

+ 0 - 895
myocamlbuild.ml

@@ -1,895 +0,0 @@
-(* OASIS_START *)
-(* DO NOT EDIT (digest: 85895cf456e4f70c166a63a818065a28) *)
-module OASISGettext = struct
-(* # 22 "src/oasis/OASISGettext.ml" *)
-
-
-  let ns_ str = str
-  let s_ str = str
-  let f_ (str: ('a, 'b, 'c, 'd) format4) = str
-
-
-  let fn_ fmt1 fmt2 n =
-    if n = 1 then
-      fmt1^^""
-    else
-      fmt2^^""
-
-
-  let init = []
-end
-
-module OASISString = struct
-(* # 22 "src/oasis/OASISString.ml" *)
-
-
-  (** Various string utilities.
-
-      Mostly inspired by extlib and batteries ExtString and BatString libraries.
-
-      @author Sylvain Le Gall
-  *)
-
-
-  let nsplitf str f =
-    if str = "" then
-      []
-    else
-      let buf = Buffer.create 13 in
-      let lst = ref [] in
-      let push () =
-        lst := Buffer.contents buf :: !lst;
-        Buffer.clear buf
-      in
-      let str_len = String.length str in
-      for i = 0 to str_len - 1 do
-        if f str.[i] then
-          push ()
-        else
-          Buffer.add_char buf str.[i]
-      done;
-      push ();
-      List.rev !lst
-
-
-  (** [nsplit c s] Split the string [s] at char [c]. It doesn't include the
-      separator.
-  *)
-  let nsplit str c =
-    nsplitf str ((=) c)
-
-
-  let find ~what ?(offset=0) str =
-    let what_idx = ref 0 in
-    let str_idx = ref offset in
-    while !str_idx < String.length str &&
-          !what_idx < String.length what do
-      if str.[!str_idx] = what.[!what_idx] then
-        incr what_idx
-      else
-        what_idx := 0;
-      incr str_idx
-    done;
-    if !what_idx <> String.length what then
-      raise Not_found
-    else
-      !str_idx - !what_idx
-
-
-  let sub_start str len =
-    let str_len = String.length str in
-    if len >= str_len then
-      ""
-    else
-      String.sub str len (str_len - len)
-
-
-  let sub_end ?(offset=0) str len =
-    let str_len = String.length str in
-    if len >= str_len then
-      ""
-    else
-      String.sub str 0 (str_len - len)
-
-
-  let starts_with ~what ?(offset=0) str =
-    let what_idx = ref 0 in
-    let str_idx = ref offset in
-    let ok = ref true in
-    while !ok &&
-          !str_idx < String.length str &&
-          !what_idx < String.length what do
-      if str.[!str_idx] = what.[!what_idx] then
-        incr what_idx
-      else
-        ok := false;
-      incr str_idx
-    done;
-    !what_idx = String.length what
-
-
-  let strip_starts_with ~what str =
-    if starts_with ~what str then
-      sub_start str (String.length what)
-    else
-      raise Not_found
-
-
-  let ends_with ~what ?(offset=0) str =
-    let what_idx = ref ((String.length what) - 1) in
-    let str_idx = ref ((String.length str) - 1) in
-    let ok = ref true in
-    while !ok &&
-          offset <= !str_idx &&
-          0 <= !what_idx do
-      if str.[!str_idx] = what.[!what_idx] then
-        decr what_idx
-      else
-        ok := false;
-      decr str_idx
-    done;
-    !what_idx = -1
-
-
-  let strip_ends_with ~what str =
-    if ends_with ~what str then
-      sub_end str (String.length what)
-    else
-      raise Not_found
-
-
-  let replace_chars f s =
-    let buf = Buffer.create (String.length s) in
-    String.iter (fun c -> Buffer.add_char buf (f c)) s;
-    Buffer.contents buf
-
-  let lowercase_ascii =
-    replace_chars
-      (fun c ->
-         if (c >= 'A' && c <= 'Z') then
-           Char.chr (Char.code c + 32)
-         else
-           c)
-
-  let uncapitalize_ascii s =
-    if s <> "" then
-      (lowercase_ascii (String.sub s 0 1)) ^ (String.sub s 1 ((String.length s) - 1))
-    else
-      s
-
-  let uppercase_ascii =
-    replace_chars
-      (fun c ->
-         if (c >= 'a' && c <= 'z') then
-           Char.chr (Char.code c - 32)
-         else
-           c)
-
-  let capitalize_ascii s =
-    if s <> "" then
-      (uppercase_ascii (String.sub s 0 1)) ^ (String.sub s 1 ((String.length s) - 1))
-    else
-      s
-
-end
-
-module OASISUtils = struct
-(* # 22 "src/oasis/OASISUtils.ml" *)
-
-
-  open OASISGettext
-
-
-  module MapExt =
-  struct
-    module type S =
-    sig
-      include Map.S
-      val add_list: 'a t -> (key * 'a) list -> 'a t
-      val of_list: (key * 'a) list -> 'a t
-      val to_list: 'a t -> (key * 'a) list
-    end
-
-    module Make (Ord: Map.OrderedType) =
-    struct
-      include Map.Make(Ord)
-
-      let rec add_list t =
-        function
-          | (k, v) :: tl -> add_list (add k v t) tl
-          | [] -> t
-
-      let of_list lst = add_list empty lst
-
-      let to_list t = fold (fun k v acc -> (k, v) :: acc) t []
-    end
-  end
-
-
-  module MapString = MapExt.Make(String)
-
-
-  module SetExt  =
-  struct
-    module type S =
-    sig
-      include Set.S
-      val add_list: t -> elt list -> t
-      val of_list: elt list -> t
-      val to_list: t -> elt list
-    end
-
-    module Make (Ord: Set.OrderedType) =
-    struct
-      include Set.Make(Ord)
-
-      let rec add_list t =
-        function
-          | e :: tl -> add_list (add e t) tl
-          | [] -> t
-
-      let of_list lst = add_list empty lst
-
-      let to_list = elements
-    end
-  end
-
-
-  module SetString = SetExt.Make(String)
-
-
-  let compare_csl s1 s2 =
-    String.compare (OASISString.lowercase_ascii s1) (OASISString.lowercase_ascii s2)
-
-
-  module HashStringCsl =
-    Hashtbl.Make
-      (struct
-         type t = string
-         let equal s1 s2 = (compare_csl s1 s2) = 0
-         let hash s = Hashtbl.hash (OASISString.lowercase_ascii s)
-       end)
-
-  module SetStringCsl =
-    SetExt.Make
-      (struct
-         type t = string
-         let compare = compare_csl
-       end)
-
-
-  let varname_of_string ?(hyphen='_') s =
-    if String.length s = 0 then
-      begin
-        invalid_arg "varname_of_string"
-      end
-    else
-      begin
-        let buf =
-          OASISString.replace_chars
-            (fun c ->
-               if ('a' <= c && c <= 'z')
-                 ||
-                  ('A' <= c && c <= 'Z')
-                 ||
-                  ('0' <= c && c <= '9') then
-                 c
-               else
-                 hyphen)
-            s;
-        in
-        let buf =
-          (* Start with a _ if digit *)
-          if '0' <= s.[0] && s.[0] <= '9' then
-            "_"^buf
-          else
-            buf
-        in
-          OASISString.lowercase_ascii buf
-      end
-
-
-  let varname_concat ?(hyphen='_') p s =
-    let what = String.make 1 hyphen in
-    let p =
-      try
-        OASISString.strip_ends_with ~what p
-      with Not_found ->
-        p
-    in
-    let s =
-      try
-        OASISString.strip_starts_with ~what s
-      with Not_found ->
-        s
-    in
-      p^what^s
-
-
-  let is_varname str =
-    str = varname_of_string str
-
-
-  let failwithf fmt = Printf.ksprintf failwith fmt
-
-
-  let rec file_location ?pos1 ?pos2 ?lexbuf () =
-      match pos1, pos2, lexbuf with
-      | Some p, None, _ | None, Some p, _ ->
-        file_location ~pos1:p ~pos2:p ?lexbuf ()
-      | Some p1, Some p2, _ ->
-        let open Lexing in
-        let fn, lineno = p1.pos_fname, p1.pos_lnum in
-        let c1 = p1.pos_cnum - p1.pos_bol in
-        let c2 = c1 + (p2.pos_cnum - p1.pos_cnum) in
-        Printf.sprintf (f_ "file %S, line %d, characters %d-%d")  fn lineno c1 c2
-      | _, _, Some lexbuf ->
-        file_location
-          ~pos1:(Lexing.lexeme_start_p lexbuf)
-          ~pos2:(Lexing.lexeme_end_p lexbuf)
-          ()
-      | None, None, None ->
-        s_ "<position undefined>"
-
-
-  let failwithpf ?pos1 ?pos2 ?lexbuf fmt =
-    let loc = file_location ?pos1 ?pos2 ?lexbuf () in
-    Printf.ksprintf (fun s -> failwith (Printf.sprintf "%s: %s" loc s)) fmt
-
-
-end
-
-module OASISExpr = struct
-(* # 22 "src/oasis/OASISExpr.ml" *)
-
-
-  open OASISGettext
-  open OASISUtils
-
-
-  type test = string
-  type flag = string
-
-
-  type t =
-    | EBool of bool
-    | ENot of t
-    | EAnd of t * t
-    | EOr of t * t
-    | EFlag of flag
-    | ETest of test * string
-
-
-  type 'a choices = (t * 'a) list
-
-
-  let eval var_get t =
-    let rec eval' =
-      function
-        | EBool b ->
-            b
-
-        | ENot e ->
-            not (eval' e)
-
-        | EAnd (e1, e2) ->
-            (eval' e1) && (eval' e2)
-
-        | EOr (e1, e2) ->
-            (eval' e1) || (eval' e2)
-
-        | EFlag nm ->
-            let v =
-              var_get nm
-            in
-              assert(v = "true" || v = "false");
-              (v = "true")
-
-        | ETest (nm, vl) ->
-            let v =
-              var_get nm
-            in
-              (v = vl)
-    in
-      eval' t
-
-
-  let choose ?printer ?name var_get lst =
-    let rec choose_aux =
-      function
-        | (cond, vl) :: tl ->
-            if eval var_get cond then
-              vl
-            else
-              choose_aux tl
-        | [] ->
-            let str_lst =
-              if lst = [] then
-                s_ "<empty>"
-              else
-                String.concat
-                  (s_ ", ")
-                  (List.map
-                     (fun (cond, vl) ->
-                        match printer with
-                          | Some p -> p vl
-                          | None -> s_ "<no printer>")
-                     lst)
-            in
-              match name with
-                | Some nm ->
-                    failwith
-                      (Printf.sprintf
-                         (f_ "No result for the choice list '%s': %s")
-                         nm str_lst)
-                | None ->
-                    failwith
-                      (Printf.sprintf
-                         (f_ "No result for a choice list: %s")
-                         str_lst)
-    in
-      choose_aux (List.rev lst)
-
-
-end
-
-
-# 437 "myocamlbuild.ml"
-module BaseEnvLight = struct
-(* # 22 "src/base/BaseEnvLight.ml" *)
-
-
-  module MapString = Map.Make(String)
-
-
-  type t = string MapString.t
-
-
-  let default_filename = Filename.concat (Sys.getcwd ()) "setup.data"
-
-
-  let load ?(allow_empty=false) ?(filename=default_filename) ?stream () =
-    let line = ref 1 in
-    let lexer st =
-      let st_line =
-        Stream.from
-          (fun _ ->
-             try
-               match Stream.next st with
-               | '\n' -> incr line; Some '\n'
-               | c -> Some c
-             with Stream.Failure -> None)
-      in
-      Genlex.make_lexer ["="] st_line
-    in
-    let rec read_file lxr mp =
-      match Stream.npeek 3 lxr with
-      | [Genlex.Ident nm; Genlex.Kwd "="; Genlex.String value] ->
-        Stream.junk lxr; Stream.junk lxr; Stream.junk lxr;
-        read_file lxr (MapString.add nm value mp)
-      | [] -> mp
-      | _ ->
-        failwith
-          (Printf.sprintf "Malformed data file '%s' line %d" filename !line)
-    in
-    match stream with
-    | Some st -> read_file (lexer st) MapString.empty
-    | None ->
-      if Sys.file_exists filename then begin
-        let chn = open_in_bin filename in
-        let st = Stream.of_channel chn in
-        try
-          let mp = read_file (lexer st) MapString.empty in
-          close_in chn; mp
-        with e ->
-          close_in chn; raise e
-      end else if allow_empty then begin
-        MapString.empty
-      end else begin
-        failwith
-          (Printf.sprintf
-             "Unable to load environment, the file '%s' doesn't exist."
-             filename)
-      end
-
-  let rec var_expand str env =
-    let buff = Buffer.create ((String.length str) * 2) in
-    Buffer.add_substitute
-      buff
-      (fun var ->
-         try
-           var_expand (MapString.find var env) env
-         with Not_found ->
-           failwith
-             (Printf.sprintf
-                "No variable %s defined when trying to expand %S."
-                var
-                str))
-      str;
-    Buffer.contents buff
-
-
-  let var_get name env = var_expand (MapString.find name env) env
-  let var_choose lst env = OASISExpr.choose (fun nm -> var_get nm env) lst
-end
-
-
-# 517 "myocamlbuild.ml"
-module MyOCamlbuildFindlib = struct
-(* # 22 "src/plugins/ocamlbuild/MyOCamlbuildFindlib.ml" *)
-
-
-  (** OCamlbuild extension, copied from
-    * https://ocaml.org/learn/tutorials/ocamlbuild/Using_ocamlfind_with_ocamlbuild.html
-    * by N. Pouillard and others
-    *
-    * Updated on 2016-06-02
-    *
-    * Modified by Sylvain Le Gall
-  *)
-  open Ocamlbuild_plugin
-
-
-  type conf = {no_automatic_syntax: bool}
-
-
-  let run_and_read = Ocamlbuild_pack.My_unix.run_and_read
-
-
-  let blank_sep_strings = Ocamlbuild_pack.Lexers.blank_sep_strings
-
-
-  let exec_from_conf exec =
-    let exec =
-      let env = BaseEnvLight.load ~allow_empty:true () in
-      try
-        BaseEnvLight.var_get exec env
-      with Not_found ->
-        Printf.eprintf "W: Cannot get variable %s\n" exec;
-        exec
-    in
-    let fix_win32 str =
-      if Sys.os_type = "Win32" then begin
-        let buff = Buffer.create (String.length str) in
-        (* Adapt for windowsi, ocamlbuild + win32 has a hard time to handle '\\'.
-        *)
-        String.iter
-          (fun c -> Buffer.add_char buff (if c = '\\' then '/' else c))
-          str;
-        Buffer.contents buff
-      end else begin
-        str
-      end
-    in
-    fix_win32 exec
-
-
-  let split s ch =
-    let buf = Buffer.create 13 in
-    let x = ref [] in
-    let flush () =
-      x := (Buffer.contents buf) :: !x;
-      Buffer.clear buf
-    in
-    String.iter
-      (fun c ->
-         if c = ch then
-           flush ()
-         else
-           Buffer.add_char buf c)
-      s;
-    flush ();
-    List.rev !x
-
-
-  let split_nl s = split s '\n'
-
-
-  let before_space s =
-    try
-      String.before s (String.index s ' ')
-    with Not_found -> s
-
-  (* ocamlfind command *)
-  let ocamlfind x = S[Sh (exec_from_conf "ocamlfind"); x]
-
-  (* This lists all supported packages. *)
-  let find_packages () =
-    List.map before_space (split_nl & run_and_read (exec_from_conf "ocamlfind" ^ " list"))
-
-
-  (* Mock to list available syntaxes. *)
-  let find_syntaxes () = ["camlp4o"; "camlp4r"]
-
-
-  let well_known_syntax = [
-    "camlp4.quotations.o";
-    "camlp4.quotations.r";
-    "camlp4.exceptiontracer";
-    "camlp4.extend";
-    "camlp4.foldgenerator";
-    "camlp4.listcomprehension";
-    "camlp4.locationstripper";
-    "camlp4.macro";
-    "camlp4.mapgenerator";
-    "camlp4.metagenerator";
-    "camlp4.profiler";
-    "camlp4.tracer"
-  ]
-
-
-  let dispatch conf =
-    function
-      | After_options ->
-        (* By using Before_options one let command line options have an higher
-         * priority on the contrary using After_options will guarantee to have
-         * the higher priority override default commands by ocamlfind ones *)
-        Options.ocamlc     := ocamlfind & A"ocamlc";
-        Options.ocamlopt   := ocamlfind & A"ocamlopt";
-        Options.ocamldep   := ocamlfind & A"ocamldep";
-        Options.ocamldoc   := ocamlfind & A"ocamldoc";
-        Options.ocamlmktop := ocamlfind & A"ocamlmktop";
-        Options.ocamlmklib := ocamlfind & A"ocamlmklib"
-
-      | After_rules ->
-
-        (* Avoid warnings for unused tag *)
-        flag ["tests"] N;
-
-        (* When one link an OCaml library/binary/package, one should use
-         * -linkpkg *)
-        flag ["ocaml"; "link"; "program"] & A"-linkpkg";
-
-        (* For each ocamlfind package one inject the -package option when
-         * compiling, computing dependencies, generating documentation and
-         * linking. *)
-        List.iter
-          begin fun pkg ->
-            let base_args = [A"-package"; A pkg] in
-            (* TODO: consider how to really choose camlp4o or camlp4r. *)
-            let syn_args = [A"-syntax"; A "camlp4o"] in
-            let (args, pargs) =
-              (* Heuristic to identify syntax extensions: whether they end in
-                 ".syntax"; some might not.
-              *)
-              if not (conf.no_automatic_syntax) &&
-                 (Filename.check_suffix pkg "syntax" ||
-                  List.mem pkg well_known_syntax) then
-                (syn_args @ base_args, syn_args)
-              else
-                (base_args, [])
-            in
-            flag ["ocaml"; "compile";  "pkg_"^pkg] & S args;
-            flag ["ocaml"; "ocamldep"; "pkg_"^pkg] & S args;
-            flag ["ocaml"; "doc";      "pkg_"^pkg] & S args;
-            flag ["ocaml"; "link";     "pkg_"^pkg] & S base_args;
-            flag ["ocaml"; "infer_interface"; "pkg_"^pkg] & S args;
-
-            (* TODO: Check if this is allowed for OCaml < 3.12.1 *)
-            flag ["ocaml"; "compile";  "package("^pkg^")"] & S pargs;
-            flag ["ocaml"; "ocamldep"; "package("^pkg^")"] & S pargs;
-            flag ["ocaml"; "doc";      "package("^pkg^")"] & S pargs;
-            flag ["ocaml"; "infer_interface"; "package("^pkg^")"] & S pargs;
-          end
-          (find_packages ());
-
-        (* Like -package but for extensions syntax. Morover -syntax is useless
-         * when linking. *)
-        List.iter begin fun syntax ->
-          flag ["ocaml"; "compile";  "syntax_"^syntax] & S[A"-syntax"; A syntax];
-          flag ["ocaml"; "ocamldep"; "syntax_"^syntax] & S[A"-syntax"; A syntax];
-          flag ["ocaml"; "doc";      "syntax_"^syntax] & S[A"-syntax"; A syntax];
-          flag ["ocaml"; "infer_interface"; "syntax_"^syntax] &
-          S[A"-syntax"; A syntax];
-        end (find_syntaxes ());
-
-        (* The default "thread" tag is not compatible with ocamlfind.
-         * Indeed, the default rules add the "threads.cma" or "threads.cmxa"
-         * options when using this tag. When using the "-linkpkg" option with
-         * ocamlfind, this module will then be added twice on the command line.
-         *
-         * To solve this, one approach is to add the "-thread" option when using
-         * the "threads" package using the previous plugin.
-        *)
-        flag ["ocaml"; "pkg_threads"; "compile"] (S[A "-thread"]);
-        flag ["ocaml"; "pkg_threads"; "doc"] (S[A "-I"; A "+threads"]);
-        flag ["ocaml"; "pkg_threads"; "link"] (S[A "-thread"]);
-        flag ["ocaml"; "pkg_threads"; "infer_interface"] (S[A "-thread"]);
-        flag ["c"; "pkg_threads"; "compile"] (S[A "-thread"]);
-        flag ["ocaml"; "package(threads)"; "compile"] (S[A "-thread"]);
-        flag ["ocaml"; "package(threads)"; "doc"] (S[A "-I"; A "+threads"]);
-        flag ["ocaml"; "package(threads)"; "link"] (S[A "-thread"]);
-        flag ["ocaml"; "package(threads)"; "infer_interface"] (S[A "-thread"]);
-        flag ["c"; "package(threads)"; "compile"] (S[A "-thread"]);
-
-      | _ ->
-        ()
-end
-
-module MyOCamlbuildBase = struct
-(* # 22 "src/plugins/ocamlbuild/MyOCamlbuildBase.ml" *)
-
-
-  (** Base functions for writing myocamlbuild.ml
-      @author Sylvain Le Gall
-    *)
-
-
-  open Ocamlbuild_plugin
-  module OC = Ocamlbuild_pack.Ocaml_compiler
-
-
-  type dir = string
-  type file = string
-  type name = string
-  type tag = string
-
-
-  type t =
-      {
-        lib_ocaml: (name * dir list * string list) list;
-        lib_c:     (name * dir * file list) list;
-        flags:     (tag list * (spec OASISExpr.choices)) list;
-        (* Replace the 'dir: include' from _tags by a precise interdepends in
-         * directory.
-         *)
-        includes:  (dir * dir list) list;
-      }
-
-
-(* # 110 "src/plugins/ocamlbuild/MyOCamlbuildBase.ml" *)
-
-
-  let env_filename = Pathname.basename BaseEnvLight.default_filename
-
-
-  let dispatch_combine lst =
-    fun e ->
-      List.iter
-        (fun dispatch -> dispatch e)
-        lst
-
-
-  let tag_libstubs nm =
-    "use_lib"^nm^"_stubs"
-
-
-  let nm_libstubs nm =
-    nm^"_stubs"
-
-
-  let dispatch t e =
-    let env = BaseEnvLight.load ~allow_empty:true () in
-      match e with
-        | Before_options ->
-            let no_trailing_dot s =
-              if String.length s >= 1 && s.[0] = '.' then
-                String.sub s 1 ((String.length s) - 1)
-              else
-                s
-            in
-              List.iter
-                (fun (opt, var) ->
-                   try
-                     opt := no_trailing_dot (BaseEnvLight.var_get var env)
-                   with Not_found ->
-                     Printf.eprintf "W: Cannot get variable %s\n" var)
-                [
-                  Options.ext_obj, "ext_obj";
-                  Options.ext_lib, "ext_lib";
-                  Options.ext_dll, "ext_dll";
-                ]
-
-        | After_rules ->
-            (* Declare OCaml libraries *)
-            List.iter
-              (function
-                 | nm, [], intf_modules ->
-                     ocaml_lib nm;
-                     let cmis =
-                       List.map (fun m -> (OASISString.uncapitalize_ascii m) ^ ".cmi")
-                                intf_modules in
-                     dep ["ocaml"; "link"; "library"; "file:"^nm^".cma"] cmis
-                 | nm, dir :: tl, intf_modules ->
-                     ocaml_lib ~dir:dir (dir^"/"^nm);
-                     List.iter
-                       (fun dir ->
-                          List.iter
-                            (fun str ->
-                               flag ["ocaml"; "use_"^nm; str] (S[A"-I"; P dir]))
-                            ["compile"; "infer_interface"; "doc"])
-                       tl;
-                     let cmis =
-                       List.map (fun m -> dir^"/"^(OASISString.uncapitalize_ascii m)^".cmi")
-                                intf_modules in
-                     dep ["ocaml"; "link"; "library"; "file:"^dir^"/"^nm^".cma"]
-                         cmis)
-              t.lib_ocaml;
-
-            (* Declare directories dependencies, replace "include" in _tags. *)
-            List.iter
-              (fun (dir, include_dirs) ->
-                 Pathname.define_context dir include_dirs)
-              t.includes;
-
-            (* Declare C libraries *)
-            List.iter
-              (fun (lib, dir, headers) ->
-                   (* Handle C part of library *)
-                   flag ["link"; "library"; "ocaml"; "byte"; tag_libstubs lib]
-                     (S[A"-dllib"; A("-l"^(nm_libstubs lib)); A"-cclib";
-                        A("-l"^(nm_libstubs lib))]);
-
-                   flag ["link"; "library"; "ocaml"; "native"; tag_libstubs lib]
-                     (S[A"-cclib"; A("-l"^(nm_libstubs lib))]);
-
-                   if bool_of_string (BaseEnvLight.var_get "native_dynlink" env) then
-                     flag ["link"; "program"; "ocaml"; "byte"; tag_libstubs lib]
-                         (S[A"-dllib"; A("dll"^(nm_libstubs lib))]);
-
-                   (* When ocaml link something that use the C library, then one
-                      need that file to be up to date.
-                      This holds both for programs and for libraries.
-                    *)
-                   dep ["link"; "ocaml"; tag_libstubs lib]
-                     [dir/"lib"^(nm_libstubs lib)^"."^(!Options.ext_lib)];
-
-                   dep  ["compile"; "ocaml"; tag_libstubs lib]
-                     [dir/"lib"^(nm_libstubs lib)^"."^(!Options.ext_lib)];
-
-                   (* TODO: be more specific about what depends on headers *)
-                   (* Depends on .h files *)
-                   dep ["compile"; "c"]
-                     headers;
-
-                   (* Setup search path for lib *)
-                   flag ["link"; "ocaml"; "use_"^lib]
-                     (S[A"-I"; P(dir)]);
-              )
-              t.lib_c;
-
-              (* Add flags *)
-              List.iter
-              (fun (tags, cond_specs) ->
-                 let spec = BaseEnvLight.var_choose cond_specs env in
-                 let rec eval_specs =
-                   function
-                     | S lst -> S (List.map eval_specs lst)
-                     | A str -> A (BaseEnvLight.var_expand str env)
-                     | spec -> spec
-                 in
-                   flag tags & (eval_specs spec))
-              t.flags
-        | _ ->
-            ()
-
-
-  let dispatch_default conf t =
-    dispatch_combine
-      [
-        dispatch t;
-        MyOCamlbuildFindlib.dispatch conf;
-      ]
-
-
-end
-
-
-# 878 "myocamlbuild.ml"
-open Ocamlbuild_plugin;;
-let package_default =
-  {
-     MyOCamlbuildBase.lib_ocaml = [("river", ["lib"], [])];
-     lib_c = [];