link_widgets.ml 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. (* Link target manipulation widgets *)
  2. module OH = Otoml.Helpers
  3. open Soupault_common
  4. (* By default, exclude three categories of links from target rewriting:
  5. 1. Links that have a URI schema (^([a-zA-Z0-9]+):), e.g. https://example.com
  6. 2. Links to anchors within the same page (^#), e.g. #my-anchor
  7. 3. Hand-made relative links (^\.), e.g. ../style.css
  8. 4. Protocol-relative URLs that begin with //
  9. *)
  10. let default_exclude_regex = "^((([a-zA-Z0-9]+):)|#|\\.|//)"
  11. let link_selectors = ["a"; "link"; "img"; "script"; "audio"; "video"; "object"; "embed"]
  12. let get_target_attr elem =
  13. let tag_name = Soup.name elem in
  14. match tag_name with
  15. | "a" -> "href"
  16. | "link" -> "href"
  17. | "img" -> "src"
  18. | "script" -> "src"
  19. | "audio" -> "src"
  20. | "video" -> "src"
  21. | "embed" -> "src"
  22. | "object" -> "data"
  23. | _ ->
  24. (* Shouldn't happen *)
  25. internal_error @@ Printf.sprintf
  26. "a relative_links or an absolute_links widget tried to process unsupported element <%s>" tag_name
  27. let target_matches only_regex exclude_regex target =
  28. match only_regex with
  29. | Some r ->
  30. if not (Regex_utils.Internal.matches target r)
  31. then (let () = Logs.debug @@ fun m -> m "Link target \"%s\" does not match only_target_regex" target in false)
  32. else true
  33. | None ->
  34. if (Regex_utils.Internal.matches target exclude_regex)
  35. then (let () = Logs.debug @@ fun m -> m "Link target \"%s\" matches exclude_target_regex" target in false)
  36. else true
  37. let relativize_link_target check_file env target =
  38. let open Defaults in
  39. (* Before doing any real work, check if the link target is pointing at a file that actually exists
  40. at a path relative to _this_ page. If it does, the target is _already correct_
  41. and doesn't need to be relativized.
  42. If the target points at a path that doesn't exist, it _probably_ comes from a page template
  43. at the top level, while the current page is deeper in the directory tree.
  44. We are checking in the target rather than source dir for two reasons:
  45. 1. soupault copies static assets from the site_dir before processing page files,
  46. so if an asset file exists in the site_dir, it's guaranteed to also be in the target_dir
  47. when this code runs;
  48. 2. If a file is not in the site_dir, it doesn't mean it's not in the target_dir either.
  49. It may be a dynamically generated asset created by a Lua plugin or an external script.
  50. *)
  51. if check_file && (Sys.file_exists (FilePath.concat env.target_dir target)) then target else
  52. (* Remove the build_dir from the target path to produce a path relative to the site root. *)
  53. let relative_target_dir = Regex_utils.Internal.replace env.target_dir ("^" ^ env.settings.build_dir) "" in
  54. (* The assumption is that the target is valid for a page at the site root.
  55. Thus, for pages in sub-directories, we need to add a "../" for every nesting level.
  56. *)
  57. let parent_path = Utils.split_path relative_target_dir |> List.map (fun _ -> "..") in
  58. (* Strip leading slashes *)
  59. let target = Regex_utils.Internal.replace target "^/+" "" in
  60. (* Prepend generated double-dot path to the original target. *)
  61. String.concat "/" (parent_path @ [target])
  62. let relativize elem env check_file only_regex exclude_regex =
  63. let target_attr = get_target_attr elem in
  64. let target = Soup.attribute target_attr elem in
  65. match target with
  66. | None ->
  67. Logs.debug @@ fun m -> m "Ignoring a <%s> element without \"%s\" attribute" (Soup.name elem) target_attr
  68. | Some target ->
  69. if not (target_matches only_regex exclude_regex target)
  70. then Logs.debug @@ fun m -> m "Link target \"%s\" is excluded by regex options, ignoring" target
  71. else begin
  72. let target = relativize_link_target check_file env target in
  73. Soup.set_attribute (get_target_attr elem) target elem
  74. end
  75. let absolutize elem env prefix check_file only_regex exclude_regex =
  76. let open Defaults in
  77. let target_attr = get_target_attr elem in
  78. let target = Soup.attribute target_attr elem in
  79. match target with
  80. | None ->
  81. Logs.debug @@ fun m -> m "Ignoring a <%s> element without \"%s\" attribute" (Soup.name elem) target_attr
  82. | Some target ->
  83. if not (target_matches only_regex exclude_regex target)
  84. then Logs.debug @@ fun m -> m "Link target \"%s\" matches exclude_target_regex, ignoring" target
  85. else begin
  86. (* Remove the build_dir from the path *)
  87. let relative_target_dir = Regex_utils.Internal.replace env.target_dir ("^" ^ env.settings.build_dir) "" in
  88. (* Strip leading slashes *)
  89. let target = Regex_utils.Internal.replace target "^/+" "" in
  90. let parent_path =
  91. (if check_file && (Sys.file_exists (FilePath.concat env.target_dir target))
  92. then let dir_path = Utils.split_path relative_target_dir in String.concat "/" (prefix :: dir_path)
  93. else prefix)
  94. in
  95. let target = String.concat "/" [parent_path; target] in
  96. Soup.set_attribute (get_target_attr elem) target elem
  97. end
  98. (** Converts all internal links to relative according to the page's location in the directory tree. *)
  99. let relative_links env config soup =
  100. let valid_options = List.append Config.common_widget_options ["exclude_target_regex"; "only_target_regex"; "check_file"] in
  101. let () = Config.check_options valid_options config "widget \"relative_links\"" in
  102. let exclude_regex = OH.find_string_opt config ["exclude_target_regex"] in
  103. let only_regex = OH.find_string_opt config ["only_target_regex"] in
  104. if (Option.is_some exclude_regex) && (Option.is_some only_regex)
  105. then Config.config_error "exclude_target_regex and only_target_regex options are mutually exclusive"
  106. else begin
  107. let exclude_regex = Option.value ~default:default_exclude_regex exclude_regex in
  108. let check_file = Config.find_bool_or ~default:false config ["check_file"] in
  109. let nodes = Html_utils.select_all link_selectors soup in begin
  110. match nodes with
  111. | [] ->
  112. Logs.debug @@ fun m -> m "Page has no link elements that need adjustment"
  113. | ns -> List.iter (fun e -> relativize e env check_file only_regex exclude_regex) ns
  114. end;
  115. Ok ()
  116. end
  117. (** Converts all internal links to absolute. *)
  118. let absolute_links env config soup =
  119. let (let*) = Result.bind in
  120. let valid_options = List.append Config.common_widget_options
  121. ["exclude_target_regex"; "only_target_regex"; "check_file"; "prefix"]
  122. in
  123. let () = Config.check_options valid_options config "widget \"absolute_links\"" in
  124. let* prefix = Config.find_string_result config ["prefix"] in
  125. (* Strip trailing slashes to avoid duplicate slashes after concatenation *)
  126. let prefix = Regex_utils.Internal.replace prefix "/+$" "" in
  127. let exclude_regex = OH.find_string_opt config ["exclude_target_regex"] in
  128. let only_regex = OH.find_string_opt config ["only_target_regex"] in
  129. if (Option.is_some exclude_regex) && (Option.is_some only_regex)
  130. then Config.config_error "exclude_target_regex and only_target_regex options are mutually exclusive"
  131. else begin
  132. let exclude_regex = Option.value ~default:default_exclude_regex exclude_regex in
  133. let check_file = Config.find_bool_or ~default:false config ["check_file"] in
  134. let nodes = Html_utils.select_all link_selectors soup in begin
  135. match nodes with
  136. | [] ->
  137. Logs.debug @@ fun m -> m "Page has no link elements that need adjustment"
  138. | ns -> List.iter (fun e -> absolutize e env prefix check_file only_regex exclude_regex) ns
  139. end;
  140. Ok ()
  141. end