From 50e0a9ae56ad2704240956d1f93cc04bafcb8b75 Mon Sep 17 00:00:00 2001
From: "Haelwenn (lanodan) Monnier" <contact@hacktivis.me>
Date: Tue, 16 Oct 2018 03:00:37 +0200
Subject: [PATCH 1/4] lib/pleroma/html.ex: Fix scheme lists
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Gosh please don’t break ourselves…

Also, this is a copy-paste of the list in lib/pleroma/formatter.ex.
I think this should be put in a common variable, but where?
---
 lib/pleroma/html.ex | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex
index cf18f070c6..f868556718 100644
--- a/lib/pleroma/html.ex
+++ b/lib/pleroma/html.ex
@@ -39,7 +39,22 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do
   require HtmlSanitizeEx.Scrubber.Meta
   alias HtmlSanitizeEx.Scrubber.Meta
 
-  @valid_schemes ["http", "https"]
+  @valid_schemes [
+    "https://",
+    "http://",
+    "dat://",
+    "dweb://",
+    "gopher://",
+    "ipfs://",
+    "ipns://",
+    "irc:",
+    "ircs:",
+    "magnet:",
+    "mailto:",
+    "mumble:",
+    "ssb://",
+    "xmpp:"
+  ]
 
   Meta.remove_cdata_sections_before_scrub()
   Meta.strip_comments()
-- 
GitLab


From d7654c77de1f027091a380630559bbda9abb9540 Mon Sep 17 00:00:00 2001
From: "Haelwenn (lanodan) Monnier" <contact@hacktivis.me>
Date: Tue, 16 Oct 2018 03:34:33 +0200
Subject: [PATCH 2/4] lib/pleroma/html.ex: Use a function as a variable (broken
 for some reason)

---
 lib/pleroma/html.ex | 46 ++++++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex
index f868556718..8a5ede6142 100644
--- a/lib/pleroma/html.ex
+++ b/lib/pleroma/html.ex
@@ -3,6 +3,25 @@ defmodule Pleroma.HTML do
 
   @markup Application.get_env(:pleroma, :markup)
 
+  def valid_schemes() do
+    [
+      "https://",
+      "http://",
+      "dat://",
+      "dweb://",
+      "gopher://",
+      "ipfs://",
+      "ipns://",
+      "irc:",
+      "ircs:",
+      "magnet:",
+      "mailto:",
+      "mumble:",
+      "ssb://",
+      "xmpp:"
+    ]
+  end
+
   defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]
   defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers
   defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default]
@@ -39,28 +58,13 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do
   require HtmlSanitizeEx.Scrubber.Meta
   alias HtmlSanitizeEx.Scrubber.Meta
 
-  @valid_schemes [
-    "https://",
-    "http://",
-    "dat://",
-    "dweb://",
-    "gopher://",
-    "ipfs://",
-    "ipns://",
-    "irc:",
-    "ircs:",
-    "magnet:",
-    "mailto:",
-    "mumble:",
-    "ssb://",
-    "xmpp:"
-  ]
+  alias Pleroma.HTML
 
   Meta.remove_cdata_sections_before_scrub()
   Meta.strip_comments()
 
   # links
-  Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes)
+  Meta.allow_tag_with_uri_attributes("a", ["href"], HTML.valid_schemes())
   Meta.allow_tag_with_these_attributes("a", ["name", "title"])
 
   # paragraphs and linebreaks
@@ -75,7 +79,7 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do
   @allow_inline_images Keyword.get(@markup, :allow_inline_images)
 
   if @allow_inline_images do
-    Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes)
+    Meta.allow_tag_with_uri_attributes("img", ["src"], HTML.valid_schemes())
 
     Meta.allow_tag_with_these_attributes("img", [
       "width",
@@ -94,12 +98,12 @@ defmodule Pleroma.HTML.Scrubber.Default do
   require HtmlSanitizeEx.Scrubber.Meta
   alias HtmlSanitizeEx.Scrubber.Meta
 
-  @valid_schemes ["http", "https"]
+  alias Pleroma.HTML
 
   Meta.remove_cdata_sections_before_scrub()
   Meta.strip_comments()
 
-  Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes)
+  Meta.allow_tag_with_uri_attributes("a", ["href"], HTML.valid_schemes())
   Meta.allow_tag_with_these_attributes("a", ["name", "title"])
 
   Meta.allow_tag_with_these_attributes("b", [])
@@ -122,7 +126,7 @@ defmodule Pleroma.HTML.Scrubber.Default do
   @allow_inline_images Keyword.get(@markup, :allow_inline_images)
 
   if @allow_inline_images do
-    Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes)
+    Meta.allow_tag_with_uri_attributes("img", ["src"], HTML.valid_schemes())
 
     Meta.allow_tag_with_these_attributes("img", [
       "width",
-- 
GitLab


From 2154c5dcd891cf2a85c0251e07424b5681aa88a2 Mon Sep 17 00:00:00 2001
From: "Haelwenn (lanodan) Monnier" <contact@hacktivis.me>
Date: Thu, 18 Oct 2018 07:36:58 +0200
Subject: [PATCH 3/4] lib/pleroma/html.ex: Use macros for valid_schemes, change
 config for schemes

---
 config/config.exs        | 18 +++++++++++++++++-
 lib/pleroma/formatter.ex | 27 +++------------------------
 lib/pleroma/html.ex      | 37 ++++++++++++-------------------------
 3 files changed, 32 insertions(+), 50 deletions(-)

diff --git a/config/config.exs b/config/config.exs
index 3202969912..e5f0b4f6f9 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -24,7 +24,23 @@
 
 config :pleroma, :emoji, shortcode_globs: ["/emoji/custom/**/*.png"]
 
-config :pleroma, :uri_schemes, additionnal_schemes: []
+config :pleroma, :uri_schemes,
+  valid_schemes: [
+    "https",
+    "http",
+    "dat",
+    "dweb",
+    "gopher",
+    "ipfs",
+    "ipns",
+    "irc",
+    "ircs",
+    "magnet",
+    "mailto",
+    "mumble",
+    "ssb",
+    "xmpp"
+  ]
 
 # Configures the endpoint
 config :pleroma, Pleroma.Web.Endpoint,
diff --git a/lib/pleroma/formatter.ex b/lib/pleroma/formatter.ex
index 5b63fb795e..d7de5b4830 100644
--- a/lib/pleroma/formatter.ex
+++ b/lib/pleroma/formatter.ex
@@ -171,25 +171,8 @@ def get_custom_emoji() do
 
   @link_regex ~r/[0-9a-z+\-\.]+:[0-9a-z$-_.+!*'(),]+/ui
 
-  # IANA got a list https://www.iana.org/assignments/uri-schemes/ but
-  # Stuff like ipfs isn’t in it
-  # There is very niche stuff
-  @uri_schemes [
-    "https://",
-    "http://",
-    "dat://",
-    "dweb://",
-    "gopher://",
-    "ipfs://",
-    "ipns://",
-    "irc:",
-    "ircs:",
-    "magnet:",
-    "mailto:",
-    "mumble:",
-    "ssb://",
-    "xmpp:"
-  ]
+  @uri_schemes Application.get_env(:pleroma, :uri_schemes, [])
+  @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, [])
 
   # TODO: make it use something other than @link_regex
   def html_escape(text, "text/html") do
@@ -207,14 +190,10 @@ def html_escape(text, "text/plain") do
 
   @doc "changes scheme:... urls to html links"
   def add_links({subs, text}) do
-    additionnal_schemes =
-      Application.get_env(:pleroma, :uri_schemes, [])
-      |> Keyword.get(:additionnal_schemes, [])
-
     links =
       text
       |> String.split([" ", "\t", "<br>"])
-      |> Enum.filter(fn word -> String.starts_with?(word, @uri_schemes ++ additionnal_schemes) end)
+      |> Enum.filter(fn word -> String.starts_with?(word, @valid_schemes) end)
       |> Enum.filter(fn word -> Regex.match?(@link_regex, word) end)
       |> Enum.map(fn url -> {Ecto.UUID.generate(), url} end)
       |> Enum.sort_by(fn {_, url} -> -String.length(url) end)
diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex
index 8a5ede6142..2d2155b826 100644
--- a/lib/pleroma/html.ex
+++ b/lib/pleroma/html.ex
@@ -3,25 +3,6 @@ defmodule Pleroma.HTML do
 
   @markup Application.get_env(:pleroma, :markup)
 
-  def valid_schemes() do
-    [
-      "https://",
-      "http://",
-      "dat://",
-      "dweb://",
-      "gopher://",
-      "ipfs://",
-      "ipns://",
-      "irc:",
-      "ircs:",
-      "magnet:",
-      "mailto:",
-      "mumble:",
-      "ssb://",
-      "xmpp:"
-    ]
-  end
-
   defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]
   defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers
   defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default]
@@ -55,6 +36,10 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do
   paragraphs, breaks and links are allowed through the filter.
   """
 
+  @markup Application.get_env(:pleroma, :markup)
+  @uri_schemes Application.get_env(:pleroma, :uri_schemes, [])
+  @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, [])
+
   require HtmlSanitizeEx.Scrubber.Meta
   alias HtmlSanitizeEx.Scrubber.Meta
 
@@ -64,7 +49,7 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do
   Meta.strip_comments()
 
   # links
-  Meta.allow_tag_with_uri_attributes("a", ["href"], HTML.valid_schemes())
+  Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes)
   Meta.allow_tag_with_these_attributes("a", ["name", "title"])
 
   # paragraphs and linebreaks
@@ -75,11 +60,10 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do
   Meta.allow_tag_with_these_attributes("span", [])
 
   # allow inline images for custom emoji
-  @markup Application.get_env(:pleroma, :markup)
   @allow_inline_images Keyword.get(@markup, :allow_inline_images)
 
   if @allow_inline_images do
-    Meta.allow_tag_with_uri_attributes("img", ["src"], HTML.valid_schemes())
+    Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes)
 
     Meta.allow_tag_with_these_attributes("img", [
       "width",
@@ -100,10 +84,14 @@ defmodule Pleroma.HTML.Scrubber.Default do
 
   alias Pleroma.HTML
 
+  @markup Application.get_env(:pleroma, :markup)
+  @uri_schemes Application.get_env(:pleroma, :uri_schemes, [])
+  @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, [])
+
   Meta.remove_cdata_sections_before_scrub()
   Meta.strip_comments()
 
-  Meta.allow_tag_with_uri_attributes("a", ["href"], HTML.valid_schemes())
+  Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes)
   Meta.allow_tag_with_these_attributes("a", ["name", "title"])
 
   Meta.allow_tag_with_these_attributes("b", [])
@@ -122,11 +110,10 @@ defmodule Pleroma.HTML.Scrubber.Default do
   Meta.allow_tag_with_these_attributes("u", [])
   Meta.allow_tag_with_these_attributes("ul", [])
 
-  @markup Application.get_env(:pleroma, :markup)
   @allow_inline_images Keyword.get(@markup, :allow_inline_images)
 
   if @allow_inline_images do
-    Meta.allow_tag_with_uri_attributes("img", ["src"], HTML.valid_schemes())
+    Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes)
 
     Meta.allow_tag_with_these_attributes("img", [
       "width",
-- 
GitLab


From 595d855f0ebd88faede51bf3e08384e956465722 Mon Sep 17 00:00:00 2001
From: William Pitcock <nenolod@dereferenced.org>
Date: Thu, 18 Oct 2018 14:29:31 +0000
Subject: [PATCH 4/4] html scrubbing policies: restrict img tags to http/https
 only for mediaproxy compatibility

---
 lib/pleroma/html.ex | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex
index 2d2155b826..fdc5e7dab9 100644
--- a/lib/pleroma/html.ex
+++ b/lib/pleroma/html.ex
@@ -63,7 +63,8 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do
   @allow_inline_images Keyword.get(@markup, :allow_inline_images)
 
   if @allow_inline_images do
-    Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes)
+    # restrict img tags to http/https only, because of MediaProxy.
+    Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"])
 
     Meta.allow_tag_with_these_attributes("img", [
       "width",
@@ -113,7 +114,8 @@ defmodule Pleroma.HTML.Scrubber.Default do
   @allow_inline_images Keyword.get(@markup, :allow_inline_images)
 
   if @allow_inline_images do
-    Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes)
+    # restrict img tags to http/https only, because of MediaProxy.
+    Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"])
 
     Meta.allow_tag_with_these_attributes("img", [
       "width",
-- 
GitLab