...
 
Commits (2)
defmodule FastSanitize do
  @moduledoc """
  Fast HTML sanitization module.

  Each function hands the document to `FastSanitize.Sanitizer.scrub/2`
  together with one of the bundled scrubber modules.
  """

  alias FastSanitize.Sanitizer
  alias FastSanitize.Sanitizer.{BasicHTML, StripTags}

  @doc """
  Strip all tags from a given document fragment.

  The result is whatever `FastSanitize.Sanitizer.scrub/2` returns for the
  `FastSanitize.Sanitizer.StripTags` scrubber.

  ## Example

      iex> FastSanitize.strip_tags("<h1>hello world</h1>")
      {:ok, "hello world"}

  """
  def strip_tags(doc) do
    Sanitizer.scrub(doc, StripTags)
  end

  @doc """
  Strip tags from a given document fragment that are not basic HTML.

  The result is whatever `FastSanitize.Sanitizer.scrub/2` returns for the
  `FastSanitize.Sanitizer.BasicHTML` scrubber.

  ## Example

      iex> FastSanitize.basic_html("<h1>hello world</h1><script>alert('xss')</script>")
      {:ok, "<h1>hello world</h1>"}

  """
  def basic_html(doc) do
    Sanitizer.scrub(doc, BasicHTML)
  end
end
......@@ -31,6 +31,12 @@ defmodule FastSanitize.Fragment do
# Comment nodes are written back out as an HTML comment around their text.
defp fragment_to_html({:comment, _, text}) do
  "<!-- #{text} -->"
end

# A bare list of nodes (a subtree with no enclosing tag) is rendered as a
# whole. The match is assertive: anything other than {:ok, html} raises.
defp fragment_to_html(subtree) when is_list(subtree) do
  {:ok, html} = subtree_to_html(subtree)
  html
end

# A node which can never accept children carries nil instead of a subtree,
# so only its start tag is produced.
defp fragment_to_html({tag, attrs, nil}) do
  build_start_tag(tag, attrs)
end
......@@ -48,7 +54,8 @@ defmodule FastSanitize.Fragment do
# Renders every node of `tree` with fragment_to_html/1 and joins the pieces
# into a single binary, returned as {:ok, binary}. nil entries are skipped
# before rendering.
#
# The stale pre-change line `Enum.map(tree, &fragment_to_html/1)` has been
# removed: with it in place `rendered` was bound to the un-joined list and
# the reject/map/join pipeline below was evaluated and thrown away.
defp subtree_to_html(tree) do
  rendered =
    tree
    |> Enum.reject(&is_nil/1)
    |> Enum.map(&fragment_to_html/1)
    |> Enum.join("")

  {:ok, rendered}
......
defmodule FastSanitize.Sanitizer do
  @moduledoc """
  Defines the contract that Sanitizer modules must follow.

  A sanitizer module implements the `c:scrub/1` callback for every node shape
  it may be handed. `scrub/2` in this module parses a document, applies those
  callbacks to the resulting tree and renders the tree back to HTML.
  """

  alias FastSanitize.Fragment

  @doc """
  Scrubs a single node.

  The three specs below describe, in order:

    * a document node (atom tag),
    * an unknown node (binary tag),
    * a text node (plain binary).
  """
  @callback scrub({atom(), list(), list()}) :: tuple()
  @callback scrub({binary(), list(), list()}) :: tuple()
  @callback scrub(binary()) :: binary()

  @doc """
  Sanitizes `doc` using the `scrubber` module.

  `doc` is parsed with `FastSanitize.Fragment.to_tree/1`, every node of the
  tree (including nested children) is passed through `scrubber.scrub/1`, and
  the scrubbed tree is rendered with `FastSanitize.Fragment.to_html/1`.

  ## Returns

    * `{:ok, ""}` when `doc` is `""` or `nil`;
    * the result of `FastSanitize.Fragment.to_html/1` when parsing succeeds;
    * `{:error, reason}` when parsing fails. An `{:error, reason}` tuple from
      the parser is passed through unchanged; any other non-matching value is
      wrapped as `{:error, value}`.
  """
  # Fallbacks: nothing to sanitize. They return the same tagged shape as the
  # main clause so callers can always match on {:ok, html}.
  def scrub("", _scrubber), do: {:ok, ""}
  def scrub(nil, _scrubber), do: {:ok, ""}

  def scrub(doc, scrubber) do
    case Fragment.to_tree(doc) do
      {:ok, subtree} ->
        subtree
        |> scrub_subtree(scrubber)
        |> Fragment.to_html()

      # Already a tagged error: do not wrap it a second time.
      {:error, _reason} = error ->
        error

      other ->
        {:error, other}
    end
  end

  # Scrubs every node of a list of sibling nodes.
  defp scrub_subtree(subtree, scrubber) do
    Enum.map(subtree, &scrub_fragment(&1, scrubber))
  end

  # Scrubs one node and then descends into whatever children survive.
  # Without the descent only top-level nodes would be sanitized, and a
  # disallowed tag nested inside an allowed one would be emitted untouched.
  defp scrub_fragment(fragment, scrubber) do
    case scrubber.scrub(fragment) do
      # Tag kept by the scrubber: its children still need scrubbing.
      {tag, attrs, children} when is_list(children) ->
        {tag, attrs, scrub_subtree(children, scrubber)}

      # Tag dropped but children kept: the scrubber handed back the bare
      # child list, which has not been scrubbed yet.
      children when is_list(children) ->
        scrub_subtree(children, scrubber)

      # Text, nil (node dropped entirely), or a node without a child list.
      other ->
        other
    end
  end
end
# Scrubber that keeps a small whitelist of HTML tags and attributes.
#
# Every Meta.* call below expands to scrub/1 and/or scrub_attribute/2 function
# clauses in this module, so the ORDER of the calls is the order in which the
# clauses are tried. Keep specific rules above the catch-all at the bottom.
defmodule FastSanitize.Sanitizer.BasicHTML do
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
# URI schemes accepted for the href/src attributes allowed below.
@valid_schemes ["http", "https", "mailto"]
# Comment nodes are replaced by an empty string.
Meta.strip_comments()
# Links: href is scheme-checked; name and title are passed through as is.
Meta.allow_tag_with_uri_attributes(:a, ["href"], @valid_schemes)
Meta.allow_tag_with_these_attributes(:a, ["name", "title"])
# Tags listed with [] are kept but lose every attribute.
Meta.allow_tag_with_these_attributes(:b, [])
Meta.allow_tag_with_these_attributes(:blockquote, [])
Meta.allow_tag_with_these_attributes(:br, [])
Meta.allow_tag_with_these_attributes(:code, [])
Meta.allow_tag_with_these_attributes(:del, [])
Meta.allow_tag_with_these_attributes(:em, [])
# NOTE(review): h1-h5 are allowed but h6 is not - confirm this is intentional.
Meta.allow_tag_with_these_attributes(:h1, [])
Meta.allow_tag_with_these_attributes(:h2, [])
Meta.allow_tag_with_these_attributes(:h3, [])
Meta.allow_tag_with_these_attributes(:h4, [])
Meta.allow_tag_with_these_attributes(:h5, [])
Meta.allow_tag_with_these_attributes(:hr, [])
Meta.allow_tag_with_these_attributes(:i, [])
# Images: src is scheme-checked; the listed attributes are passed through.
Meta.allow_tag_with_uri_attributes(:img, ["src"], @valid_schemes)
Meta.allow_tag_with_these_attributes(:img, [
"width",
"height",
"title",
"alt"
])
Meta.allow_tag_with_these_attributes(:li, [])
Meta.allow_tag_with_these_attributes(:ol, [])
Meta.allow_tag_with_these_attributes(:p, [])
Meta.allow_tag_with_these_attributes(:pre, [])
Meta.allow_tag_with_these_attributes(:span, [])
Meta.allow_tag_with_these_attributes(:strong, [])
Meta.allow_tag_with_these_attributes(:table, [])
Meta.allow_tag_with_these_attributes(:tbody, [])
Meta.allow_tag_with_these_attributes(:td, [])
Meta.allow_tag_with_these_attributes(:th, [])
Meta.allow_tag_with_these_attributes(:thead, [])
Meta.allow_tag_with_these_attributes(:tr, [])
Meta.allow_tag_with_these_attributes(:u, [])
Meta.allow_tag_with_these_attributes(:ul, [])
# script nodes scrub to nil, i.e. the tag is dropped together with its content
# (unlike other unlisted tags, whose children are kept by the catch-all).
Meta.strip_children_of(:script)
# Catch-all clauses: must stay last, since anything declared after this line
# would never be reached.
Meta.strip_everything_not_covered()
end
# Based on HtmlSanitizeEx.Scrubber.Meta
# Copyright (c) 2015-2019 René Föhring (@rrrene)
defmodule FastSanitize.Sanitizer.Meta do
  @moduledoc """
  This module contains some meta-programming magic to define your own rules
  for scrubbers.

  Every macro expands to `scrub/1` and/or `scrub_attribute/2` function clauses
  in the calling module, so rules are tried in the order they are written.
  Tag names are matched literally against the first element of the node
  tuple; the scrubbers shipped with this library use atoms (`:a`, `:img`).

  The StripTags scrubber is a good starting point:

      defmodule FastSanitize.Sanitizer.StripTags do
        require FastSanitize.Sanitizer.Meta
        alias FastSanitize.Sanitizer.Meta

        Meta.strip_comments()
        Meta.strip_everything_not_covered()
      end

  You can use the `allow_tag_with_uri_attributes/3` and
  `allow_tag_with_these_attributes/2` macros to define what is allowed:

      defmodule MyApp.Sanitizer.Images do
        require FastSanitize.Sanitizer.Meta
        alias FastSanitize.Sanitizer.Meta

        Meta.strip_comments()
        Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"])
        Meta.allow_tag_with_these_attributes(:img, ["width", "height"])
        Meta.strip_everything_not_covered()
      end

  You can stack these if convenient:

      Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"])
      Meta.allow_tag_with_these_attributes(:img, ["width", "height"])
      Meta.allow_tag_with_these_attributes(:img, ["title", "alt"])
  """

  @doc """
  Allow these tags and use the regular `scrub_attribute/2` function to scrub
  the attributes.
  """
  defmacro allow_tags_and_scrub_their_attributes(list) do
    Enum.map(list, fn tag_name ->
      allow_this_tag_and_scrub_its_attributes(tag_name)
    end)
  end

  @doc """
  Allow the given `list` of attributes for the specified `tag_name`.

  The tag itself is allowed as well; with an empty `list` the tag is kept and
  only attributes allowed by other rules survive.

      Meta.allow_tag_with_these_attributes(:a, ["name", "title"])
      Meta.allow_tag_with_these_attributes(:img, ["title", "alt"])
  """
  defmacro allow_tag_with_these_attributes(tag_name, list \\ []) do
    list
    |> Enum.map(fn attr_name ->
      allow_this_tag_with_this_attribute(tag_name, attr_name)
    end)
    |> Enum.concat([allow_this_tag_and_scrub_its_attributes(tag_name)])
  end

  @doc """
  Allow the given list of `values` for the given `attribute` on the
  specified `tag_name`.

      Meta.allow_tag_with_this_attribute_values(:a, "target", ["_blank"])
  """
  defmacro allow_tag_with_this_attribute_values(tag_name, attribute, values) do
    quote do
      def scrub_attribute(unquote(tag_name), {unquote(attribute), value})
          when value in unquote(values) do
        {unquote(attribute), value}
      end
    end
  end

  @doc """
  Allow the given `list` of attributes to contain URI information for the
  specified `tag`.

      # Only allow SSL-enabled and mailto links
      Meta.allow_tag_with_uri_attributes(:a, ["href"], ["https", "mailto"])

      # Only allow none-SSL images
      Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http"])
  """
  defmacro allow_tag_with_uri_attributes(tag, list, valid_schemes) do
    list
    |> Enum.map(fn name ->
      allow_tag_with_uri_attribute(tag, name, valid_schemes)
    end)
  end

  @doc """
  Allow the `style` attribute on every tag in `list`.

  The attribute value is passed through `scrub_css/1`. This module does not
  define that function, so the calling scrubber module has to provide it.
  """
  defmacro allow_tags_with_style_attributes(list) do
    list
    |> Enum.map(fn tag_name -> allow_this_tag_with_style_attribute(tag_name) end)
  end

  @doc """
  Strips all comments.

  Comment nodes scrub to an empty string.
  """
  defmacro strip_comments do
    quote do
      def scrub({:comment, _, _}), do: ""
    end
  end

  @doc """
  Ensures any tags/attributes not explicitly whitelisted until this
  statement are stripped.

  This defines the catch-all clauses, so it must be the last rule in a
  scrubber module.
  """
  defmacro strip_everything_not_covered do
    quote do
      # If we haven't covered the attribute until here, we just scrap it.
      def scrub_attribute(_tag, _attribute), do: nil

      # If we haven't covered the tag until here, the tag itself is dropped
      # but its children are handed back in its place.
      def scrub({_tag, _attributes, children}), do: children

      # Text is left alone
      def scrub("" <> text), do: text
    end
  end

  @doc """
  Drops the given tag together with its children.

  Unlike tags removed by `strip_everything_not_covered/0`, whose children are
  kept, a node matching `tag_name` scrubs to `nil`.
  """
  defmacro strip_children_of(tag_name) do
    quote do
      def scrub({unquote(tag_name), _attributes, _children}), do: nil
    end
  end

  # Generates the scrub/1 clause that keeps `tag_name`, plus the private
  # scrub_attributes/2 clause that filters its attributes through
  # scrub_attribute/2 and discards the ones that come back as nil.
  defp allow_this_tag_and_scrub_its_attributes(tag_name) do
    quote do
      def scrub({unquote(tag_name), attributes, children}) do
        {unquote(tag_name), scrub_attributes(unquote(tag_name), attributes), children}
      end

      defp scrub_attributes(unquote(tag_name), attributes) do
        attributes
        |> Enum.map(fn attr ->
          scrub_attribute(unquote(tag_name), attr)
        end)
        |> Enum.reject(&is_nil/1)
      end
    end
  end

  # Generates a scrub_attribute/2 clause that passes `attr_name` on `tag_name`
  # through unchanged.
  defp allow_this_tag_with_this_attribute(tag_name, attr_name) do
    quote do
      def scrub_attribute(unquote(tag_name), {unquote(attr_name), value}) do
        {unquote(attr_name), value}
      end
    end
  end

  # Generates a scrub_attribute/2 clause for the "style" attribute of
  # `tag_name`. scrub_css/1 is resolved in the calling module.
  defp allow_this_tag_with_style_attribute(tag_name) do
    quote do
      def scrub_attribute(unquote(tag_name), {"style", value}) do
        {"style", scrub_css(value)}
      end
    end
  end

  # Generates the scrub_attribute/2 clauses that validate a URI-carrying
  # attribute (`attr_name` on `tag_name`) against `valid_schemes`.
  defp allow_tag_with_uri_attribute(tag_name, attr_name, valid_schemes) do
    quote do
      # Values starting with "&" are rejected outright (presumably to block
      # schemes hidden behind a leading character entity).
      def scrub_attribute(unquote(tag_name), {unquote(attr_name), "&" <> _value}) do
        nil
      end

      # Alternatives for the scheme separator: a literal ":" plus entity- and
      # percent-encoded spellings.
      # NOTE(review): "&#x70" is the entity for "p", not ":" - confirm intent.
      @protocol_separator ":|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A"
      @protocol_separator_regex Regex.compile!(@protocol_separator, "mi")

      @http_like_scheme "(?<scheme>.+?)(#{@protocol_separator})//"
      @other_schemes "(?<other_schemes>mailto)(#{@protocol_separator})"

      @scheme_capture Regex.compile!(
                        "(#{@http_like_scheme})|(#{@other_schemes})",
                        "mi"
                      )

      def scrub_attribute(unquote(tag_name), {unquote(attr_name), uri}) do
        valid_schema =
          if uri =~ @protocol_separator_regex do
            # The value contains a scheme separator: the captured scheme must
            # be whitelisted. Exactly one of the two named groups is
            # non-empty on a match; no match at all means "reject".
            case Regex.named_captures(@scheme_capture, uri) do
              %{"scheme" => scheme, "other_schemes" => ""} ->
                scheme in unquote(valid_schemes)

              %{"other_schemes" => scheme, "scheme" => ""} ->
                scheme in unquote(valid_schemes)

              _ ->
                false
            end
          else
            # No separator at all, e.g. a relative URI: accepted.
            true
          end

        # Falls through to nil when the scheme is not allowed, which removes
        # the attribute.
        if valid_schema, do: {unquote(attr_name), uri}
      end
    end
  end
end
# Scrubber that removes every tag. It declares no allow rules, so only the
# two generated rule sets below apply.
defmodule FastSanitize.Sanitizer.StripTags do
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
# Comment nodes are replaced by an empty string.
Meta.strip_comments()
# Catch-all clauses: any tag scrubs to its children, text is returned as is.
Meta.strip_everything_not_covered()
end