Commit 337ffdd9 authored by kaniini

Merge branch 'feature/rip-out-nodex' into 'master'

Rip out nodex

Closes #2

See merge request pleroma/myhtmlex!8
parents 692c1d2d a08c1cc8
Pipeline #19627 passed with stage
in 56 seconds
defmodule BasicHtmlBench do
  # Benchfella suite that times `:fast_html.decode` with no format flags and
  # with each combination of the `:html_atoms` / `:nil_self_closing` flags.
  #
  # NOTE(review): every benchmark destructures `bench_context` as a 2-tuple,
  # but no `setup_all` is visible in this module — confirm where the context
  # is supposed to come from.
  use Benchfella

  bench "decode" do
    {document, _} = bench_context
    :fast_html.decode(document)
  end

  bench "decode w/ html_atoms" do
    {document, _} = bench_context
    flags = [:html_atoms]
    :fast_html.decode(document, format: flags)
  end

  bench "decode w/ nil_self_closing" do
    {document, _} = bench_context
    flags = [:nil_self_closing]
    :fast_html.decode(document, format: flags)
  end

  bench "decode w/ html_atoms, nil_self_closing" do
    {document, _} = bench_context
    flags = [:html_atoms, :nil_self_closing]
    :fast_html.decode(document, format: flags)
  end
end
defmodule CnodeFileSizesBench do
  # Benchfella suite that times `:fast_html.decode` on three HTML fixtures of
  # different sizes (the sizes are part of the benchmark names).
  use Benchfella

  setup_all do
    # Bring up distribution and the c-node worker before any benchmark runs.
    Nodex.Distributed.up()

    {:ok, _pid} =
      Nodex.Cnode.start_link(%{exec_path: "priv/myhtml_worker"}, name: Myhtmlex.Safe.Cnode)

    # The benchmarks below destructure this tuple positionally, so the order
    # of the paths matters.
    fixtures =
      [
        "bench/github_trending_js.html",
        "bench/w3c_html5.html",
        "bench/wikipedia_hyperlink.html"
      ]
      |> Enum.map(&File.read!/1)
      |> List.to_tuple()

    {:ok, fixtures}
  end

  bench "github_trending_js.html 341k" do
    {github, _, _} = bench_context
    :fast_html.decode(github)
  end

  bench "w3c_html5.html 131k" do
    {_, w3c, _} = bench_context
    :fast_html.decode(w3c)
  end

  bench "wikipedia_hyperlink.html 97k" do
    {_, _, wikipedia} = bench_context
    :fast_html.decode(wikipedia)
  end
end
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
......@@ -395,6 +395,8 @@ static void build_tree (ei_x_buff * response, myhtml_tree_t * tree, myhtml_tree_
// ok we're going to send an actual response so start encoding it
response->index = 0;
ei_x_encode_version (response);
ei_x_encode_tuple_header(response, 2);
ei_x_encode_atom(response, "myhtml_worker");
while (current_node != NULL)
{
......@@ -447,7 +449,7 @@ static void build_tree (ei_x_buff * response, myhtml_tree_t * tree, myhtml_tree_
strncpy (tag_string, tag_name, sizeof buffer - 1);
}
if (response->index > 1)
if (stack.used > 0)
{
EMIT_LIST_HDR;
}
......
......@@ -96,6 +96,7 @@ defmodule :fast_html do
"""
@spec decode(String.t(), format: [format_flag()]) :: tree()
def decode(bin, format: flags) do
Myhtmlex.Safe.decode(bin, flags)
{:ok, res} = FastHtml.Cnode.call({:decode, bin, flags})
res
end
end
defmodule FastHtml.Application do
  @moduledoc false

  use Application

  @doc """
  Returns a random 8-character lowercase hexadecimal string (4 random bytes),
  used as a suffix for generated node names.
  """
  def random_sname, do: :crypto.strong_rand_bytes(4) |> Base.encode16(case: :lower)

  # Makes sure the VM is a distributed node (starting epmd and the node when
  # needed) and then starts the supervisor that owns `FastHtml.Cnode`.
  #
  # Raises a RuntimeError when the node cannot be set up.
  @impl true
  def start(_type, _args) do
    case maybe_setup_node() do
      {:error, message} when is_binary(message) ->
        raise message

      # `Node.start/1` reports failures as arbitrary terms; `raise/1` only
      # accepts strings, modules and exceptions, so render the reason first.
      {:error, reason} ->
        raise "Could not start distributed node: #{inspect(reason)}"

      _ ->
        :ok
    end

    Supervisor.start_link([FastHtml.Cnode], strategy: :one_for_one, name: FastHtml.Supervisor)
  end

  # Returns `:ok` when the node is already alive, `{:ok, pid}` when a node was
  # started, or `{:error, reason}` when epmd or the node could not be started.
  defp maybe_setup_node() do
    with {_, false} <- {:alive, Node.alive?()},
         {:ok, epmd_path} <- find_epmd(),
         :ok <- start_epmd(epmd_path),
         {:ok, _pid} = pid_tuple <- start_node() do
      pid_tuple
    else
      {:alive, _} ->
        :ok

      {:error, _} = e ->
        e
    end
  end

  # Looks up the `epmd` executable on the PATH.
  defp find_epmd() do
    case System.find_executable("epmd") do
      nil ->
        {:error,
         "Could not find epmd executable. Please ensure the location it's in is present in your PATH or start epmd manually beforehand"}

      executable ->
        {:ok, executable}
    end
  end

  # Runs `epmd -daemon` and maps a non-zero exit code to an error tuple.
  defp start_epmd(path) do
    case System.cmd(path, ["-daemon"]) do
      {_result, 0} -> :ok
      {_result, exit_code} -> {:error, "Could not start epmd, exit code: #{exit_code}"}
    end
  end

  # Starts distribution under a randomly suffixed name bound to 127.0.0.1.
  defp start_node() do
    Node.start(:"master_#{random_sname()}@127.0.0.1")
  end
end
defmodule FastHtml.Cnode do
  @moduledoc false

  # Default number of milliseconds to wait for the next message from a freshly
  # spawned c-node before giving up.
  @spawn_inactive_timeout 10000

  # Captured at compile time so init/1 can locate this application's priv dir.
  application = Mix.Project.config()[:app]

  use GenServer
  require Logger

  @doc """
  Starts the c-node manager, registered under the module name.

  `args` is either `[]` or a map that may carry `:sname`, `:hostname` and
  `:spawn_inactive_timeout` overrides.
  """
  def start_link(args) do
    GenServer.start_link(__MODULE__, args, name: __MODULE__)
  end

  # Builds the state and connects to the c-node at `sname@hostname`, spawning
  # the `myhtml_worker` executable when the connection attempt fails.
  # Returns `{:ok, state}` or `{:stop, reason}`.
  @impl true
  def init(args) do
    args = if args == [], do: %{}, else: args

    exec_path = Path.join(:code.priv_dir(unquote(application)), "myhtml_worker")
    sname = Map.get_lazy(args, :sname, &default_sname/0)
    hostname = Map.get_lazy(args, :hostname, &master_hostname/0)
    addr = :"#{sname}@#{hostname}"
    spawn_inactive_timeout = Map.get(args, :spawn_inactive_timeout, @spawn_inactive_timeout)

    state = %{
      exec_path: exec_path,
      sname: sname,
      addr: addr,
      hostname: hostname,
      spawn_inactive_timeout: spawn_inactive_timeout
    }

    connect_or_spawn_cnode(state)
  end

  defp default_sname, do: "myhtml_#{FastHtml.Application.random_sname()}"
  defp master_sname, do: Node.self() |> to_string |> String.split("@") |> List.first()
  defp master_hostname, do: Node.self() |> to_string |> String.split("@") |> List.last()

  # Tries an already running c-node first and spawns one otherwise. Both paths
  # return a valid init/1 result; the connect-success path used to return the
  # bare state map, which GenServer rejects as a bad return value.
  defp connect_or_spawn_cnode(state) do
    case connect_cnode(state) do
      {:ok, _state} = connected -> connected
      {:stop, _reason} -> spawn_cnode(state)
    end
  end

  # Connects to the c-node and subscribes to its `:nodedown` notification.
  defp connect_cnode(%{addr: addr} = state) do
    # Node.connect/1 may also return :ignored (local node not alive), which is
    # truthy but does not mean the c-node is reachable.
    case Node.connect(addr) do
      true ->
        # Without a monitor the {:nodedown, _} clause of handle_info/2 never fires.
        Node.monitor(addr, true)
        Logger.debug("connected to #{addr}")
        {:ok, state}

      _ ->
        Logger.debug("connecting to #{addr} failed")
        {:stop, :cnode_connection_fail}
    end
  end

  # Launches the worker executable through a port, remembers its OS pid in the
  # state (used by terminate/2) and waits for it to announce readiness.
  defp spawn_cnode(%{exec_path: exec_path, sname: sname, hostname: hostname} = state) do
    Logger.debug("Spawning #{sname}@#{hostname}")
    cookie = :erlang.get_cookie()

    port =
      Port.open({:spawn_executable, exec_path}, [
        :binary,
        :exit_status,
        :stderr_to_stdout,
        line: 4096,
        args: [sname, hostname, cookie, master_sname()]
      ])

    pid = Keyword.get(Port.info(port), :os_pid)
    state = Map.put(state, :pid, pid)
    await_cnode_ready(port, state)
  end

  # Reads port output until the line "<addr> ready" shows up, then connects.
  # The timeout applies to each wait, so it restarts after every port message.
  defp await_cnode_ready(
         port,
         %{spawn_inactive_timeout: timeout, addr: addr} = state
       ) do
    ready_line = to_string(addr) <> " ready"

    receive do
      {^port, {:data, {:eol, ^ready_line}}} ->
        connect_cnode(state)

      {^port, {:data, {:eol, line}}} ->
        Logger.debug("c-node is saying: #{line}")
        await_cnode_ready(port, state)

      {^port, {:exit_status, exit_status}} ->
        Logger.debug("unexpected c-node exit: #{exit_status}")
        {:stop, :cnode_unexpected_exit}

      # Only port messages are consumed here. Anything else (for example a
      # GenServer call sent while init/1 is still running) stays in the mailbox
      # so the regular callbacks can handle it once init/1 has returned.
      {^port, _} = message ->
        Logger.warn("unhandled message while waiting for cnode to be ready:\n#{inspect(message)}")
        await_cnode_ready(port, state)
    after
      timeout ->
        {:stop, :spawn_inactive_timeout}
    end
  end

  # Stops the server when the monitored c-node goes down.
  @impl true
  def handle_info({:nodedown, _cnode}, state) do
    {:stop, :nodedown, state}
  end

  def handle_info(msg, state) do
    Logger.warn("unhandled handle_info: #{inspect(msg)}")
    {:noreply, state}
  end

  # Replies with the c-node's node name.
  @impl true
  def handle_call(:addr, _from, %{addr: addr} = state) do
    {:reply, addr, state}
  end

  # Kills the worker's OS process when this server spawned it.
  @impl true
  def terminate(_reason, %{pid: pid}) when pid != nil do
    System.cmd("kill", ["-9", to_string(pid)])
    :normal
  end

  # The state has no :pid when the server attached to an already running
  # c-node; there is nothing to kill in that case.
  def terminate(_reason, _state), do: :normal

  @doc """
  Sends `msg` to the c-node and waits up to `timeout` milliseconds for a
  `{:myhtml_worker, result}` reply in the calling process.

  Returns `{:ok, result}` or `{:error, :timeout}`.
  """
  def call(msg, timeout \\ 10000) do
    node = GenServer.call(__MODULE__, :addr)
    send({nil, node}, msg)

    # NOTE(review): replies carry no reference, so a reply that arrives after
    # the timeout stays in the caller's mailbox and is matched by the caller's
    # next call/2.
    receive do
      {:myhtml_worker, res} -> {:ok, res}
    after
      timeout -> {:error, :timeout}
    end
  end
end
defmodule Mix.Tasks.FastHtml.Bench do
  @moduledoc "Benchmarking task."

  use Mix.Task

  @shortdoc "Benchmarks :fast_html.decode/1 on the HTML files in the input directory"

  @input_dir "lib/mix/tasks/fast_html/html"

  # Starts the :fast_html application, reads every file in @input_dir and runs
  # a Benchee job that decodes each of them. The path is relative to the
  # current working directory.
  @impl true
  def run(_) do
    # Fail right here when the application cannot start, instead of later
    # inside the benchmark with a less obvious error.
    {:ok, _started} = Application.ensure_all_started(:fast_html)

    # Benchee input name => file contents.
    inputs =
      Map.new(File.ls!(@input_dir), fn input_name ->
        {input_name, File.read!(Path.join(@input_dir, input_name))}
      end)

    Benchee.run(
      %{
        "Decoding" => fn input -> :fast_html.decode(input) end
      },
      inputs: inputs,
      save: [path: "fast_html.bench"],
      load: "fast_html.bench"
    )
  end
end
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
<p>Sanitize is a whitelist-based HTML sanitizer. Given a list of acceptable
elements and attributes, Sanitize will remove all unacceptable HTML from a
string.</p>
<p>Using a simple configuration syntax, you can tell Sanitize to allow certain
elements, certain attributes within those elements, and even certain URL
protocols within attributes that contain URLs. Any HTML elements or attributes
that you don't explicitly allow will be removed.</p>
<p>Sanitize is based on <a href="https://github.com/google/gumbo-parser">Google's Gumbo HTML5 parser</a>, which parses HTML
exactly the same way modern browsers do. As long as your whitelist config only
allows safe markup, even the most malformed or malicious input will be
transformed into safe output.</p>
defmodule Myhtmlex.Safe do
  @moduledoc false

  use Application

  # Captured at compile time so start/2 can locate this application's priv dir.
  app = Mix.Project.config()[:app]

  defp random_sname, do: :crypto.strong_rand_bytes(4) |> Base.encode16(case: :lower)

  defp sname, do: :"myhtmlex_#{random_sname()}"

  # Brings up distribution when the node is not alive yet, then supervises a
  # `Nodex.Cnode` worker running the `myhtml_worker` executable.
  @impl true
  def start(_type, _args) do
    if not Node.alive?() do
      Nodex.Distributed.up()
    end

    myhtml_worker = Path.join(:code.priv_dir(unquote(app)), "myhtml_worker")

    # Explicit child spec map instead of the deprecated Supervisor.Spec.worker/2;
    # the fields spell out the values worker/2 used by default.
    cnode_spec = %{
      id: Nodex.Cnode,
      start:
        {Nodex.Cnode, :start_link,
         [
           %{exec_path: myhtml_worker, sname: sname()},
           [name: Myhtmlex.Safe.Cnode]
         ]},
      restart: :permanent,
      shutdown: 5000,
      type: :worker,
      modules: [Nodex.Cnode]
    }

    Supervisor.start_link([cnode_spec], strategy: :one_for_one, name: Myhtmlex.Safe.Supervisor)
  end

  # Decodes `bin` with no format flags.
  def decode(bin) do
    decode(bin, [])
  end

  # Sends a `{:decode, bin, flags}` request to the c-node and returns the
  # result; raises a MatchError when the call does not return `{:ok, result}`.
  def decode(bin, flags) do
    {:ok, res} = Nodex.Cnode.call(Myhtmlex.Safe.Cnode, {:decode, bin, flags})
    res
  end
end
......@@ -49,12 +49,9 @@ defmodule FastHTML.Mixfile do
def application do
[
extra_applications: [:logger],
mod: {Myhtmlex.Safe, []},
mod: {FastHtml.Application, []},
# used to detect conflicts with other applications named processes
registered: [Myhtmlex.Safe.Cnode, Myhtmlex.Safe.Supervisor],
env: [
mode: Myhtmlex.Safe
]
registered: [FastHtml.Cnode, FastHtml.Supervisor]
]
end
......@@ -63,11 +60,7 @@ defmodule FastHTML.Mixfile do
# documentation helpers
{:ex_doc, ">= 0.0.0", only: :dev},
# benchmarking helpers
{:benchfella, "~> 0.3.0", only: :dev},
# cnode helpers
{:nodex,
git: "https://git.pleroma.social/pleroma/nodex",
ref: "cb6730f943cfc6aad674c92161be23a8411f15d1"}
{:benchee, "~> 1.0", only: :dev}
]
end
......
%{
"benchfella": {:hex, :benchfella, "0.3.5", "b2122c234117b3f91ed7b43b6e915e19e1ab216971154acd0a80ce0e9b8c05f5", [:mix], [], "hexpm"},
"cnodex": {:git, "https://github.com/Overbryd/cnodex.git", "c1c4cde21295db07f87bb74006ab5f7222720db9", []},
"benchee": {:hex, :benchee, "1.0.1", "66b211f9bfd84bd97e6d1beaddf8fc2312aaabe192f776e8931cb0c16f53a521", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}], "hexpm"},
"deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm"},
"earmark": {:hex, :earmark, "1.2.3", "206eb2e2ac1a794aa5256f3982de7a76bf4579ff91cb28d0e17ea2c9491e46a4", [:mix], [], "hexpm"},
"ex_doc": {:hex, :ex_doc, "0.16.3", "cd2a4cfe5d26e37502d3ec776702c72efa1adfa24ed9ce723bb565f4c30bd31a", [:mix], [{:earmark, "~> 1.1", [hex: :earmark, repo: "hexpm", optional: false]}], "hexpm"},
"myhtml": {:git, "https://github.com/lexborisov/myhtml.git", "fe2cf577570666d058a2b7167c26d3384a758e19", [branch: "master"]},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment