...
 
Commits (2)
image: elixir:1.7 # Needed because open_port's typespec is incorrect
# in earlier versions, see
# https://github.com/erlang/otp/commit/b9051f0f94c3d9f56f9d8d9b9c316446db7fd7a9
# and there are no upstream Elixir docker images with OTP 23
image: erlang:23-alpine
variables: variables:
MIX_ENV: test MIX_ENV: test
...@@ -15,6 +19,7 @@ stages: ...@@ -15,6 +19,7 @@ stages:
- publish - publish
before_script: before_script:
- apk add elixir build-base cmake
- mix local.hex --force - mix local.hex --force
- mix local.rebar --force - mix local.rebar --force
- mix deps.get --only test - mix deps.get --only test
...@@ -30,3 +35,8 @@ unit-testing: ...@@ -30,3 +35,8 @@ unit-testing:
coverage: '/(\d+\.\d+\%) \| Total/' coverage: '/(\d+\.\d+\%) \| Total/'
script: script:
- mix test --trace --preload-modules --cover - mix test --trace --preload-modules --cover
dialyzer:
stage: test
script:
- mix dialyzer
[submodule "c_src/myhtml"] [submodule "c_src/lexbor"]
path = c_src/myhtml path = c_src/lexbor
url = https://github.com/lexborisov/myhtml.git url = https://github.com/lexbor/lexbor
...@@ -3,6 +3,18 @@ All notable changes to this project will be documented in this file. ...@@ -3,6 +3,18 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [2.0.0] - 2020-08-01
### Changed
- **Breaking:** CMake is now required at compile-time due to it being lexbor's build system
- **Breaking:** namespaces are no longer automatically prepended to tag names, i.e. `<svg></svg>` will be `{"svg", [], []}` instead of `{"svg:svg", [], []}`
- **Breaking:** when using `:nil_self_closing` flag, only valid [void elements](https://html.spec.whatwg.org/#void-elements) will have `nil` in children
- The now-deprecated myhtml parser was replaced with [lexbor](https://github.com/lexbor/lexbor)
- The worker process now communicates with the node via stdio, instead of TCP, which was known to cause issues
on BSD systems
### Added
- `FastHtml.Pool` for fast_html workers. There is a default pool of `System.schedulers_online/0` workers, but a custom pool can be started if desired, or it can be disabled altogether. See `FastHtml.Pool` module documentation for more info
## [1.0.3] - 2020-02-10 ## [1.0.3] - 2020-02-10
### Fixed ### Fixed
- C-Node not respawning after being killed. - C-Node not respawning after being killed.
......
MIX = mix MIX = mix
CMAKE = cmake
CNODE_CFLAGS = -g -O2 -std=c99 -pedantic -Wcomment -Wextra -Wno-old-style-declaration -Wall CNODE_CFLAGS = -g -O2 -std=c99 -pedantic -Wcomment -Wextra -Wno-old-style-declaration -Wall
# ignore unused parameter warnings # ignore unused parameter warnings
...@@ -11,9 +12,9 @@ CNODE_CFLAGS += -I$(ERLANG_PATH)/include ...@@ -11,9 +12,9 @@ CNODE_CFLAGS += -I$(ERLANG_PATH)/include
# expecting myhtml as a submodule in c_src/ # expecting myhtml as a submodule in c_src/
# that way we can pin a version and package the whole thing in hex # that way we can pin a version and package the whole thing in hex
# hex does not allow for non-app related dependencies. # hex does not allow for non-app related dependencies.
MYHTML_PATH = c_src/myhtml LXB_PATH = c_src/lexbor
MYHTML_STATIC = $(MYHTML_PATH)/lib/libmyhtml_static.a LXB_STATIC = $(LXB_PATH)/liblexbor_static.a
CNODE_CFLAGS += -I$(MYHTML_PATH)/include CNODE_CFLAGS += -I$(LXB_PATH)/source
# avoid undefined reference errors to pthread_mutex_trylock # avoid undefined reference errors to pthread_mutex_trylock
CNODE_CFLAGS += -lpthread CNODE_CFLAGS += -lpthread
...@@ -34,17 +35,19 @@ CNODE_LDFLAGS += -lei -pthread ...@@ -34,17 +35,19 @@ CNODE_LDFLAGS += -lei -pthread
.PHONY: all .PHONY: all
all: priv/myhtml_worker all: priv/fasthtml_worker
$(MYHTML_STATIC): $(MYHTML_PATH) $(LXB_STATIC): $(LXB_PATH)
$(MAKE) -C $(MYHTML_PATH) library MyCORE_BUILD_WITHOUT_THREADS=YES # Sadly, building components separately seems to sporadically fail
cd $(LXB_PATH); cmake -DLEXBOR_BUILD_SEPARATELY=OFF -DLEXBOR_BUILD_SHARED=OFF
$(MAKE) -C $(LXB_PATH)
priv/myhtml_worker: c_src/myhtml_worker.c $(MYHTML_STATIC) priv/fasthtml_worker: c_src/fasthtml_worker.c $(LXB_STATIC)
$(CC) -o $@ $< $(MYHTML_STATIC) $(CNODE_CFLAGS) $(CNODE_LDFLAGS) $(CC) -o $@ $< $(LXB_STATIC) $(CNODE_CFLAGS) $(CNODE_LDFLAGS)
clean: clean-myhtml clean: clean-myhtml
$(RM) -r priv/myhtmlex* $(RM) -r priv/myhtmlex*
$(RM) priv/myhtml_worker $(RM) priv/fasthtml_worker
$(RM) myhtmlex-*.tar $(RM) myhtmlex-*.tar
$(RM) -r package-test $(RM) -r package-test
......
...@@ -3,31 +3,25 @@ ...@@ -3,31 +3,25 @@
A C Node wrapping lexborisov's [myhtml](https://github.com/lexborisov/myhtml). A C Node wrapping lexborisov's [myhtml](https://github.com/lexborisov/myhtml).
Primarily used with [FastSanitize](https://git.pleroma.social/pleroma/fast_sanitize). Primarily used with [FastSanitize](https://git.pleroma.social/pleroma/fast_sanitize).
* Available as a hex package: `{:fast_html, "~> 1.0"}` * Available as a hex package: `{:fast_html, "~> 2.0"}`
* [Documentation](https://hexdocs.pm/fast_html/fast_html.html) * [Documentation](https://hexdocs.pm/fast_html/fast_html.html)
## Benchmarks ## Benchmarks
The following table provides median times it takes to decode a string to a tree for html parsers that can be used from Elixir. Benchmarks were conducted on a machine with `Intel Core i7-3520M @ 2.90GHz` CPU and 16GB of RAM. The `mix fast_html.bench` task can be used for running the benchmark by yourself. The following table provides median times it takes to decode a string to a tree for html parsers that can be used from Elixir. Benchmarks were conducted on a machine with an `AMD Ryzen 9 3950X (32) @ 3.500GHz` CPU and 32GB of RAM. The `mix fast_html.bench` task can be used for running the benchmark by yourself.
| File/Parser | fast_html (C-Node) | mochiweb_html (erlang) | html5ever (Rust NIF) | Myhtmlex (NIF)¹ | | File/Parser | fast_html (Port) | mochiweb_html (erlang) | html5ever (Rust NIF) | Myhtmlex (NIF)¹ |
|----------------------|--------------------|------------------------|----------------------|----------------| |----------------------|--------------------|------------------------|----------------------|----------------|
| document-large.html | 178.13 ms | 3471.70 ms | 799.20 ms | 402.64 ms | | document-large.html (6.9M) | 125.12 ms | 1778.34 ms | 395.21 ms | 327.17 ms |
| document-medium.html | 2.85 ms | 26.58 ms | 9.06 ms | 3.72 ms | | document-medium.html (85K) | 1.93 ms | 12.10 ms | 4.74 ms | 3.82 ms |
| document-small.html | 1.08 ms | 5.45 ms | 2.10 ms | 1.24 ms | | document-small.html (25K)| 0.50 ms | 2.76 ms | 1.72 ms | 1.19 ms |
| fragment-large.html | 1.50 ms | 10.91 ms | 6.03 ms | 1.91 ms | | fragment-large.html (33K)| 0.93 ms | 4.78 ms | 2.34 ms | 2.15 ms |
| fragment-small.html² | 434.64 μs | 83.02 μs | 57.97 μs | 311.39 μs | | fragment-small.html² (757B)| 44.60 μs | 42.13 μs | 43.58 μs | 289.71 μs |
1. Myhtmlex has a C-Node mode as well, but it wasn't benchmarked here because it segfaults on `document-large.html` Full benchmark output can be seen in [this snippet](https://git.pleroma.social/pleroma/elixir-libraries/fast_html/snippets/3128)
2. The slowdown on `fragment-small.html` is due to C-Node overhead. Unlike html5ever and Myhtmlex in NIF mode, `fast_html` has the parser process isolated and communicates with it over the network, so even if a fatal crash in the parser happens, it won't bring down the entire VM.
## Note about running with [Swarm](https://github.com/bitwalker/swarm) 1. Myhtmlex has a C-Node mode, but it wasn't benchmarked here because it segfaults on `document-large.html`
2. The slowdown on `fragment-small.html` is due to Port overhead. Unlike html5ever and Myhtmlex in NIF mode, `fast_html` has the parser process isolated and communicates with it over stdio, so even if a fatal crash in the parser happens, it won't bring down the entire VM.
Since the myhtml worker runs as a separate node, Swarm will try to sync with it. Of course it will fail since it's not a real Erlang node. To prevent it from doing that, you can add the following to your configuration:
```elixir
config :swarm, node_blacklist: [~r/myhtml_.*$/]
```
## Contribution / Bug Reports ## Contribution / Bug Reports
......
Subproject commit 7cfc4b48aa8ffba251c249eb343aad94d16f9a59
Subproject commit fe2cf577570666d058a2b7167c26d3384a758e19
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
#define GROW_BY 30 #define GROW_BY 30
typedef struct { typedef struct {
myhtml_tree_node_t **data; lxb_dom_node_t **data;
size_t used; size_t used;
size_t size; size_t size;
} tstack; } tstack;
void tstack_init(tstack *stack, size_t initial_size) { void tstack_init(tstack *stack, size_t initial_size) {
stack->data = (myhtml_tree_node_t **) malloc(initial_size * sizeof(myhtml_tree_node_t *)); stack->data = (lxb_dom_node_t **) malloc(initial_size * sizeof(lxb_dom_node_t *));
stack->used = 0; stack->used = 0;
stack->size = initial_size; stack->size = initial_size;
} }
...@@ -20,18 +20,18 @@ void tstack_free(tstack *stack) { ...@@ -20,18 +20,18 @@ void tstack_free(tstack *stack) {
} }
void tstack_resize(tstack *stack, size_t new_size) { void tstack_resize(tstack *stack, size_t new_size) {
stack->data = (myhtml_tree_node_t **) realloc(stack->data, new_size * sizeof(myhtml_tree_node_t *)); stack->data = (lxb_dom_node_t **) realloc(stack->data, new_size * sizeof(lxb_dom_node_t *));
stack->size = new_size; stack->size = new_size;
} }
void tstack_push(tstack *stack, myhtml_tree_node_t * element) { void tstack_push(tstack *stack, lxb_dom_node_t * element) {
if(stack->used == stack->size) { if(stack->used == stack->size) {
tstack_resize(stack, stack->size + GROW_BY); tstack_resize(stack, stack->size + GROW_BY);
} }
stack->data[stack->used++] = element; stack->data[stack->used++] = element;
} }
myhtml_tree_node_t* tstack_pop(tstack *stack) { lxb_dom_node_t * tstack_pop(tstack *stack) {
return stack->data[--(stack->used)]; return stack->data[--(stack->used)];
} }
......
...@@ -19,13 +19,14 @@ defmodule :fast_html do ...@@ -19,13 +19,14 @@ defmodule :fast_html do
Returns a tree representation from the given html string. Returns a tree representation from the given html string.
`opts` is a keyword list of options, the options available: `opts` is a keyword list of options, the options available:
* `timeout` - Call timeout * `timeout` - Call timeout. If pooling is used and the worker doesn't return
* `format` - Format flags for the tree the result in time, the worker will be killed with a warning.
* `format` - Format flags for the tree.
The following format flags are available: The following format flags are available:
* `:html_atoms` uses atoms for known html tags (faster), binaries for everything else. * `:html_atoms` uses atoms for known html tags (faster), binaries for everything else.
* `:nil_self_closing` uses `nil` to designate self-closing tags and void elements. * `:nil_self_closing` uses `nil` to designate void elements.
For example `<br>` is then being represented like `{"br", [], nil}`. For example `<br>` is then being represented like `{"br", [], nil}`.
See http://w3c.github.io/html-reference/syntax.html#void-elements for a full list of void elements. See http://w3c.github.io/html-reference/syntax.html#void-elements for a full list of void elements.
* `:comment_tuple3` uses 3-tuple elements for comments, instead of the default 2-tuple element. * `:comment_tuple3` uses 3-tuple elements for comments, instead of the default 2-tuple element.
...@@ -61,7 +62,7 @@ defmodule :fast_html do ...@@ -61,7 +62,7 @@ defmodule :fast_html do
iex> :fast_html.decode(html, format: [:html_atoms, :nil_self_closing, :comment_tuple3]) iex> :fast_html.decode(html, format: [:html_atoms, :nil_self_closing, :comment_tuple3])
{:ok, [{:html, [], {:ok, [{:html, [],
[{:head, [], []}, [{:head, [], []},
{:body, [], [{:comment, [], " a comment "}, {"unknown", [], nil}]}]}]} {:body, [], [{:comment, [], " a comment "}, {"unknown", [], []}]}]}]}
""" """
@spec decode(String.t(), format: [format_flag()]) :: @spec decode(String.t(), format: [format_flag()]) ::
...@@ -69,7 +70,8 @@ defmodule :fast_html do ...@@ -69,7 +70,8 @@ defmodule :fast_html do
def decode(bin, opts \\ []) do def decode(bin, opts \\ []) do
flags = Keyword.get(opts, :format, []) flags = Keyword.get(opts, :format, [])
timeout = Keyword.get(opts, :timeout, 10000) timeout = Keyword.get(opts, :timeout, 10000)
FastHtml.Cnode.call({:decode, bin, flags}, timeout)
find_and_use_port({:decode, bin, flags}, timeout, opts)
end end
@doc """ @doc """
...@@ -77,22 +79,69 @@ defmodule :fast_html do ...@@ -77,22 +79,69 @@ defmodule :fast_html do
`opts` is a keyword list of options, the options available are the same as in `decode/2` with addition of: `opts` is a keyword list of options, the options available are the same as in `decode/2` with addition of:
* `context` - Name of the context element, defaults to `div` * `context` - Name of the context element, defaults to `div`
* `format` - Format flags for the tree
Example: Example:
iex> :fast_html.decode_fragment("rin is the <i>best</i> girl") iex> :fast_html.decode_fragment("rin is the <i>best</i> girl")
{:ok, [{"html", [], ["rin is the ", {"i", [], ["best"]}, " girl"]}]} {:ok, ["rin is the ", {"i", [], ["best"]}, " girl"]}
iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", context: "title") iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", context: "title")
{:ok, [{"html", [], ["rin is the <i>best</i> girl"]}]} {:ok, ["rin is the <i>best</i> girl"]}
iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", context: "objective_truth") iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", context: "objective_truth")
{:error, :unknown_context_tag} {:error, :unknown_context_tag}
iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", format: [:html_atoms]) iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", format: [:html_atoms])
{:ok, [{:html, [], ["rin is the ", {:i, [], ["best"]}, " girl"]}]} {:ok, ["rin is the ", {:i, [], ["best"]}, " girl"]}
""" """
def decode_fragment(bin, opts \\ []) do def decode_fragment(bin, opts \\ []) do
flags = Keyword.get(opts, :format, []) flags = Keyword.get(opts, :format, [])
timeout = Keyword.get(opts, :timeout, 10000) timeout = Keyword.get(opts, :timeout, 10000)
context = Keyword.get(opts, :context, "div") context = Keyword.get(opts, :context, "div")
FastHtml.Cnode.call({:decode_fragment, bin, flags, context}, timeout)
find_and_use_port({:decode_fragment, bin, flags, context}, timeout, opts)
end
@default_pool FastHtml.Pool
# Serializes `term_command` with `:erlang.term_to_binary/1`, sends it to a
# worker port and decodes the binary reply with `:erlang.binary_to_term/1`.
#
# Port selection:
#   * `opts[:pool]`, when given, is used as the pool;
#   * otherwise the default pool is used, unless it was disabled through
#     `config :fast_html, :pool, enabled: false`;
#   * with no pool at all, a one-off port is opened for this call and closed
#     afterwards.
#
# Returns the decoded reply of the worker, or `{:error, :timeout}` when no
# reply arrives within `timeout` milliseconds.
defp find_and_use_port(term_command, timeout, opts) do
  command = :erlang.term_to_binary(term_command)

  pool =
    cond do
      pool = Keyword.get(opts, :pool) -> pool
      Application.get_env(:fast_html, :pool, enabled: true)[:enabled] -> @default_pool
      true -> nil
    end

  # Runs in the process that currently owns the port, so `self()` is the
  # connected process the port will reply to.
  execute_command_fun = fn port ->
    send(port, {self(), {:command, command}})

    receive do
      {^port, {:data, res}} -> {:ok, res}
    after
      timeout ->
        {:error, :timeout}
    end
  end

  result =
    if pool do
      FastHtml.Pool.get_port(pool, execute_command_fun)
    else
      port = open_port()

      # Always release the one-off port, even if the command fun raises.
      try do
        execute_command_fun.(port)
      after
        close_port_quietly(port)
      end
    end

  case result do
    {:ok, result} -> :erlang.binary_to_term(result)
    {:error, _} = e -> e
  end
end

# Closes `port`, ignoring the `ArgumentError` raised by `Port.close/1` when
# the port is already closed (e.g. the worker exited before the timeout).
defp close_port_quietly(port) do
  Port.close(port)
  :ok
rescue
  ArgumentError -> :ok
end
def open_port do
Port.open({:spawn_executable, Path.join([:code.priv_dir(:fast_html), "fasthtml_worker"])}, [
:binary,
{:packet, 4},
:use_stdio,
:exit_status
])
end end
end end
...@@ -3,54 +3,13 @@ defmodule FastHtml.Application do ...@@ -3,54 +3,13 @@ defmodule FastHtml.Application do
use Application use Application
def random_sname, do: :crypto.strong_rand_bytes(4) |> Base.encode16(case: :lower)
def start(_type, _args) do def start(_type, _args) do
case maybe_setup_node() do default_pool_config = Application.get_env(:fast_html, :pool, enabled: true)
{:error, message} -> raise message children = if default_pool_config[:enabled], do: [FastHtml.Pool], else: []
_ -> :ok
end
Supervisor.start_link([{FastHtml.Cnode, Application.get_env(:fast_html, :cnode, [])}], Supervisor.start_link(children,
strategy: :one_for_one, strategy: :one_for_one,
name: FastHtml.Supervisor name: FastHtml.Supervisor
) )
end end
defp maybe_setup_node() do
with {_, false} <- {:alive, Node.alive?()},
{:ok, epmd_path} <- find_epmd(),
:ok <- start_epmd(epmd_path),
{:ok, _pid} = pid_tuple <- start_node() do
pid_tuple
else
{:alive, _} ->
:ok
{:error, _} = e ->
e
end
end
defp find_epmd() do
case System.find_executable("epmd") do
nil ->
{:error,
"Could not find epmd executable. Please ensure the location it's in is present in your PATH or start epmd manually beforehand"}
executable ->
{:ok, executable}
end
end
defp start_epmd(path) do
case System.cmd(path, ["-daemon"]) do
{_result, 0} -> :ok
{_result, exit_code} -> {:error, "Could not start epmd, exit code: #{exit_code}"}
end
end
defp start_node() do
Node.start(:"master_#{random_sname()}@127.0.0.1")
end
end end
defmodule FastHtml.Cnode do
  @moduledoc """
  Manages myhtml c-node.

  On start the server first tries to connect to an already running node with
  the configured name; if that fails it spawns the `myhtml_worker` executable
  from the application's `priv` directory and waits for it to report
  readiness before connecting.

  ## Configuration

  ```elixir
  config :fast_html, :cnode,
    sname: "myhtml_worker", # Defaults to myhtml_<random bytes>
    spawn_inactive_timeout: 5000 # Defaults to 10000
  ```
  """

  @spawn_inactive_timeout 10000

  application = Mix.Project.config()[:app]

  use GenServer
  require Logger

  @doc false
  def start_link(args) do
    GenServer.start_link(__MODULE__, args, name: __MODULE__)
  end

  @doc false
  def init(args) do
    exec_path = Path.join(:code.priv_dir(unquote(application)), "myhtml_worker")
    sname = Keyword.get_lazy(args, :sname, &default_sname/0)
    hostname = master_hostname()
    addr = :"#{sname}@#{hostname}"
    spawn_inactive_timeout = Keyword.get(args, :spawn_inactive_timeout, @spawn_inactive_timeout)

    state = %{
      exec_path: exec_path,
      sname: sname,
      addr: addr,
      hostname: hostname,
      spawn_inactive_timeout: spawn_inactive_timeout
    }

    # Returns `{:ok, state}` or `{:stop, reason}`, both valid `init/1` results.
    connect_or_spawn_cnode(state)
  end

  defp default_sname, do: "myhtml_#{FastHtml.Application.random_sname()}"

  # Name and host parts of the local node name (`name@host`).
  defp master_sname, do: Node.self() |> to_string |> String.split("@") |> List.first()
  defp master_hostname, do: Node.self() |> to_string |> String.split("@") |> List.last()

  # Connects to an existing c-node, spawning a new one when the connection
  # fails. Always returns a tuple `init/1` can hand back to GenServer; the
  # previous version returned the bare state map on a successful first
  # connect, which GenServer rejects as a bad return value.
  defp connect_or_spawn_cnode(state) do
    case connect_cnode(state) do
      {:stop, _} -> spawn_cnode(state)
      {:ok, state} -> {:ok, state}
    end
  end

  defp connect_cnode(%{addr: addr} = state) do
    if Node.connect(addr) do
      Logger.debug("connected to #{addr}")
      # Ask for a `{:nodedown, node}` message when the c-node goes away.
      Node.monitor(addr, true)
      {:ok, state}
    else
      Logger.debug("connecting to #{addr} failed")
      {:stop, :cnode_connection_fail}
    end
  end

  # Starts the worker executable and remembers its OS pid in the state so
  # that `terminate/2` can kill it.
  defp spawn_cnode(%{exec_path: exec_path, sname: sname, hostname: hostname} = state) do
    Logger.debug("Spawning #{sname}@#{hostname}")

    cookie = :erlang.get_cookie()

    port =
      Port.open({:spawn_executable, exec_path}, [
        :binary,
        :exit_status,
        :stderr_to_stdout,
        line: 4096,
        args: [sname, hostname, cookie, master_sname()]
      ])

    pid = Keyword.get(Port.info(port), :os_pid)
    state = Map.put(state, :pid, pid)
    await_cnode_ready(port, state)
  end

  # Reads the worker's output line by line until it prints "<addr> ready",
  # it exits, or nothing arrives for `spawn_inactive_timeout` milliseconds.
  defp await_cnode_ready(
         port,
         %{spawn_inactive_timeout: timeout, addr: addr} = state
       ) do
    ready_line = to_string(addr) <> " ready"

    receive do
      {^port, {:data, {:eol, ^ready_line}}} ->
        connect_cnode(state)

      {^port, {:data, {:eol, line}}} ->
        Logger.debug("c-node is saying: #{line}")
        await_cnode_ready(port, state)

      {^port, {:exit_status, exit_status}} ->
        Logger.debug("unexpected c-node exit: #{exit_status}")
        {:stop, :cnode_unexpected_exit}

      message ->
        Logger.warn("unhandled message while waiting for cnode to be ready:\n#{inspect(message)}")
        await_cnode_ready(port, state)
    after
      timeout ->
        {:stop, :spawn_inactive_timeout}
    end
  end

  @doc false
  def handle_info({:nodedown, _cnode}, state) do
    # Stop on node loss so that a supervisor, if any, can start a fresh server.
    {:stop, :nodedown, state}
  end

  def handle_info(msg, state) do
    Logger.warn("unhandled handle_info: #{inspect(msg)}")
    {:noreply, state}
  end

  @doc false
  def handle_call(:addr, _from, %{addr: addr} = state) do
    {:reply, addr, state}
  end

  @doc false
  def terminate(_reason, %{pid: pid}) when pid != nil do
    System.cmd("kill", ["-9", to_string(pid)])
    :normal
  end

  # No worker was spawned by this server (it connected to an already running
  # node), so there is nothing to kill.
  def terminate(_reason, _state), do: :ok

  @doc "Call into myhtml cnode"
  def call(msg, timeout \\ 10000) do
    node = GenServer.call(__MODULE__, :addr)
    send({nil, node}, msg)

    receive do
      {:myhtml_worker, res} -> res
    after
      timeout -> {:error, :timeout}
    end
  end
end
defmodule FastHtml.Pool do
  @behaviour NimblePool
  @moduledoc """
  A `NimblePool` of `fasthtml_worker` ports.

  Every worker of the pool is a port opened with `:fast_html.open_port/0`.
  When a client checks a port out, the pool connects the port to the client
  process; when the client is done, `get_port/2` connects it back to the pool
  process.

  A port is removed from the pool when it exits or when the client checks it
  in after a `:timeout`.

  The default pool is started by the application unless it is disabled with
  `config :fast_html, :pool, enabled: false`. See `start_link/1` for the
  options accepted when starting a custom pool.
  """
  require Logger

  @doc false
  def child_spec(opts) do
    %{
      id: __MODULE__,
      start: {__MODULE__, :start_link, [opts]},
      type: :worker,
      restart: :permanent
    }
  end

  @doc """
  Starts the port pool.

  ### Options
  - `:size` - Number of ports in the pool. Defaults to `System.schedulers_online/0` if not set.
  - `:name` - Registered name of the pool. Defaults to `#{__MODULE__}` if not set, set to `false` to not register the process.
  """
  @type option :: {:size, pos_integer()} | {:name, atom()}
  @spec start_link([option()]) :: term()
  def start_link(options) do
    {size, options} = Keyword.pop(options, :size, System.schedulers_online())
    # The remaining options become the pool state handed to `init_pool/1`.
    NimblePool.start_link(worker: {__MODULE__, options}, pool_size: size)
  end

  @type pool :: atom() | pid()
  @type result :: {:ok, term()} | {:error, atom()}

  @doc """
  Checks a port out of `pool`, runs `fun` with it and checks it back in.

  `fun` receives the port, already connected to the calling process, and must
  return `{:ok, term}` or `{:error, reason}`; that value is returned as is.
  On `{:error, reason}` the `reason` is passed to the pool as the check-in
  state, so an `{:error, :timeout}` result makes the pool drop the port.
  """
  @spec get_port(pool(), (port() -> result())) :: result()
  def get_port(pool, fun) do
    NimblePool.checkout!(pool, :checkout, fn _from, port ->
      result = fun.(port)

      client_state =
        case result do
          {:ok, _} ->
            :ok

          {:error, reason} ->
            reason
        end

      # Hand the port back to the pool process before checking it in.
      send(port, {self(), {:connect, GenServer.whereis(pool)}})

      client_state =
        receive do
          {^port, :connected} -> client_state
          {:EXIT, ^port, reason} -> {:EXIT, reason}
        end

      {result, client_state}
    end)
  end

  @impl NimblePool
  @doc false
  def init_pool(state) do
    # `nil` (option absent) selects the default name, any other atom is used
    # as given (`false` therefore disables registration), anything else is
    # ignored.
    {name, options} =
      case Keyword.pop(state, :name) do
        {nil, state} -> {__MODULE__, state}
        {name, state} when is_atom(name) -> {name, state}
        {_, state} -> {nil, state}
      end

    if name, do: Process.register(self(), name)
    {:ok, options}
  end

  @impl NimblePool
  @doc false
  def init_worker(pool_state) do
    port = :fast_html.open_port()
    {:ok, port, pool_state}
  end

  @impl NimblePool
  @doc false
  def terminate_worker({:EXIT, reason}, port, pool_state) do
    # Ports do not implement `String.Chars` and `reason` may be any term, so
    # both have to go through `inspect/1`; plain interpolation would raise.
    Logger.warn(fn ->
      "[#{__MODULE__}]: Port #{inspect(port)} unexpectedly exited with reason: #{inspect(reason)}"
    end)

    {:ok, pool_state}
  end

  def terminate_worker(_reason, port, pool_state) do
    Port.close(port)
    {:ok, pool_state}
  end

  @impl NimblePool
  @doc false
  def handle_checkout(:checkout, {client_pid, _}, port) do
    # Make the client the connected process of the port, so that the port's
    # replies are delivered to the client.
    send(port, {self(), {:connect, client_pid}})

    receive do
      {^port, :connected} -> {:ok, port, port}
      {:EXIT, ^port, reason} -> {:remove, {:EXIT, reason}}
    end
  end

  @impl NimblePool
  @doc false
  def handle_checkin(:timeout, _, _), do: {:remove, :timeout}
  def handle_checkin(_, _, port), do: {:ok, port}

  @impl NimblePool
  @doc false
  def handle_info({:EXIT, port, reason}, port), do: {:remove, {:EXIT, reason}}
  def handle_info({:EXIT, _, _}, port), do: {:ok, port}

  # Port sent data to the pool, this happens when the timeout was reached
  # and the port got disconnected from the client, but not yet killed by the pool.
  # Just discard the message.
  def handle_info({_sending_port, {:data, _}}, port), do: {:ok, port}
end
...@@ -4,11 +4,12 @@ defmodule FastHtml.Mixfile do ...@@ -4,11 +4,12 @@ defmodule FastHtml.Mixfile do
def project do def project do
[ [
app: :fast_html, app: :fast_html,
version: "1.0.3", version: "2.0.0",
elixir: "~> 1.5", elixir: "~> 1.5",
deps: deps(), deps: deps(),
package: package(), package: package(),
compilers: [:fast_html_cnode_make] ++ Mix.compilers(), compilers: [:elixir_make] ++ Mix.compilers(),
make_env: make_env(),
build_embedded: Mix.env() == :prod, build_embedded: Mix.env() == :prod,
start_permanent: Mix.env() == :prod, start_permanent: Mix.env() == :prod,
name: "FastHtml", name: "FastHtml",
...@@ -68,9 +69,13 @@ defmodule FastHtml.Mixfile do ...@@ -68,9 +69,13 @@ defmodule FastHtml.Mixfile do
{:ex_doc, "~> 0.19", only: :dev}, {:ex_doc, "~> 0.19", only: :dev},
# benchmarking helpers # benchmarking helpers
{:benchee, "~> 1.0", only: :bench, optional: true}, {:benchee, "~> 1.0", only: :bench, optional: true},
{:dialyxir, "~> 1.0", only: [:dev, :test], runtime: false},
{:myhtmlex, "~> 0.2.0", only: :bench, runtime: false, optional: true}, {:myhtmlex, "~> 0.2.0", only: :bench, runtime: false, optional: true},
{:mochiweb, "~> 2.18", only: :bench, optional: true}, {:mochiweb, "~> 2.18", only: :bench, optional: true},
{:html5ever, "~> 0.7.0", only: :bench, optional: true} {:html5ever,
git: "https://github.com/rusterlium/html5ever_elixir.git", only: :bench, optional: true},
{:nimble_pool, "~> 0.1"},
{:elixir_make, "~> 0.4", runtime: false}
] ]
end end
...@@ -80,24 +85,6 @@ defmodule FastHtml.Mixfile do ...@@ -80,24 +85,6 @@ defmodule FastHtml.Mixfile do
extras: ["README.md"] extras: ["README.md"]
] ]
end end
end
defmodule Mix.Tasks.Compile.FastHtmlCnodeMake do
@artifacts [
"priv/myhtml_worker"
]
def find_make do
_make_cmd =
System.get_env("MAKE") ||
case :os.type() do
{:unix, :freebsd} -> "gmake"
{:unix, :openbsd} -> "gmake"
{:unix, :netbsd} -> "gmake"
{:unix, :dragonfly} -> "gmake"
_ -> "make"
end
end
defp otp_version do defp otp_version do
:erlang.system_info(:otp_release) :erlang.system_info(:otp_release)
...@@ -109,49 +96,14 @@ defmodule Mix.Tasks.Compile.FastHtmlCnodeMake do ...@@ -109,49 +96,14 @@ defmodule Mix.Tasks.Compile.FastHtmlCnodeMake do
otp_version() >= 22 otp_version() >= 22
end end
def run(_) do defp make_env do
make_cmd = find_make() %{
"OTP22_DEF" =>
exit_code = if otp_22_or_newer?() do
if match?({:win32, _}, :os.type()) do "YES"
IO.warn("Windows is not yet a target.") else
1 "NO"
else end
{result, exit_code} = }
System.cmd(
make_cmd,
@artifacts,
stderr_to_stdout: true,
env: [
{"MIX_ENV", to_string(Mix.env())},
{"OTP22_DEF", (otp_22_or_newer?() && "YES") || "NO"}
]
)
IO.binwrite(result)
exit_code
end
if exit_code == 0 do
:ok
else
{:error,
[
%Mix.Task.Compiler.Diagnostic{
compiler_name: "FastHtml Cnode",
message: "Make exited with #{exit_code}",
severity: :error,
file: nil,
position: nil
}
]}
end
end
def clean() do
make_cmd = find_make()
{result, _error_code} = System.cmd(make_cmd, ["clean"], stderr_to_stdout: true)
Mix.shell().info(result)
:ok
end end
end end
%{ %{
"benchee": {:hex, :benchee, "1.0.1", "66b211f9bfd84bd97e6d1beaddf8fc2312aaabe192f776e8931cb0c16f53a521", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}], "hexpm"}, "benchee": {:hex, :benchee, "1.0.1", "66b211f9bfd84bd97e6d1beaddf8fc2312aaabe192f776e8931cb0c16f53a521", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}], "hexpm", "3ad58ae787e9c7c94dd7ceda3b587ec2c64604563e049b2a0e8baafae832addb"},
"deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm"}, "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"},
"earmark": {:hex, :earmark, "1.4.2", "3aa0bd23bc4c61cf2f1e5d752d1bb470560a6f8539974f767a38923bb20e1d7f", [:mix], [], "hexpm"}, "dialyxir": {:hex, :dialyxir, "1.0.0", "6a1fa629f7881a9f5aaf3a78f094b2a51a0357c843871b8bc98824e7342d00a5", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "aeb06588145fac14ca08d8061a142d52753dbc2cf7f0d00fc1013f53f8654654"},
"ex_doc": {:hex, :ex_doc, "0.21.2", "caca5bc28ed7b3bdc0b662f8afe2bee1eedb5c3cf7b322feeeb7c6ebbde089d6", [:mix], [{:earmark, "~> 1.3.3 or ~> 1.4", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"}, "earmark": {:hex, :earmark, "1.4.2", "3aa0bd23bc4c61cf2f1e5d752d1bb470560a6f8539974f767a38923bb20e1d7f", [:mix], [], "hexpm", "5e8806285d8a3a8999bd38e4a73c58d28534c856bc38c44818e5ba85bbda16fb"},
"html5ever": {:hex, :html5ever, "0.7.0", "9f63ec1c783b2dc9f326840fcc993c01e926dbdef4e51ba1bbe5355993c258b4", [:mix], [{:rustler, "~> 0.18.0", [hex: :rustler, repo: "hexpm", optional: false]}], "hexpm"}, "elixir_make": {:hex, :elixir_make, "0.6.0", "38349f3e29aff4864352084fc736fa7fa0f2995a819a737554f7ebd28b85aaab", [:mix], [], "hexpm", "d522695b93b7f0b4c0fcb2dfe73a6b905b1c301226a5a55cb42e5b14d509e050"},
"makeup": {:hex, :makeup, "1.0.0", "671df94cf5a594b739ce03b0d0316aa64312cee2574b6a44becb83cd90fb05dc", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"}, "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"},
"makeup_elixir": {:hex, :makeup_elixir, "0.14.0", "cf8b7c66ad1cff4c14679698d532f0b5d45a3968ffbcbfd590339cb57742f1ae", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm"}, "ex_doc": {:hex, :ex_doc, "0.21.2", "caca5bc28ed7b3bdc0b662f8afe2bee1eedb5c3cf7b322feeeb7c6ebbde089d6", [:mix], [{:earmark, "~> 1.3.3 or ~> 1.4", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm", "f1155337ae17ff7a1255217b4c1ceefcd1860b7ceb1a1874031e7a861b052e39"},
"mochiweb": {:hex, :mochiweb, "2.18.0", "eb55f1db3e6e960fac4e6db4e2db9ec3602cc9f30b86cd1481d56545c3145d2e", [:rebar3], [], "hexpm"}, "html5ever": {:git, "https://github.com/rusterlium/html5ever_elixir.git", "f6743865c353aaebaec1959ae4025596f8344587", []},
"makeup": {:hex, :makeup, "1.0.0", "671df94cf5a594b739ce03b0d0316aa64312cee2574b6a44becb83cd90fb05dc", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "a10c6eb62cca416019663129699769f0c2ccf39428b3bb3c0cb38c718a0c186d"},
"makeup_elixir": {:hex, :makeup_elixir, "0.14.0", "cf8b7c66ad1cff4c14679698d532f0b5d45a3968ffbcbfd590339cb57742f1ae", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "d4b316c7222a85bbaa2fd7c6e90e37e953257ad196dc229505137c5e505e9eff"},
"mochiweb": {:hex, :mochiweb, "2.18.0", "eb55f1db3e6e960fac4e6db4e2db9ec3602cc9f30b86cd1481d56545c3145d2e", [:rebar3], [], "hexpm", "b93e2b1e564bdbadfecc297277f9e6d0902da645b417d6c9210f6038ac63489a"},
"myhtml": {:git, "https://github.com/lexborisov/myhtml.git", "fe2cf577570666d058a2b7167c26d3384a758e19", [branch: "master"]}, "myhtml": {:git, "https://github.com/lexborisov/myhtml.git", "fe2cf577570666d058a2b7167c26d3384a758e19", [branch: "master"]},
"myhtmlex": {:hex, :myhtmlex, "0.2.1", "d6f3eb1826f7cdaa0225a996569da0930d1a334405510845c905ae59295ab226", [:make, :mix], [{:nodex, "~> 0.1.1", [hex: :nodex, repo: "hexpm", optional: false]}], "hexpm"}, "myhtmlex": {:hex, :myhtmlex, "0.2.1", "d6f3eb1826f7cdaa0225a996569da0930d1a334405510845c905ae59295ab226", [:make, :mix], [{:nodex, "~> 0.1.1", [hex: :nodex, repo: "hexpm", optional: false]}], "hexpm", "1c22e79731cc492365279d111fcaf7f4f17ca845d2b9660d2285b91c56cbd1c2"},
"nimble_parsec": {:hex, :nimble_parsec, "0.5.1", "c90796ecee0289dbb5ad16d3ad06f957b0cd1199769641c961cfe0b97db190e0", [:mix], [], "hexpm"}, "nimble_parsec": {:hex, :nimble_parsec, "0.5.1", "c90796ecee0289dbb5ad16d3ad06f957b0cd1199769641c961cfe0b97db190e0", [:mix], [], "hexpm", "00e3ebdc821fb3a36957320d49e8f4bfa310d73ea31c90e5f925dc75e030da8f"},
"nodex": {:hex, :nodex, "0.1.1", "ed2f7bbe19ea62a43ad4b7ad332eb3f9ca12c64a35a5802a0eb545b93ebe32af", [:mix], [], "hexpm"}, "nimble_pool": {:hex, :nimble_pool, "0.1.0", "ffa9d5be27eee2b00b0c634eb649aa27f97b39186fec3c493716c2a33e784ec6", [:mix], [], "hexpm", "343a1eaa620ddcf3430a83f39f2af499fe2370390d4f785cd475b4df5acaf3f9"},
"rustler": {:hex, :rustler, "0.18.0", "db4bd0c613d83a1badc31be90ddada6f9821de29e4afd15c53a5da61882e4f2d", [:mix], [], "hexpm"}, "nodex": {:hex, :nodex, "0.1.1", "ed2f7bbe19ea62a43ad4b7ad332eb3f9ca12c64a35a5802a0eb545b93ebe32af", [:mix], [], "hexpm", "0e6804b0138a71e33e99a2e6423019db01de34fe341f765220bce38830779548"},
"rustler": {:hex, :rustler, "0.21.1", "5299980be32da997c54382e945bacaa015ed97a60745e1e639beaf6a7b278c65", [:mix], [{:toml, "~> 0.5.2", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "6ee1651e10645b2b2f3bb70502bf180341aa058709177e9bc28c105934094bc6"},
"toml": {:hex, :toml, "0.5.2", "e471388a8726d1ce51a6b32f864b8228a1eb8edc907a0edf2bb50eab9321b526", [:mix], [], "hexpm", "f1e3dabef71fb510d015fad18c0e05e7c57281001141504c6b69d94e99750a07"},
} }
...@@ -43,7 +43,7 @@ defmodule :fast_html_test do ...@@ -43,7 +43,7 @@ defmodule :fast_html_test do
{"body", [], {"body", [],
[ [
{"br", [], nil}, {"br", [], nil},
{"esi:include", [], nil} {"esi:include", [], []}
]} ]}
]} ]}
]} = :fast_html.decode("<br><esi:include />", format: [:nil_self_closing]) ]} = :fast_html.decode("<br><esi:include />", format: [:nil_self_closing])
...@@ -125,24 +125,6 @@ defmodule :fast_html_test do ...@@ -125,24 +125,6 @@ defmodule :fast_html_test do
]} = :fast_html.decode(~s'<a <> asdf', format: [:html_atoms]) ]} = :fast_html.decode(~s'<a <> asdf', format: [:html_atoms])
end end
test "namespaced tags" do
assert {:ok,
[
{:html, [],
[
{:head, [], []},
{:body, [],
[
{"svg:svg", [],
[
{"svg:path", [], []},
{"svg:a", [], []}
]}
]}
]}
]} = :fast_html.decode(~s'<svg><path></path><a></a></svg>', format: [:html_atoms])
end
test "custom namespaced tags" do test "custom namespaced tags" do
assert {:ok, assert {:ok,
[ [
...@@ -151,7 +133,7 @@ defmodule :fast_html_test do ...@@ -151,7 +133,7 @@ defmodule :fast_html_test do
{:head, [], []}, {:head, [], []},
{:body, [], {:body, [],
[ [
{"esi:include", [], nil} {"esi:include", [], []}
]} ]}
]} ]}
]} = :fast_html.decode(~s'<esi:include />', format: [:html_atoms, :nil_self_closing]) ]} = :fast_html.decode(~s'<esi:include />', format: [:html_atoms, :nil_self_closing])
...@@ -178,6 +160,6 @@ defmodule :fast_html_test do ...@@ -178,6 +160,6 @@ defmodule :fast_html_test do
test "doesn't return attribute name in attribute value when the latter is empty" do test "doesn't return attribute name in attribute value when the latter is empty" do
assert :fast_html.decode_fragment("<meta content=\"\"/>") == assert :fast_html.decode_fragment("<meta content=\"\"/>") ==
{:ok, [{"html", [], [{"meta", [{"content", ""}], []}]}]} {:ok, [{"meta", [{"content", ""}], []}]}
end end
end end