Commit ad672380 authored by rinpatch's avatar rinpatch

Add benchmarks to the readme

parent 2cd03cde
Pipeline #19675 passed with stage
in 49 seconds
......@@ -4,31 +4,21 @@ A C Node wrapping lexborisov's [myhtml](https://github.com/lexborisov/myhtml).
Primarily used with [FastSanitize](https://git.pleroma.social/pleroma/fast_sanitize).
* Available as a hex package: `{:fast_html, "~> 0.1.0"}`
* [Documentation](https://hexdocs.pm/fast_html/FastHTML.html)
* [Documentation](https://hexdocs.pm/fast_html/fast_html.html)
## Example
## Benchmarks
iex> :fast_html.decode("<h1>Hello world</h1>")
{"html", [], [{"head", [], []}, {"body", [], [{"h1", [], ["Hello world"]}]}]}
The following table shows the median time it takes to decode a string into a tree for HTML parsers that can be used from Elixir. Benchmarks were conducted on a machine with an `Intel Core i7-3520M @ 2.90GHz` CPU and 16GB of RAM. You can run the benchmark yourself with the `mix fast_html.bench` task.
Benchmark results (NIF calling mode removed) on various file sizes on a 2.5 GHz Core i7:
Settings:
duration: 1.0 s
## FileSizesBench
[15:28:42] 1/3: github_trending_js.html 341k
[15:28:46] 2/3: w3c_html5.html 131k
[15:28:48] 3/3: wikipedia_hyperlink.html 97k
Finished in 7.52 seconds
## FileSizesBench
benchmark name iterations average time
wikipedia_hyperlink.html 97k 1000 1385.86 µs/op
w3c_html5.html 131k 1000 2179.30 µs/op
github_trending_js.html 341k 500 5686.21 µs/op
| File/Parser | fast_html (C-Node) | mochiweb_html (erlang) | html5ever (Rust NIF) | Myhtmlex (NIF) |
|----------------------|--------------------|------------------------|----------------------|----------------|
| document-large.html | 178.13 ms | 3471.70 ms | 799.20 ms | 402.64 ms |
| document-medium.html | 2.85 ms | 26.58 ms | 9.06 ms | 3.72 ms |
| document-small.html | 1.08 ms | 5.45 ms | 2.10 ms | 1.24 ms |
| fragment-large.html | 1.50 ms | 10.91 ms | 6.03 ms | 1.91 ms |
| fragment-small.html | 434.64 μs | 83.02 μs | 57.97 μs | 311.39 μs |
The slowdown on `fragment-small.html` is due to C-Node overhead. Unlike html5ever and Myhtmlex in NIF mode, `fast_html` keeps the parser in an isolated process and communicates with it over the network, so even a fatal crash in the parser won't bring down the entire VM.
## Contribution / Bug Reports
* Please make sure you do `git submodule update` after a checkout/pull
......
defmodule :fast_html do
@moduledoc """
A module to decode html into a tree structure.
Based on [Alexander Borisov's myhtml](https://github.com/lexborisov/myhtml),
this binding gains the properties of being html-spec compliant and very fast.
"""
@type tag() :: String.t() | atom()
......
......@@ -16,7 +16,10 @@ defmodule Mix.Tasks.FastHtml.Bench do
Benchee.run(
%{
"Decoding" => fn input -> :fast_html.decode(input) end
"fast_html" => fn input -> :fast_html.decode(input) end,
"myhtmlex nif" => fn input -> Myhtmlex.Nif.decode(input) end,
"html5ever nif" => fn input -> Html5ever.parse(input) end,
"mochiweb_html" => fn input -> :mochiweb_html.parse(input) end
},
inputs: inputs,
save: [path: "fast_html.bench"],
......
......@@ -60,13 +60,17 @@ defmodule FastHtml.Mixfile do
# documentation helpers
{:ex_doc, "~> 0.19", only: :dev},
# benchmarking helpers
{:benchee, "~> 1.0", only: :dev}
{:benchee, "~> 1.0", only: :dev},
{:myhtmlex, "~> 0.2.0", only: :dev, runtime: false},
{:mochiweb, "~> 2.18", only: :dev},
{:html5ever, "~> 0.7.0", only: :dev}
]
end
defp docs do
[
main: "fast_html"
main: "readme",
extras: ["README.md"]
]
end
end
......
......@@ -3,9 +3,13 @@
"deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm"},
"earmark": {:hex, :earmark, "1.4.2", "3aa0bd23bc4c61cf2f1e5d752d1bb470560a6f8539974f767a38923bb20e1d7f", [:mix], [], "hexpm"},
"ex_doc": {:hex, :ex_doc, "0.21.2", "caca5bc28ed7b3bdc0b662f8afe2bee1eedb5c3cf7b322feeeb7c6ebbde089d6", [:mix], [{:earmark, "~> 1.3.3 or ~> 1.4", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"},
"html5ever": {:hex, :html5ever, "0.7.0", "9f63ec1c783b2dc9f326840fcc993c01e926dbdef4e51ba1bbe5355993c258b4", [:mix], [{:rustler, "~> 0.18.0", [hex: :rustler, repo: "hexpm", optional: false]}], "hexpm"},
"makeup": {:hex, :makeup, "1.0.0", "671df94cf5a594b739ce03b0d0316aa64312cee2574b6a44becb83cd90fb05dc", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"},
"makeup_elixir": {:hex, :makeup_elixir, "0.14.0", "cf8b7c66ad1cff4c14679698d532f0b5d45a3968ffbcbfd590339cb57742f1ae", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm"},
"mochiweb": {:hex, :mochiweb, "2.18.0", "eb55f1db3e6e960fac4e6db4e2db9ec3602cc9f30b86cd1481d56545c3145d2e", [:rebar3], [], "hexpm"},
"myhtml": {:git, "https://github.com/lexborisov/myhtml.git", "fe2cf577570666d058a2b7167c26d3384a758e19", [branch: "master"]},
"myhtmlex": {:hex, :myhtmlex, "0.2.1", "d6f3eb1826f7cdaa0225a996569da0930d1a334405510845c905ae59295ab226", [:make, :mix], [{:nodex, "~> 0.1.1", [hex: :nodex, repo: "hexpm", optional: false]}], "hexpm"},
"nimble_parsec": {:hex, :nimble_parsec, "0.5.1", "c90796ecee0289dbb5ad16d3ad06f957b0cd1199769641c961cfe0b97db190e0", [:mix], [], "hexpm"},
"nodex": {:git, "https://git.pleroma.social/pleroma/nodex", "cb6730f943cfc6aad674c92161be23a8411f15d1", [ref: "cb6730f943cfc6aad674c92161be23a8411f15d1"]},
"nodex": {:hex, :nodex, "0.1.1", "ed2f7bbe19ea62a43ad4b7ad332eb3f9ca12c64a35a5802a0eb545b93ebe32af", [:mix], [], "hexpm"},
"rustler": {:hex, :rustler, "0.18.0", "db4bd0c613d83a1badc31be90ddada6f9821de29e4afd15c53a5da61882e4f2d", [:mix], [], "hexpm"},
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment