Commit dc559829 authored by kaniini's avatar kaniini

remove NIF

parent 4288bda5
Pipeline #19461 passed with stage
in 58 seconds
......@@ -58,15 +58,12 @@ endif
all: myhtmlex
myhtmlex: priv/myhtmlex.so
myhtmlex: priv/myhtml_worker
$(MIX) compile
$(MYHTML_STATIC): $(MYHTML_PATH)
$(MAKE) -C $(MYHTML_PATH) library MyCORE_BUILD_WITHOUT_THREADS=YES
priv/myhtmlex.so: c_src/myhtmlex.c $(MYHTML_STATIC)
$(CC) $(MYHTMLEX_CFLAGS) $(MYHTMLEX_LDFLAGS) -o $@ $< $(MYHTML_STATIC)
priv/myhtml_worker: c_src/myhtml_worker.c $(MYHTML_STATIC)
$(CC) -o $@ $< $(MYHTML_STATIC) $(CNODE_CFLAGS)
......
......@@ -10,7 +10,7 @@ Bindings for lexborisov's [myhtml](https://github.com/lexborisov/myhtml).
iex> Myhtmlex.decode("<h1>Hello world</h1>")
{"html", [], [{"head", [], []}, {"body", [], [{"h1", [], ["Hello world"]}]}]}
Benchmark results (Nif calling mode) on various file sizes on a 2,5Ghz Core i7:
Benchmark results (removed Nif calling mode) on various file sizes on a 2,5Ghz Core i7:
Settings:
duration: 1.0 s
......@@ -28,53 +28,6 @@ Bindings for lexborisov's [myhtml](https://github.com/lexborisov/myhtml).
w3c_html5.html 131k 1000 2179.30 µs/op
github_trending_js.html 341k 500 5686.21 µs/op
## Configuration
The module you are calling into is always `Myhtmlex` and depending on your application configuration,
it chooses between the underlying implementations `Myhtmlex.Safe` (default) and `Myhtmlex.Nif`.
Erlang interoperability is a tricky mine-field.
You can call into C directly using native implemented functions (Nif). But this comes with the risk
that if anything goes wrong within the C implementation, your whole VM will crash.
No more supervisor cushions from here on, just violent crashes.
That is why the default mode of operation keeps your VM safe and happy.
If you need ultimate parsing speed, or you can simply tolerate VM-level crashes, read on.
### Call into C-Node (default)
This is the default mode of operation.
If your application cannot tolerate VM-level crashes, this option allows you to gain the best of both worlds.
The added overhead is client/server communications, and a worker OS-process that runs next to your VM under VM supervision.
You do not have to do anything to start the worker process, everything is taken care of within the library.
If you are not running in distributed mode, your VM will automatically be assigned a `sname`.
The worker OS-process stays alive as long as it is under VM-supervision. If your VM goes down, the OS-process will die by itself.
If the worker OS-process dies for some reason, your VM stays unaffected and will attempt to restart it seamlessly.
### Call into Nif
If your application is aiming for ultimate parsing speed, and in the worst case can tolerate VM-level crashes, you can call directly into the Nif.
1. Require myhtmlex without runtime
in your `mix.exs`
def deps do
[
{:myhtmlex, ">= 0.0.0", runtime: false}
]
end
2. Configure the mode to `Myhtmlex.Nif`
e.g. in `config/config.exs`
config :myhtmlex, mode: Myhtmlex.Nif
3. Bonus: You can [open up in-memory references to parsed trees](https://hexdocs.pm/myhtmlex/Myhtmlex.html#open/1), without parsing + mapping erlang terms in one go
## Contribution / Bug Reports
* Please make sure you do `git submodule update` after a checkout/pull
......
This diff is collapsed.
// Declarations for the myhtmlex NIF: function prototypes, shared Erlang
// terms, parse-flag bits, and the state/reference structs.
#ifndef MYHTMLEX_H
#define MYHTMLEX_H
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "erl_nif.h"
#include <myhtml/myhtml.h>
#include <myhtml/mynamespace.h>
// NOTE(review): presumably lowercases the given C string and returns it;
// the implementation is not visible from this header -- confirm.
char*
lowercase(char* c);
// myhtmlex.c
// Returns an Erlang term for `name` (presumably an atom, per the function
// name -- confirm against the implementation).
ERL_NIF_TERM
make_atom(ErlNifEnv* env, const char* name);
// NIF entry points; all three use the standard erl_nif calling signature
// (env, argc, argv) and return an Erlang term.
ERL_NIF_TERM
nif_decode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM
nif_decode_tree(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM
nif_open(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
// Helpers that take a myhtml tree and node and return an Erlang term.
// NOTE(review): `flags` presumably points at a bitmask of the FLAG_* values
// defined below -- confirm.
ERL_NIF_TERM
build_node_attrs(ErlNifEnv* env, myhtml_tree_t* tree, myhtml_tree_node_t* node);
ERL_NIF_TERM
build_tree(ErlNifEnv* env, myhtml_tree_t* tree, myhtml_tree_node_t* node, unsigned char* flags);
ERL_NIF_TERM
build_node_children(ErlNifEnv* env, myhtml_tree_t* tree, myhtml_tree_node_t* node, unsigned char* flags);
// NOTE(review): the (env, obj) signature matches an erl_nif resource
// destructor; presumably registered for `myhtml_tree_rt` -- confirm.
void
nif_cleanup_myhtml_tree(ErlNifEnv* env, void* obj);
// Returns an unsigned char derived from `options`.
// NOTE(review): presumably an OR of the FLAG_* bits below -- confirm.
unsigned char
read_parse_flags(ErlNifEnv* env, const ERL_NIF_TERM* options);
// consts
// NOTE(review): the objects below are defined (not declared `extern`) in
// this header. If more than one translation unit includes it, this may
// produce duplicate-symbol link errors -- confirm that only myhtmlex.c
// includes this file.
ERL_NIF_TERM ATOM_NIL;
ERL_NIF_TERM ATOM_COMMENT;
ERL_NIF_TERM ATOM_HTML_ATOMS;
ERL_NIF_TERM ATOM_NIL_SELF_CLOSING;
ERL_NIF_TERM ATOM_COMMENT_TUPLE3;
ERL_NIF_TERM EMPTY_LIST;
// Parse-format flags; each occupies its own bit (0, 1, 2).
const unsigned char FLAG_HTML_ATOMS = 1 << 0;
const unsigned char FLAG_NIL_SELF_CLOSING = 1 << 1;
const unsigned char FLAG_COMMENT_TUPLE3 = 1 << 2;
// Bundles a myhtml_t handle, a myhtml tree, and an erl_nif resource type.
// NOTE(review): presumably the NIF's private data -- confirm in myhtmlex.c.
typedef struct {
myhtml_t* myhtml;
myhtml_tree_t* tree;
ErlNifResourceType* myhtml_tree_rt;
} myhtmlex_state_t;
// Pairs a myhtml tree with a root node.
// NOTE(review): presumably the payload behind the reference handed back to
// Erlang by nif_open -- confirm.
typedef struct {
myhtml_tree_t* tree;
myhtml_tree_node_t *root;
} myhtmlex_ref_t;
#endif // included myhtmlex.h
......@@ -10,7 +10,7 @@ defmodule Myhtmlex do
iex> Myhtmlex.decode("<h1>Hello world</h1>")
{"html", [], [{"head", [], []}, {"body", [], [{"h1", [], ["Hello world"]}]}]}
Benchmark results (Nif calling mode) on various file sizes on a 2,5Ghz Core i7:
Benchmark results (removed Nif calling mode) on various file sizes on a 2,5Ghz Core i7:
Settings:
duration: 1.0 s
......@@ -27,53 +27,6 @@ defmodule Myhtmlex do
wikipedia_hyperlink.html 97k 1000 1385.86 µs/op
w3c_html5.html 131k 1000 2179.30 µs/op
github_trending_js.html 341k 500 5686.21 µs/op
## Configuration
The module you are calling into is always `Myhtmlex` and depending on your application configuration,
it chooses between the underlying implementations `Myhtmlex.Safe` (default) and `Myhtmlex.Nif`.
Erlang interoperability is a tricky mine-field.
You can call into C directly using native implemented functions (Nif). But this comes with the risk
that if anything goes wrong within the C implementation, your whole VM will crash.
No more supervisor cushions from here on, just violent crashes.
That is why the default mode of operation keeps your VM safe and happy.
If you need ultimate parsing speed, or you can simply tolerate VM-level crashes, read on.
### Call into C-Node (default)
This is the default mode of operation.
If your application cannot tolerate VM-level crashes, this option allows you to gain the best of both worlds.
The added overhead is client/server communications, and a worker OS-process that runs next to your VM under VM supervision.
You do not have to do anything to start the worker process, everything is taken care of within the library.
If you are not running in distributed mode, your VM will automatically be assigned a `sname`.
The worker OS-process stays alive as long as it is under VM-supervision. If your VM goes down, the OS-process will die by itself.
If the worker OS-process dies for some reason, your VM stays unaffected and will attempt to restart it seamlessly.
### Call into Nif
If your application is aiming for ultimate parsing speed, and in the worst case can tolerate VM-level crashes, you can call directly into the Nif.
1. Require myhtmlex without runtime
in your `mix.exs`
def deps do
[
{:myhtmlex, ">= 0.0.0", runtime: false}
]
end
2. Configure the mode to `Myhtmlex.Nif`
e.g. in `config/config.exs`
config :myhtmlex, mode: Myhtmlex.Nif
3. Bonus: You can [open up in-memory references to parsed trees](https://hexdocs.pm/myhtmlex/Myhtmlex.html#open/1), without parsing + mapping erlang terms in one go
"""
@type tag() :: String.t() | atom()
......@@ -89,7 +42,7 @@ defmodule Myhtmlex do
@type format_flag() :: :html_atoms | :nil_self_closing | :comment_tuple3
defp module() do
Application.get_env(:myhtmlex, :mode, Myhtmlex.Nif)
Application.get_env(:myhtmlex, :mode, Myhtmlex.Safe)
end
@doc """
......@@ -149,28 +102,4 @@ defmodule Myhtmlex do
def decode(bin, format: flags) do
module().decode(bin, flags)
end
@doc """
Parses `bin` and hands back a reference to the internal myhtml_tree_t. (Nif only!)

The call is forwarded straight to `Myhtmlex.Nif.open/1`.
"""
@spec open(String.t()) :: reference()
defdelegate open(bin), to: Myhtmlex.Nif
@doc """
Builds the tree representation for a previously obtained reference. (Nif only!)

The output has the same shape as shown for `decode/1`. The call is forwarded
straight to `Myhtmlex.Nif.decode_tree/1`.
"""
@spec decode_tree(reference()) :: tree()
defdelegate decode_tree(ref), to: Myhtmlex.Nif
@doc """
Builds the tree representation for a previously obtained reference, applying
the given format flags. (Nif only!)

See `decode/2` for the available options and example output. The flags are
unwrapped from the `format:` option and passed positionally to
`Myhtmlex.Nif.decode_tree/2`.
"""
@spec decode_tree(reference(), format: [format_flag()]) :: tree()
def decode_tree(ref, format: flags), do: Myhtmlex.Nif.decode_tree(ref, flags)
end
defmodule Myhtmlex.Nif do
  @moduledoc false

  # `init/0` runs when this module is loaded and loads the native library.
  @on_load {:init, 0}

  # Application name taken from the Mix project config at compile time; it is
  # used to locate the application's priv directory.
  @app Mix.Project.config()[:app]

  # Loads the `myhtmlex` native library from the priv directory. The match on
  # `:ok` makes any load failure raise, so the module fails to load.
  def init do
    nif_path = :filename.join(:code.priv_dir(@app), 'myhtmlex')
    :ok = :erlang.load_nif(nif_path, 0)
  end

  # Fallback bodies: each one exits with `:nif_library_not_loaded`. They are
  # only reached when no native implementation has replaced them.
  def decode(_bin), do: exit(:nif_library_not_loaded)

  def decode(_bin, _flags), do: exit(:nif_library_not_loaded)

  def open(_bin), do: exit(:nif_library_not_loaded)

  def decode_tree(_tree), do: exit(:nif_library_not_loaded)

  def decode_tree(_tree, _flags), do: exit(:nif_library_not_loaded)
end
......@@ -78,7 +78,6 @@ end
defmodule Mix.Tasks.Compile.MyhtmlexMake do
@artifacts [
"priv/myhtmlex.so",
"priv/myhtml_worker"
]
......
defmodule Myhtmlex.NifTest do
  # Runs the shared test suite against the Nif backend, then adds tests for
  # the reference-based API (`open/1`, `decode_tree/1,2`).
  use MyhtmlexSharedTests, module: Myhtmlex.Nif

  # The benchmark table lists github_trending_js.html as 341k, so the label
  # reflects that size.
  test "parse a larger file (341K)" do
    html = File.read!("bench/github_trending_js.html")
    ref = Myhtmlex.open(html)
    assert is_reference(ref)
    assert is_tuple(Myhtmlex.decode_tree(ref))
  end

  # Only checks that opening a document yields a reference.
  test "open" do
    ref = Myhtmlex.open(~s'<dif class="a"></div><div class="b"></div>')
    assert is_reference(ref)
  end

  # A bare text node is expected to come back wrapped in html/head/body, with
  # atom tag names because of the `:html_atoms` flag.
  test "open and decode_tree" do
    ref = Myhtmlex.open(~s'text node')
    assert is_reference(ref)

    assert {:html, [],
            [
              {:head, [], []},
              {:body, [],
               [
                 "text node"
               ]}
            ]} = Myhtmlex.decode_tree(ref, format: [:html_atoms])
  end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment