...
 
Commits (2)
image: elixir:1.7
# Needed because open_port's typespec is incorrect
# in earlier versions, see
# https://github.com/erlang/otp/commit/b9051f0f94c3d9f56f9d8d9b9c316446db7fd7a9
# and there are no upstream Elixir docker images with OTP 23
image: erlang:23-alpine
variables:
MIX_ENV: test
......@@ -15,6 +19,7 @@ stages:
- publish
before_script:
- apk add elixir build-base cmake
- mix local.hex --force
- mix local.rebar --force
- mix deps.get --only test
......@@ -30,3 +35,8 @@ unit-testing:
coverage: '/(\d+\.\d+\%) \| Total/'
script:
- mix test --trace --preload-modules --cover
dialyzer:
stage: test
script:
- mix dialyzer
[submodule "c_src/myhtml"]
path = c_src/myhtml
url = https://github.com/lexborisov/myhtml.git
[submodule "c_src/lexbor"]
path = c_src/lexbor
url = https://github.com/lexbor/lexbor
......@@ -3,6 +3,18 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [2.0.0] - 2020-08-01
### Changed
- **Breaking:** CMake is now required at compile-time due to it being lexbor's build system
- **Breaking:** namespaces are no longer automatically prepended to tag names, e.g. "<svg> </svg>" will be `{"svg", [], []}` instead of `{"svg:svg", [], []}`
- **Breaking:** when using `:nil_self_closing` flag, only valid [void elements](https://html.spec.whatwg.org/#void-elements) will have `nil` in children
- The now-deprecated myhtml was replaced with [lexbor](https://github.com/lexbor/lexbor)
- The worker process now communicates with the node via stdio, instead of TCP, which was known to cause issues
on BSD systems
### Added
- `FastHtml.Pool` for fast_html workers. There is a default pool of `System.schedulers_online/0` workers, but a custom pool can be started if desired, or it can be disabled altogether. See `FastHtml.Pool` module documentation for more info
## [1.0.3] - 2020-02-10
### Fixed
- C-Node not respawning after being killed.
......
MIX = mix
CMAKE = cmake
CNODE_CFLAGS = -g -O2 -std=c99 -pedantic -Wcomment -Wextra -Wno-old-style-declaration -Wall
# ignore unused parameter warnings
......@@ -11,9 +12,9 @@ CNODE_CFLAGS += -I$(ERLANG_PATH)/include
# expecting myhtml as a submodule in c_src/
# that way we can pin a version and package the whole thing in hex
# hex does not allow for non-app related dependencies.
MYHTML_PATH = c_src/myhtml
MYHTML_STATIC = $(MYHTML_PATH)/lib/libmyhtml_static.a
CNODE_CFLAGS += -I$(MYHTML_PATH)/include
LXB_PATH = c_src/lexbor
LXB_STATIC = $(LXB_PATH)/liblexbor_static.a
CNODE_CFLAGS += -I$(LXB_PATH)/source
# avoid undefined reference errors to pthread_mutex_trylock
CNODE_CFLAGS += -lpthread
......@@ -34,17 +35,19 @@ CNODE_LDFLAGS += -lei -pthread
.PHONY: all
all: priv/myhtml_worker
all: priv/fasthtml_worker
$(MYHTML_STATIC): $(MYHTML_PATH)
$(MAKE) -C $(MYHTML_PATH) library MyCORE_BUILD_WITHOUT_THREADS=YES
$(LXB_STATIC): $(LXB_PATH)
# Sadly, building components separately seems to sporadically fail
cd $(LXB_PATH); cmake -DLEXBOR_BUILD_SEPARATELY=OFF -DLEXBOR_BUILD_SHARED=OFF
$(MAKE) -C $(LXB_PATH)
priv/myhtml_worker: c_src/myhtml_worker.c $(MYHTML_STATIC)
$(CC) -o $@ $< $(MYHTML_STATIC) $(CNODE_CFLAGS) $(CNODE_LDFLAGS)
priv/fasthtml_worker: c_src/fasthtml_worker.c $(LXB_STATIC)
$(CC) -o $@ $< $(LXB_STATIC) $(CNODE_CFLAGS) $(CNODE_LDFLAGS)
clean: clean-myhtml
$(RM) -r priv/myhtmlex*
$(RM) priv/myhtml_worker
$(RM) priv/fasthtml_worker
$(RM) myhtmlex-*.tar
$(RM) -r package-test
......
......@@ -3,31 +3,25 @@
A C Node wrapping lexborisov's [myhtml](https://github.com/lexborisov/myhtml).
Primarily used with [FastSanitize](https://git.pleroma.social/pleroma/fast_sanitize).
* Available as a hex package: `{:fast_html, "~> 1.0"}`
* Available as a hex package: `{:fast_html, "~> 2.0"}`
* [Documentation](https://hexdocs.pm/fast_html/fast_html.html)
## Benchmarks
The following table provides median times it takes to decode a string to a tree for html parsers that can be used from Elixir. Benchmarks were conducted on a machine with `Intel Core i7-3520M @ 2.90GHz` CPU and 16GB of RAM. The `mix fast_html.bench` task can be used for running the benchmark by yourself.
The following table provides the median time it takes to decode a string to a tree for HTML parsers that can be used from Elixir. Benchmarks were conducted on a machine with an `AMD Ryzen 9 3950X (32) @ 3.500GHz` CPU and 32GB of RAM. The `mix fast_html.bench` task can be used to run the benchmark yourself.
| File/Parser | fast_html (C-Node) | mochiweb_html (erlang) | html5ever (Rust NIF) | Myhtmlex (NIF)¹ |
| File/Parser | fast_html (Port) | mochiweb_html (erlang) | html5ever (Rust NIF) | Myhtmlex (NIF)¹ |
|----------------------|--------------------|------------------------|----------------------|----------------|
| document-large.html | 178.13 ms | 3471.70 ms | 799.20 ms | 402.64 ms |
| document-medium.html | 2.85 ms | 26.58 ms | 9.06 ms | 3.72 ms |
| document-small.html | 1.08 ms | 5.45 ms | 2.10 ms | 1.24 ms |
| fragment-large.html | 1.50 ms | 10.91 ms | 6.03 ms | 1.91 ms |
| fragment-small.html² | 434.64 μs | 83.02 μs | 57.97 μs | 311.39 μs |
| document-large.html (6.9M) | 125.12 ms | 1778.34 ms | 395.21 ms | 327.17 ms |
| document-medium.html (85K) | 1.93 ms | 12.10 ms | 4.74 ms | 3.82 ms |
| document-small.html (25K)| 0.50 ms | 2.76 ms | 1.72 ms | 1.19 ms |
| fragment-large.html (33K)| 0.93 ms | 4.78 ms | 2.34 ms | 2.15 ms |
| fragment-small.html² (757B)| 44.60 μs | 42.13 μs | 43.58 μs | 289.71 μs |
1. Myhtmlex has a C-Node mode as well, but it wasn't benchmarked here because it segfaults on `document-large.html`
2. The slowdown on `fragment-small.html` is due to C-Node overhead. Unlike html5ever and Myhtmlex in NIF mode, `fast_html` has the parser process isolated and communicates with it over the network, so even if a fatal crash in the parser happens, it won't bring down the entire VM.
Full benchmark output can be seen in [this snippet](https://git.pleroma.social/pleroma/elixir-libraries/fast_html/snippets/3128)
## Note about running with [Swarm](https://github.com/bitwalker/swarm)
Since the myhtml worker runs as a separate node, Swarm will try to sync with it. Of course it will fail since it's not a real Erlang node. To prevent it from doing that, you can add the following to your configuration:
```elixir
config :swarm, node_blacklist: [~r/myhtml_.*$/]
```
1. Myhtmlex has a C-Node mode, but it wasn't benchmarked here because it segfaults on `document-large.html`
2. The slowdown on `fragment-small.html` is due to Port overhead. Unlike html5ever and Myhtmlex in NIF mode, `fast_html` has the parser process isolated and communicates with it over stdio, so even if a fatal crash in the parser happens, it won't bring down the entire VM.
## Contribution / Bug Reports
......
......@@ -20,9 +20,9 @@
# include "erl_interface.h"
#endif
#include <myhtml/myhtml.h>
#include <myhtml/mynamespace.h>
#define HEADER_SIZE 4
#include <lexbor/html/html.h>
#include "tstack.h"
#ifdef __GNUC__
......@@ -38,10 +38,6 @@
#endif
typedef struct _state_t {
int fd;
myhtml_t * myhtml;
ei_cnode ec;
bool looping;
ei_x_buff buffer;
} state_t;
......@@ -51,14 +47,14 @@ typedef enum parse_flags_e {
FLAG_COMMENT_TUPLE3 = 1 << 2
} parse_flags_t;
static void handle_emsg(state_t * state, erlang_msg * emsg);
static void handle_send(state_t * state, erlang_msg * emsg);
char* read_packet(int *len);
static void handle_send(state_t * state);
static void err_term(ei_x_buff * response, const char * error_atom);
static parse_flags_t decode_parse_flags(state_t * state, int arity);
static void decode(state_t * state, ei_x_buff * response, const char * bin_data, size_t bin_size, parse_flags_t parse_flags, bool fragment, myhtml_tag_id_t context_tag_id);
static void decode(state_t * state, ei_x_buff * response, lxb_html_document_t *document, bool fragment, lxb_dom_element_t *context_element, lxb_char_t * bin_data, size_t bin_size, parse_flags_t parse_flags);
static void build_tree(ei_x_buff * response, myhtml_tree_t * tree, parse_flags_t parse_flags);
static void prepare_node_attrs(ei_x_buff * response, myhtml_tree_node_t * node);
static void build_tree(ei_x_buff * response, lxb_dom_node_t* tree, parse_flags_t parse_flags);
static void prepare_node_attrs(ei_x_buff * response, lxb_dom_node_t* node);
static inline char * lowercase(char * c);
......@@ -71,21 +67,13 @@ static void panic(const char *fmt, ...) {
vsnprintf (buf, sizeof buf, fmt, va);
va_end (va);
fprintf (stderr, "myhtml worker: error: %s\n", buf);
exit (EXIT_FAILURE);
}
static void usage (void) NORETURN;
static void usage (void) {
fputs ("usage: myhtml_worker sname hostname cookie tname\n\n"
" sname the short name you want this c-node to connect as\n"
" hostname the hostname\n"
" cookie the authentication cookie\n"
" tname the target node short name to connect to\n", stderr);
fprintf (stderr, "fast_html worker: error: %s\n", buf);
exit (EXIT_FAILURE);
}
int main(int argc, const char *argv[]) {
state_t* state = calloc (1, sizeof(state_t));
#ifdef OTP_22_OR_NEWER
// initialize erlang client library
ei_init ();
......@@ -93,87 +81,88 @@ int main(int argc, const char *argv[]) {
erl_init (NULL, -1);
#endif
if (argc != 5)
usage ();
ei_x_new (&state->buffer);
const char *sname = argv[1];
const char *hostname = argv[2];
const char *cookie = argv[3];
const char *tname = argv[4];
fflush (stdout);
char full_name[1024];
char target_node[1024];
while (true) {
int len;
char* buf = read_packet(&len);
ei_x_free(&state->buffer);
state->buffer.index = 0;
state->buffer.buff = buf;
state->buffer.buffsz = len;
handle_send (state);
}
snprintf (full_name, sizeof full_name, "%s@%s", sname, hostname);
snprintf (target_node, sizeof target_node, "%s@%s", tname, hostname);
// shutdown: free all state
ei_x_free (&state->buffer);
free (state);
struct in_addr addr;
addr.s_addr = htonl(INADDR_ANY);
return EXIT_SUCCESS;
}
// fd to erlang node
state_t* state = calloc (1, sizeof(state_t));
state->looping = true;
ei_x_new (&state->buffer);
// initialize this node
printf ("initialising %s\n", full_name);
if (ei_connect_xinit (&state->ec, hostname, sname, full_name, &addr, cookie, 0) == -1)
panic ("ei_connect_xinit failed.");
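/*
* Reads a packet from Erlang. The packet must be a standard {packet, 4}
* packet, i.e. it is prefixed with a HEADER_SIZE-byte big-endian length.
* This function exits with status 0 on EOF before a header and aborts if
* any other error is detected.
*
* Returns: A malloc'ed buffer holding the packet body; the number of bytes
* in the packet is stored in *len.
*/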
// connect to target node
printf ("connecting to %s\n", target_node);
if ((state->fd = ei_connect (&state->ec, target_node)) < 0)
panic ("ei_connect failed.");
char *read_packet(int *len)
{
state->myhtml = myhtml_create ();
myhtml_init (state->myhtml, MyHTML_OPTIONS_DEFAULT, 1, 0);
char* io_buf = NULL; /* Buffer for file i/o. */
unsigned char header[HEADER_SIZE];
uint32_t packet_length; /* Length of current packet. */
uint32_t bytes_read;
uint32_t total_bytes_read;
/*
* Read the packet header.
*/
total_bytes_read = read(STDIN_FILENO, header, HEADER_SIZE);
if (total_bytes_read == 0) {
exit(0);
}
if (total_bytes_read != HEADER_SIZE) {
panic("Failed to read packet header, read: %d\n", total_bytes_read);
}
// signal to stdout that we are ready
printf ("%s ready\n", full_name);
fflush (stdout);
/*
* Get the length of this packet.
*/
packet_length = 0;
while (state->looping)
{
erlang_msg emsg;
for (int i = 0; i < HEADER_SIZE; i++)
packet_length = (packet_length << 8) | header[i];
switch (ei_xreceive_msg (state->fd, &emsg, &state->buffer))
{
case ERL_TICK:
break;
case ERL_ERROR:
panic ("ei_xreceive_msg: %s\n", strerror (erl_errno));
break;
default:
handle_emsg (state, &emsg);
break;
*len=packet_length;
if ((io_buf = (char *) malloc(packet_length)) == NULL) {
panic("insufficient memory for i/o buffer of size %d\n", packet_length);
}
}
// shutdown: free all state
ei_x_free (&state->buffer);
myhtml_destroy (state->myhtml);
free (state);
/*
* Read the packet itself.
*/
total_bytes_read = 0;
return EXIT_SUCCESS;
}
while((bytes_read = read(STDIN_FILENO, (io_buf + total_bytes_read), (packet_length - total_bytes_read))))
total_bytes_read += bytes_read;
// handle an erlang_msg structure and call handle_send() if relevant
static void handle_emsg (state_t * state, erlang_msg * emsg)
{
state->buffer.index = 0;
if (total_bytes_read != packet_length) {
free(io_buf);
panic("couldn't read packet of length %d, read: %d\r\n",
packet_length, total_bytes_read);
}
switch (emsg->msgtype)
{
case ERL_REG_SEND:
case ERL_SEND:
handle_send (state, emsg);
break;
case ERL_LINK:
case ERL_UNLINK:
break;
case ERL_EXIT:
break;
}
return io_buf;
}
// handle ERL_SEND message type.
......@@ -184,7 +173,7 @@ static void handle_emsg (state_t * state, erlang_msg * emsg)
// In case of `decode_fragment`, the fourth argument should be
// the context tag name.
// any other message: respond with an {error, unknown_call} tuple.
static void handle_send (state_t * state, erlang_msg * emsg)
static void handle_send (state_t * state)
{
// response holds our response, prepare it
ei_x_buff response;
......@@ -256,8 +245,10 @@ static void handle_send (state_t * state, erlang_msg * emsg)
if (ei_decode_list_header (state->buffer.buff, &state->buffer.index, &arity) < 0)
panic ("failed to decode empty list header after option list in message");
lxb_html_document_t *document = lxb_html_document_create();
lxb_dom_element_t *context_element = NULL;
// if we are parsing a fragment, context tag name should come next
myhtml_tag_id_t context_tag_id = 0;
if (fragment) {
int context_bin_type, context_bin_size;
if (ei_get_type (state->buffer.buff, &state->buffer.index, &context_bin_type, &context_bin_size) < 0)
......@@ -271,32 +262,36 @@ static void handle_send (state_t * state, erlang_msg * emsg)
}
// decode the binary
char * context_bin_data = calloc (1, context_bin_size + 1);
char* context_bin_data = calloc (1, context_bin_size + 1);
if (ei_decode_binary (state->buffer.buff, &state->buffer.index, context_bin_data, NULL) < 0)
panic ("failed to decode binary in message");
const myhtml_tag_context_t * context_tag_context = myhtml_tag_static_search(context_bin_data, context_bin_size);
panic ("failed to decode context binary in message");
context_element = lxb_dom_document_create_element(&document->dom_document, (lxb_char_t*) context_bin_data, context_bin_size, NULL);
free (context_bin_data);
if (context_tag_context == NULL) {
err_term (&response, "unknown_context_tag");
goto out;
} else {
context_tag_id = context_tag_context->id;
}
}
decode (state, &response, bin_data, bin_size, parse_flags, fragment, context_tag_id);
if (context_element && lxb_dom_element_tag_id(context_element) >= LXB_TAG__LAST_ENTRY) {
err_term (&response, "unknown_context_tag");
} else {
decode (state, &response, document, fragment, context_element, (lxb_char_t *) bin_data, bin_size, parse_flags);
}
lxb_html_document_destroy(document);
free (bin_data);
out:
out: ;
// send response
ei_send (state->fd, &emsg->from, response.buff, response.buffsz);
unsigned char header[HEADER_SIZE];
uint32_t size = (uint32_t) response.index;
for (int i = HEADER_SIZE-1; i != -1; i--) {
header[i] = (unsigned char) size & 0xFF;
size = size >> 8;
}
write(STDOUT_FILENO, header, sizeof(header));
write(STDOUT_FILENO, response.buff, response.index);
// free response
ei_x_free (&response);
return;
}
......@@ -304,8 +299,6 @@ static void err_term (ei_x_buff * response, const char * error_atom)
{
response->index = 0;
ei_x_encode_version (response);
ei_x_encode_tuple_header(response, 2);
ei_x_encode_atom(response, "myhtml_worker");
ei_x_encode_tuple_header (response, 2);
ei_x_encode_atom (response, "error");
ei_x_encode_atom (response, error_atom);
......@@ -333,29 +326,28 @@ static parse_flags_t decode_parse_flags (state_t * state, int arity)
return parse_flags;
}
static void decode (state_t * state, ei_x_buff * response, const char * bin_data, size_t bin_size, parse_flags_t parse_flags, bool fragment, myhtml_tag_id_t context_tag_id)
static void decode(state_t * state, ei_x_buff * response, lxb_html_document_t *document, bool fragment, lxb_dom_element_t *context_element, lxb_char_t * bin_data, size_t bin_size, parse_flags_t parse_flags)
{
myhtml_tree_t * tree = myhtml_tree_create ();
myhtml_tree_init (tree, state->myhtml);
myhtml_tree_parse_flags_set (tree, MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE);
// parse tree
mystatus_t status;
lxb_status_t status;
lxb_dom_node_t *node;
if (fragment) {
status = myhtml_parse_fragment(tree, MyENCODING_UTF_8, bin_data, bin_size, context_tag_id, 0);
node = lxb_html_document_parse_fragment(document, context_element, bin_data, bin_size);
status = (node == NULL)? LXB_STATUS_ERROR : LXB_STATUS_OK;
} else {
status = myhtml_parse (tree, MyENCODING_UTF_8, bin_data, bin_size);
status = lxb_html_document_parse(document, bin_data, bin_size);
node = lxb_dom_interface_node(document);
}
if (status != MyHTML_STATUS_OK)
if (status != LXB_STATUS_OK)
{
err_term (response, "myhtml_parse_failed");
err_term (response, "parse_failed");
return;
}
// build tree
build_tree (response, tree, parse_flags);
myhtml_tree_destroy (tree);
build_tree (response, node, parse_flags);
}
// a tag is sent as a tuple:
......@@ -363,14 +355,13 @@ static void decode (state_t * state, ei_x_buff * response, const char * bin_data
// - an attribute list
// - a children list
// in this function, we prepare the atom and complete attribute list
static void prepare_tag_header (ei_x_buff * response, const char * tag_string, myhtml_tree_node_t * node, parse_flags_t parse_flags)
static void prepare_tag_header (ei_x_buff * response, const char * tag_string, lxb_dom_node_t* node, parse_flags_t parse_flags)
{
myhtml_tag_id_t tag_id = myhtml_node_tag_id (node);
myhtml_namespace_t tag_ns = myhtml_node_namespace (node);
lxb_tag_id_t tag_id = lxb_dom_node_tag_id(node);
ei_x_encode_tuple_header (response, 3);
if (! (parse_flags & FLAG_HTML_ATOMS) || (tag_id == MyHTML_TAG__UNDEF || tag_id == MyHTML_TAG_LAST_ENTRY || tag_ns != MyHTML_NAMESPACE_HTML))
if (! (parse_flags & FLAG_HTML_ATOMS) || (tag_id == LXB_TAG__UNDEF || tag_id >= LXB_TAG__LAST_ENTRY))
ei_x_encode_binary (response, tag_string, strlen (tag_string));
else
ei_x_encode_atom (response, tag_string);
......@@ -379,16 +370,16 @@ static void prepare_tag_header (ei_x_buff * response, const char * tag_string, m
}
// prepare an attribute node
static void prepare_node_attrs(ei_x_buff * response, myhtml_tree_node_t * node)
static void prepare_node_attrs(ei_x_buff * response, lxb_dom_node_t* node)
{
myhtml_tree_attr_t * attr;
lxb_dom_attr_t *attr;
for (attr = myhtml_node_attribute_first (node); attr != NULL; attr = myhtml_attribute_next (attr))
for (attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node)); attr != NULL; attr = lxb_dom_element_next_attribute(attr))
{
size_t attr_name_len;
const char *attr_name = myhtml_attribute_key (attr, &attr_name_len);
char *attr_name = (char*) lxb_dom_attr_qualified_name(attr, &attr_name_len);
size_t attr_value_len;
const char *attr_value = myhtml_attribute_value (attr, &attr_value_len);
const char *attr_value = (char*) lxb_dom_attr_value(attr, &attr_value_len);
/* guard against poisoned attribute nodes */
if (! attr_name_len)
......@@ -441,88 +432,60 @@ static void prepare_comment (ei_x_buff * response, const char * node_comment, si
#endif
static void build_tree (ei_x_buff * response, myhtml_tree_t * tree, parse_flags_t parse_flags)
static void build_tree (ei_x_buff * response, lxb_dom_node_t* node, parse_flags_t parse_flags)
{
myhtml_tree_node_t * node = myhtml_tree_get_document (tree);
tstack stack;
tstack_init (&stack, 30);
tstack_push (&stack, node);
myhtml_tree_node_t * current_node = node->child;
lxb_dom_node_t* current_node = node->first_child;
// ok we're going to send an actual response so start encoding it
response->index = 0;
ei_x_encode_version (response);
ei_x_encode_tuple_header(response, 2);
ei_x_encode_atom(response, "myhtml_worker");
ei_x_encode_tuple_header(response, 2);
ei_x_encode_atom(response, "ok");
if (current_node == NULL) {
EMIT_EMPTY_LIST_HDR;
EMIT_LIST_TAIL;
}
while (current_node != NULL)
{
myhtml_tag_id_t tag_id = myhtml_node_tag_id (current_node);
myhtml_namespace_t tag_ns = myhtml_node_namespace (current_node);
if (tag_id == MyHTML_TAG__TEXT)
if (current_node->type == LXB_DOM_NODE_TYPE_TEXT)
{
size_t text_len;
const char * node_text = myhtml_node_text (current_node, &text_len);
const char * node_text = (char*) lxb_dom_node_text_content(current_node, &text_len);
EMIT_LIST_HDR;
ei_x_encode_binary (response, node_text, text_len);
}
else if (tag_id == MyHTML_TAG__COMMENT)
else if (current_node->type == LXB_DOM_NODE_TYPE_COMMENT)
{
size_t comment_len;
const char* node_comment = myhtml_node_text (current_node, &comment_len);
const char* node_comment = (char*) lxb_dom_node_text_content(current_node, &comment_len);
EMIT_LIST_HDR;
prepare_comment (response, node_comment, comment_len, parse_flags);
prepare_comment(response, node_comment, comment_len, parse_flags);
}
else
else if(current_node->type == LXB_DOM_NODE_TYPE_ELEMENT)
{
// get name of tag
size_t tag_name_len;
const char *tag_name = myhtml_tag_name_by_id (tree, tag_id, &tag_name_len);
// get namespace of tag
size_t tag_ns_len;
const char *tag_ns_name_ptr = myhtml_namespace_name_by_id (tag_ns, &tag_ns_len);
char buffer [tag_ns_len + tag_name_len + 2];
char *tag_string = buffer;
if (tag_ns != MyHTML_NAMESPACE_HTML)
{
// tag_ns_name_ptr is unmodifyable, copy it in our tag_ns_buffer to make it modifyable.
// +1 because myhtml uses strlen for length returned, which doesn't include the null-byte
// https://github.com/lexborisov/myhtml/blob/0ade0e564a87f46fd21693a7d8c8d1fa09ffb6b6/source/myhtml/mynamespace.c#L80
char tag_ns_buffer[tag_ns_len + 1];
strncpy (tag_ns_buffer, tag_ns_name_ptr, sizeof tag_ns_buffer);
lowercase (tag_ns_buffer);
snprintf (tag_string, sizeof buffer, "%s:%s", tag_ns_buffer, tag_name);
}
else
{
// strncpy length does not contain null, so blank the buffer before copying
// and limit the copy length to buffer size minus one for safety.
memset (tag_string, '\0', sizeof buffer);
strncpy (tag_string, tag_name, sizeof buffer - 1);
}
const char *tag_name = (char*) lxb_dom_element_qualified_name(lxb_dom_interface_element(current_node), &tag_name_len);
EMIT_LIST_HDR;
prepare_tag_header (response, tag_string, current_node, parse_flags);
prepare_tag_header (response, tag_name, current_node, parse_flags);
if (current_node->child)
if (current_node->first_child)
{
tstack_push (&stack, current_node);
current_node = current_node->child;
current_node = current_node->first_child;
continue;
}
else
{
if (parse_flags & FLAG_NIL_SELF_CLOSING && (myhtml_node_is_close_self(current_node) || myhtml_node_is_void_element(current_node)))
{
if (parse_flags & FLAG_NIL_SELF_CLOSING && lxb_html_tag_is_void(lxb_dom_node_tag_id(current_node))) {
#ifdef DEBUG_LIST_MANIP
printf ("self-closing tag %s emit nil?\n", tag_string); fflush (stdout);
#endif
......
Subproject commit 7cfc4b48aa8ffba251c249eb343aad94d16f9a59
Subproject commit fe2cf577570666d058a2b7167c26d3384a758e19
......@@ -4,13 +4,13 @@
#define GROW_BY 30
typedef struct {
myhtml_tree_node_t **data;
lxb_dom_node_t **data;
size_t used;
size_t size;
} tstack;
void tstack_init(tstack *stack, size_t initial_size) {
stack->data = (myhtml_tree_node_t **) malloc(initial_size * sizeof(myhtml_tree_node_t *));
stack->data = (lxb_dom_node_t **) malloc(initial_size * sizeof(lxb_dom_node_t *));
stack->used = 0;
stack->size = initial_size;
}
......@@ -20,18 +20,18 @@ void tstack_free(tstack *stack) {
}
void tstack_resize(tstack *stack, size_t new_size) {
stack->data = (myhtml_tree_node_t **) realloc(stack->data, new_size * sizeof(myhtml_tree_node_t *));
stack->data = (lxb_dom_node_t **) realloc(stack->data, new_size * sizeof(lxb_dom_node_t *));
stack->size = new_size;
}
void tstack_push(tstack *stack, myhtml_tree_node_t * element) {
void tstack_push(tstack *stack, lxb_dom_node_t * element) {
if(stack->used == stack->size) {
tstack_resize(stack, stack->size + GROW_BY);
}
stack->data[stack->used++] = element;
}
myhtml_tree_node_t* tstack_pop(tstack *stack) {
lxb_dom_node_t * tstack_pop(tstack *stack) {
return stack->data[--(stack->used)];
}
......
......@@ -19,13 +19,14 @@ defmodule :fast_html do
Returns a tree representation from the given html string.
`opts` is a keyword list of options, the options available:
* `timeout` - Call timeout
* `format` - Format flags for the tree
* `timeout` - Call timeout. If pooling is used and the worker doesn't return
the result in time, the worker will be killed with a warning.
* `format` - Format flags for the tree.
The following format flags are available:
* `:html_atoms` uses atoms for known html tags (faster), binaries for everything else.
* `:nil_self_closing` uses `nil` to designate self-closing tags and void elements.
* `:nil_self_closing` uses `nil` to designate void elements.
For example `<br>` is then being represented like `{"br", [], nil}`.
See http://w3c.github.io/html-reference/syntax.html#void-elements for a full list of void elements.
* `:comment_tuple3` uses 3-tuple elements for comments, instead of the default 2-tuple element.
......@@ -61,7 +62,7 @@ defmodule :fast_html do
iex> :fast_html.decode(html, format: [:html_atoms, :nil_self_closing, :comment_tuple3])
{:ok, [{:html, [],
[{:head, [], []},
{:body, [], [{:comment, [], " a comment "}, {"unknown", [], nil}]}]}]}
{:body, [], [{:comment, [], " a comment "}, {"unknown", [], []}]}]}]}
"""
@spec decode(String.t(), format: [format_flag()]) ::
......@@ -69,7 +70,8 @@ defmodule :fast_html do
def decode(bin, opts \\ []) do
flags = Keyword.get(opts, :format, [])
timeout = Keyword.get(opts, :timeout, 10000)
FastHtml.Cnode.call({:decode, bin, flags}, timeout)
find_and_use_port({:decode, bin, flags}, timeout, opts)
end
@doc """
......@@ -77,22 +79,69 @@ defmodule :fast_html do
`opts` is a keyword list of options, the options available are the same as in `decode/2` with addition of:
* `context` - Name of the context element, defaults to `div`
* `format` - Format flags for the tree
Example:
iex> :fast_html.decode_fragment("rin is the <i>best</i> girl")
{:ok, [{"html", [], ["rin is the ", {"i", [], ["best"]}, " girl"]}]}
{:ok, ["rin is the ", {"i", [], ["best"]}, " girl"]}
iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", context: "title")
{:ok, [{"html", [], ["rin is the <i>best</i> girl"]}]}
{:ok, ["rin is the <i>best</i> girl"]}
iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", context: "objective_truth")
{:error, :unknown_context_tag}
iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", format: [:html_atoms])
{:ok, [{:html, [], ["rin is the ", {:i, [], ["best"]}, " girl"]}]}
{:ok, ["rin is the ", {:i, [], ["best"]}, " girl"]}
"""
def decode_fragment(bin, opts \\ []) do
flags = Keyword.get(opts, :format, [])
timeout = Keyword.get(opts, :timeout, 10000)
context = Keyword.get(opts, :context, "div")
FastHtml.Cnode.call({:decode_fragment, bin, flags, context}, timeout)
find_and_use_port({:decode_fragment, bin, flags, context}, timeout, opts)
end
@default_pool FastHtml.Pool
# Serializes `term_command`, sends it to a worker port and decodes the reply.
#
# The port comes from the pool given in `opts[:pool]`; failing that, from the
# default pool when the `:fast_html, :pool` application config has it enabled;
# otherwise a throwaway port is opened for this single call and closed after.
#
# Returns the term decoded from the worker's reply, or `{:error, reason}`
# (`{:error, :timeout}` when no reply arrives within `timeout` milliseconds).
defp find_and_use_port(term_command, timeout, opts) do
  payload = :erlang.term_to_binary(term_command)

  # An explicitly requested pool wins over the application-wide default.
  requested_pool = Keyword.get(opts, :pool)

  selected_pool =
    if requested_pool do
      requested_pool
    else
      pool_config = Application.get_env(:fast_html, :pool, enabled: true)
      if pool_config[:enabled], do: @default_pool, else: nil
    end

  # One request/response round-trip against whichever port we end up with.
  run_on_port = fn port ->
    send(port, {self(), {:command, payload}})

    receive do
      {^port, {:data, reply}} -> {:ok, reply}
    after
      timeout -> {:error, :timeout}
    end
  end

  outcome =
    case selected_pool do
      nil ->
        # Pooling is disabled: use a one-off worker and dispose of it.
        one_off_port = open_port()
        reply = run_on_port.(one_off_port)
        Port.close(one_off_port)
        reply

      pool ->
        FastHtml.Pool.get_port(pool, run_on_port)
    end

  case outcome do
    {:error, _} = failure -> failure
    {:ok, encoded_reply} -> :erlang.binary_to_term(encoded_reply)
  end
end
@doc """
Opens a port to the `fasthtml_worker` executable found in the `:fast_html`
priv directory.

The port is opened in binary mode over stdio with `{:packet, 4}` framing and
the `:exit_status` option.
"""
def open_port do
  worker_executable = Path.join([:code.priv_dir(:fast_html), "fasthtml_worker"])
  port_settings = [:binary, {:packet, 4}, :use_stdio, :exit_status]

  Port.open({:spawn_executable, worker_executable}, port_settings)
end
end
......@@ -3,54 +3,13 @@ defmodule FastHtml.Application do
use Application
def random_sname, do: :crypto.strong_rand_bytes(4) |> Base.encode16(case: :lower)
def start(_type, _args) do
case maybe_setup_node() do
{:error, message} -> raise message
_ -> :ok
end
default_pool_config = Application.get_env(:fast_html, :pool, enabled: true)
children = if default_pool_config[:enabled], do: [FastHtml.Pool], else: []
Supervisor.start_link([{FastHtml.Cnode, Application.get_env(:fast_html, :cnode, [])}],
Supervisor.start_link(children,
strategy: :one_for_one,
name: FastHtml.Supervisor
)
end
defp maybe_setup_node() do
with {_, false} <- {:alive, Node.alive?()},
{:ok, epmd_path} <- find_epmd(),
:ok <- start_epmd(epmd_path),
{:ok, _pid} = pid_tuple <- start_node() do
pid_tuple
else
{:alive, _} ->
:ok
{:error, _} = e ->
e
end
end
defp find_epmd() do
case System.find_executable("epmd") do
nil ->
{:error,
"Could not find epmd executable. Please ensure the location it's in is present in your PATH or start epmd manually beforehand"}
executable ->
{:ok, executable}
end
end
defp start_epmd(path) do
case System.cmd(path, ["-daemon"]) do
{_result, 0} -> :ok
{_result, exit_code} -> {:error, "Could not start epmd, exit code: #{exit_code}"}
end
end
defp start_node() do
Node.start(:"master_#{random_sname()}@127.0.0.1")
end
end
defmodule FastHtml.Cnode do
@moduledoc """
Manages myhtml c-node.
## Configuration
```elixir
config :fast_html, :cnode,
sname: "myhtml_worker", # Defaults to myhtml_<random bytes>
spawn_inactive_timeout: 5000 # Defaults to 10000
```
"""
@spawn_inactive_timeout 10000
application = Mix.Project.config()[:app]
use GenServer
require Logger
@doc false
def start_link(args) do
GenServer.start_link(__MODULE__, args, name: __MODULE__)
end
@doc false
def init(args) do
exec_path = Path.join(:code.priv_dir(unquote(application)), "myhtml_worker")
sname = Keyword.get_lazy(args, :sname, &default_sname/0)
hostname = master_hostname()
addr = :"#{sname}@#{hostname}"
spawn_inactive_timeout = Keyword.get(args, :spawn_inactive_timeout, @spawn_inactive_timeout)
state = %{
exec_path: exec_path,
sname: sname,
addr: addr,
hostname: hostname,
spawn_inactive_timeout: spawn_inactive_timeout
}
connect_or_spawn_cnode(state)
end
defp default_sname, do: "myhtml_#{FastHtml.Application.random_sname()}"
defp master_sname, do: Node.self() |> to_string |> String.split("@") |> List.first()
defp master_hostname, do: Node.self() |> to_string |> String.split("@") |> List.last()
defp connect_or_spawn_cnode(state) do
case connect_cnode(state) do
{:stop, _} -> spawn_cnode(state)
{:ok, state} -> state
end
end
defp connect_cnode(%{addr: addr} = state) do
if Node.connect(addr) do
Logger.debug("connected to #{addr}")
Node.monitor(addr, true)
{:ok, state}
else
Logger.debug("connecting to #{addr} failed")
{:stop, :cnode_connection_fail}
end
end
defp spawn_cnode(%{exec_path: exec_path, sname: sname, hostname: hostname} = state) do
Logger.debug("Spawning #{sname}@#{hostname}")
cookie = :erlang.get_cookie()
port =
Port.open({:spawn_executable, exec_path}, [
:binary,
:exit_status,
:stderr_to_stdout,
line: 4096,
args: [sname, hostname, cookie, master_sname()]
])
pid = Keyword.get(Port.info(port), :os_pid)
state = Map.put(state, :pid, pid)
await_cnode_ready(port, state)
end
defp await_cnode_ready(
port,
%{spawn_inactive_timeout: timeout, addr: addr} = state
) do
ready_line = to_string(addr) <> " ready"
receive do
{^port, {:data, {:eol, ^ready_line}}} ->
connect_cnode(state)
{^port, {:data, {:eol, line}}} ->
Logger.debug("c-node is saying: #{line}")
await_cnode_ready(port, state)
{^port, {:exit_status, exit_status}} ->
Logger.debug("unexpected c-node exit: #{exit_status}")
{:stop, :cnode_unexpected_exit}
message ->
Logger.warn("unhandled message while waiting for cnode to be ready:\n#{inspect(message)}")
await_cnode_ready(port, state)
after
timeout ->
{:stop, :spawn_inactive_timeout}
end
end
@doc false
def handle_info({:nodedown, _cnode}, state) do
{:stop, :nodedown, state}
end
@doc false
def handle_info(msg, state) do
Logger.warn("unhandled handle_info: #{inspect(msg)}")
{:noreply, state}
end
# Replies with the C-node address stored in the server state.
@doc false
def handle_call(:addr, _from, %{addr: cnode_addr} = state),
  do: {:reply, cnode_addr, state}
# Kills the spawned C-node OS process (SIGKILL) when the server terminates.
@doc false
def terminate(_reason, %{pid: pid}) when is_integer(pid) do
  System.cmd("kill", ["-9", Integer.to_string(pid)])
  :normal
end

# No OS pid is known (the state has no integer `:pid`, e.g. the C-node was
# never spawned by this server). There is nothing to kill, and without this
# clause shutdown would raise a FunctionClauseError.
def terminate(_reason, _state), do: :normal
@doc """
Call into myhtml cnode.

Looks up the C-node address from the `#{inspect(__MODULE__)}` server, sends
`msg` to it and waits up to `timeout` milliseconds for a
`{:myhtml_worker, result}` reply. Returns the result, or
`{:error, :timeout}` when no reply arrives in time.
"""
def call(msg, timeout \\ 10_000) do
  cnode = GenServer.call(__MODULE__, :addr)
  send({nil, cnode}, msg)

  receive do
    {:myhtml_worker, reply} -> reply
  after
    timeout -> {:error, :timeout}
  end
end
end
defmodule FastHtml.Pool do
  @moduledoc """
  A `NimblePool` of fast_html worker ports.

  Every worker is a port opened with `:fast_html.open_port/0`. On checkout the
  pool connects the port to the client process; `get_port/2` hands the port
  back to the pool process once the client's function has run.

  Start a pool with `start_link/1` (see its options) and borrow a port with
  `get_port/2`.
  """
  @behaviour NimblePool

  require Logger

  @doc false
  def child_spec(opts) do
    %{
      id: __MODULE__,
      start: {__MODULE__, :start_link, [opts]},
      type: :worker,
      restart: :permanent
    }
  end

  @doc """
  Starts the port pool.

  ### Options
  - `:size` - Number of ports in the pool. Defaults to `System.schedulers_online/0` if not set.
  - `:name` - Registered name of the pool. Defaults to `#{__MODULE__}` if not set, set to `false` to not register the process.
  """
  @type option :: {:size, pos_integer()} | {:name, atom()}
  @spec start_link([option()]) :: term()
  def start_link(options) do
    {size, options} = Keyword.pop(options, :size, System.schedulers_online())
    NimblePool.start_link(worker: {__MODULE__, options}, pool_size: size)
  end

  @type pool :: atom() | pid()
  @type result :: {:ok, term()} | {:error, atom()}

  @doc """
  Checks a port out of `pool`, runs `fun` with it and returns `fun`'s result.

  After `fun` returns, the port is asked to connect back to the pool process
  and this function waits for the port's `:connected` confirmation. The value
  handed to the pool on checkin is `:ok` for an `{:ok, _}` result, the error
  reason for an `{:error, reason}` result, or `{:EXIT, reason}` if an exit
  message for the port arrives instead of the confirmation.
  """
  @spec get_port(pool(), (port() -> result())) :: result()
  def get_port(pool, fun) do
    NimblePool.checkout!(pool, :checkout, fn _from, port ->
      result = fun.(port)

      client_state =
        case result do
          {:ok, _} ->
            :ok

          {:error, reason} ->
            reason
        end

      # Give the port back to the pool process before checking it in.
      send(port, {self(), {:connect, GenServer.whereis(pool)}})

      client_state =
        receive do
          {^port, :connected} -> client_state
          {:EXIT, ^port, reason} -> {:EXIT, reason}
        end

      {result, client_state}
    end)
  end

  # Registers the pool process: under `__MODULE__` when no `:name` was given,
  # under the given atom otherwise. `false` (or any non-atom) skips
  # registration. The remaining options become the pool state.
  @impl NimblePool
  @doc false
  def init_pool(state) do
    {name, options} =
      case Keyword.pop(state, :name) do
        {nil, state} -> {__MODULE__, state}
        {name, state} when is_atom(name) -> {name, state}
        {_, state} -> {nil, state}
      end

    if name, do: Process.register(self(), name)
    {:ok, options}
  end

  @impl NimblePool
  @doc false
  def init_worker(pool_state) do
    port = :fast_html.open_port()
    {:ok, port, pool_state}
  end

  # The port exited on its own: only log it, there is nothing left to close.
  @impl NimblePool
  @doc false
  def terminate_worker({:EXIT, reason}, port, pool_state) do
    # Ports and arbitrary exit reasons do not implement `String.Chars`, so
    # they must go through `inspect/1`; plain interpolation raises
    # `Protocol.UndefinedError` when the message is built.
    Logger.warn(fn ->
      "[#{__MODULE__}]: Port #{inspect(port)} unexpectedly exited with reason: #{inspect(reason)}"
    end)

    {:ok, pool_state}
  end

  # Any other removal reason: close the port.
  @impl NimblePool
  @doc false
  def terminate_worker(_reason, port, pool_state) do
    Port.close(port)
    {:ok, pool_state}
  end

  # Connects the port to the client and waits for the port to confirm it.
  @impl NimblePool
  @doc false
  def handle_checkout(:checkout, {client_pid, _}, port) do
    send(port, {self(), {:connect, client_pid}})

    receive do
      {^port, :connected} -> {:ok, port, port}
      {:EXIT, ^port, reason} -> {:remove, {:EXIT, reason}}
    end
  end

  # A client that reported `:timeout` gets its worker removed from the pool.
  @impl NimblePool
  @doc false
  def handle_checkin(:timeout, _, _), do: {:remove, :timeout}

  @impl NimblePool
  @doc false
  def handle_checkin(_, _, port), do: {:ok, port}

  # Exit of this worker's own port: remove the worker.
  @impl NimblePool
  @doc false
  def handle_info({:EXIT, port, reason}, port), do: {:remove, {:EXIT, reason}}

  # Exit message that does not concern this worker's port: ignore.
  @impl NimblePool
  @doc false
  def handle_info({:EXIT, _, _}, port), do: {:ok, port}

  # Port sent data to the pool, this happens when the timeout was reached
  # and the port got disconnected from the client, but not yet killed by the pool.
  # Just discard the message.
  @impl NimblePool
  @doc false
  def handle_info({_sending_port, {:data, _}}, port), do: {:ok, port}
end
......@@ -4,11 +4,12 @@ defmodule FastHtml.Mixfile do
def project do
[
app: :fast_html,
version: "1.0.3",
version: "2.0.0",
elixir: "~> 1.5",
deps: deps(),
package: package(),
compilers: [:fast_html_cnode_make] ++ Mix.compilers(),
compilers: [:elixir_make] ++ Mix.compilers(),
make_env: make_env(),
build_embedded: Mix.env() == :prod,
start_permanent: Mix.env() == :prod,
name: "FastHtml",
......@@ -68,9 +69,13 @@ defmodule FastHtml.Mixfile do
{:ex_doc, "~> 0.19", only: :dev},
# benchmarking helpers
{:benchee, "~> 1.0", only: :bench, optional: true},
{:dialyxir, "~> 1.0", only: [:dev, :test], runtime: false},
{:myhtmlex, "~> 0.2.0", only: :bench, runtime: false, optional: true},
{:mochiweb, "~> 2.18", only: :bench, optional: true},
{:html5ever, "~> 0.7.0", only: :bench, optional: true}
{:html5ever,
git: "https://github.com/rusterlium/html5ever_elixir.git", only: :bench, optional: true},
{:nimble_pool, "~> 0.1"},
{:elixir_make, "~> 0.4", runtime: false}
]
end
......@@ -80,24 +85,6 @@ defmodule FastHtml.Mixfile do
extras: ["README.md"]
]
end
end
defmodule Mix.Tasks.Compile.FastHtmlCnodeMake do
@artifacts [
"priv/myhtml_worker"
]
# Picks the make command: the `MAKE` environment variable wins when set;
# otherwise "gmake" on FreeBSD, OpenBSD, NetBSD and DragonFly, and "make"
# everywhere else.
def find_make do
  with nil <- System.get_env("MAKE") do
    case :os.type() do
      {:unix, flavour} when flavour in [:freebsd, :openbsd, :netbsd, :dragonfly] -> "gmake"
      _other -> "make"
    end
  end
end
defp otp_version do
:erlang.system_info(:otp_release)
......@@ -109,49 +96,14 @@ defmodule Mix.Tasks.Compile.FastHtmlCnodeMake do
otp_version() >= 22
end
# Compiler entry point: builds the artifacts listed in `@artifacts` with make.
#
# On Windows it only warns and counts as a failure. Elsewhere make runs with
# `MIX_ENV` and `OTP22_DEF` ("YES"/"NO") in its environment and its combined
# output is echoed. Returns `:ok` when make exits with 0, otherwise
# `{:error, [diagnostic]}` carrying the exit code.
def run(_) do
  make_cmd = find_make()

  status =
    case :os.type() do
      {:win32, _} ->
        IO.warn("Windows is not yet a target.")
        1

      _ ->
        build_env = [
          {"MIX_ENV", to_string(Mix.env())},
          {"OTP22_DEF", if(otp_22_or_newer?(), do: "YES", else: "NO")}
        ]

        {output, code} =
          System.cmd(make_cmd, @artifacts, stderr_to_stdout: true, env: build_env)

        IO.binwrite(output)
        code
    end

  case status do
    0 ->
      :ok

    exit_code ->
      diagnostic = %Mix.Task.Compiler.Diagnostic{
        compiler_name: "FastHtml Cnode",
        message: "Make exited with #{exit_code}",
        severity: :error,
        file: nil,
        position: nil
      }

      {:error, [diagnostic]}
  end
end
# Runs `make clean` with the command chosen by `find_make/0` and prints the
# combined stdout/stderr through the Mix shell. The exit status of make is
# ignored; the function always evaluates to `:ok`.
def clean() do
make_cmd = find_make()
{result, _error_code} = System.cmd(make_cmd, ["clean"], stderr_to_stdout: true)
Mix.shell().info(result)
:ok
# Environment handed to the make invocation: `OTP22_DEF` is "YES" when
# `otp_22_or_newer?/0` is truthy and "NO" otherwise.
defp make_env do
  otp22_flag = if otp_22_or_newer?(), do: "YES", else: "NO"

  %{"OTP22_DEF" => otp22_flag}
end
end
%{
"benchee": {:hex, :benchee, "1.0.1", "66b211f9bfd84bd97e6d1beaddf8fc2312aaabe192f776e8931cb0c16f53a521", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}], "hexpm"},
"deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm"},
"earmark": {:hex, :earmark, "1.4.2", "3aa0bd23bc4c61cf2f1e5d752d1bb470560a6f8539974f767a38923bb20e1d7f", [:mix], [], "hexpm"},
"ex_doc": {:hex, :ex_doc, "0.21.2", "caca5bc28ed7b3bdc0b662f8afe2bee1eedb5c3cf7b322feeeb7c6ebbde089d6", [:mix], [{:earmark, "~> 1.3.3 or ~> 1.4", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"},
"html5ever": {:hex, :html5ever, "0.7.0", "9f63ec1c783b2dc9f326840fcc993c01e926dbdef4e51ba1bbe5355993c258b4", [:mix], [{:rustler, "~> 0.18.0", [hex: :rustler, repo: "hexpm", optional: false]}], "hexpm"},
"makeup": {:hex, :makeup, "1.0.0", "671df94cf5a594b739ce03b0d0316aa64312cee2574b6a44becb83cd90fb05dc", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"},
"makeup_elixir": {:hex, :makeup_elixir, "0.14.0", "cf8b7c66ad1cff4c14679698d532f0b5d45a3968ffbcbfd590339cb57742f1ae", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm"},
"mochiweb": {:hex, :mochiweb, "2.18.0", "eb55f1db3e6e960fac4e6db4e2db9ec3602cc9f30b86cd1481d56545c3145d2e", [:rebar3], [], "hexpm"},
"benchee": {:hex, :benchee, "1.0.1", "66b211f9bfd84bd97e6d1beaddf8fc2312aaabe192f776e8931cb0c16f53a521", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}], "hexpm", "3ad58ae787e9c7c94dd7ceda3b587ec2c64604563e049b2a0e8baafae832addb"},
"deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"},
"dialyxir": {:hex, :dialyxir, "1.0.0", "6a1fa629f7881a9f5aaf3a78f094b2a51a0357c843871b8bc98824e7342d00a5", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "aeb06588145fac14ca08d8061a142d52753dbc2cf7f0d00fc1013f53f8654654"},
"earmark": {:hex, :earmark, "1.4.2", "3aa0bd23bc4c61cf2f1e5d752d1bb470560a6f8539974f767a38923bb20e1d7f", [:mix], [], "hexpm", "5e8806285d8a3a8999bd38e4a73c58d28534c856bc38c44818e5ba85bbda16fb"},
"elixir_make": {:hex, :elixir_make, "0.6.0", "38349f3e29aff4864352084fc736fa7fa0f2995a819a737554f7ebd28b85aaab", [:mix], [], "hexpm", "d522695b93b7f0b4c0fcb2dfe73a6b905b1c301226a5a55cb42e5b14d509e050"},
"erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"},
"ex_doc": {:hex, :ex_doc, "0.21.2", "caca5bc28ed7b3bdc0b662f8afe2bee1eedb5c3cf7b322feeeb7c6ebbde089d6", [:mix], [{:earmark, "~> 1.3.3 or ~> 1.4", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm", "f1155337ae17ff7a1255217b4c1ceefcd1860b7ceb1a1874031e7a861b052e39"},
"html5ever": {:git, "https://github.com/rusterlium/html5ever_elixir.git", "f6743865c353aaebaec1959ae4025596f8344587", []},
"makeup": {:hex, :makeup, "1.0.0", "671df94cf5a594b739ce03b0d0316aa64312cee2574b6a44becb83cd90fb05dc", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "a10c6eb62cca416019663129699769f0c2ccf39428b3bb3c0cb38c718a0c186d"},
"makeup_elixir": {:hex, :makeup_elixir, "0.14.0", "cf8b7c66ad1cff4c14679698d532f0b5d45a3968ffbcbfd590339cb57742f1ae", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "d4b316c7222a85bbaa2fd7c6e90e37e953257ad196dc229505137c5e505e9eff"},
"mochiweb": {:hex, :mochiweb, "2.18.0", "eb55f1db3e6e960fac4e6db4e2db9ec3602cc9f30b86cd1481d56545c3145d2e", [:rebar3], [], "hexpm", "b93e2b1e564bdbadfecc297277f9e6d0902da645b417d6c9210f6038ac63489a"},
"myhtml": {:git, "https://github.com/lexborisov/myhtml.git", "fe2cf577570666d058a2b7167c26d3384a758e19", [branch: "master"]},
"myhtmlex": {:hex, :myhtmlex, "0.2.1", "d6f3eb1826f7cdaa0225a996569da0930d1a334405510845c905ae59295ab226", [:make, :mix], [{:nodex, "~> 0.1.1", [hex: :nodex, repo: "hexpm", optional: false]}], "hexpm"},
"nimble_parsec": {:hex, :nimble_parsec, "0.5.1", "c90796ecee0289dbb5ad16d3ad06f957b0cd1199769641c961cfe0b97db190e0", [:mix], [], "hexpm"},
"nodex": {:hex, :nodex, "0.1.1", "ed2f7bbe19ea62a43ad4b7ad332eb3f9ca12c64a35a5802a0eb545b93ebe32af", [:mix], [], "hexpm"},
"rustler": {:hex, :rustler, "0.18.0", "db4bd0c613d83a1badc31be90ddada6f9821de29e4afd15c53a5da61882e4f2d", [:mix], [], "hexpm"},
"myhtmlex": {:hex, :myhtmlex, "0.2.1", "d6f3eb1826f7cdaa0225a996569da0930d1a334405510845c905ae59295ab226", [:make, :mix], [{:nodex, "~> 0.1.1", [hex: :nodex, repo: "hexpm", optional: false]}], "hexpm", "1c22e79731cc492365279d111fcaf7f4f17ca845d2b9660d2285b91c56cbd1c2"},
"nimble_parsec": {:hex, :nimble_parsec, "0.5.1", "c90796ecee0289dbb5ad16d3ad06f957b0cd1199769641c961cfe0b97db190e0", [:mix], [], "hexpm", "00e3ebdc821fb3a36957320d49e8f4bfa310d73ea31c90e5f925dc75e030da8f"},
"nimble_pool": {:hex, :nimble_pool, "0.1.0", "ffa9d5be27eee2b00b0c634eb649aa27f97b39186fec3c493716c2a33e784ec6", [:mix], [], "hexpm", "343a1eaa620ddcf3430a83f39f2af499fe2370390d4f785cd475b4df5acaf3f9"},
"nodex": {:hex, :nodex, "0.1.1", "ed2f7bbe19ea62a43ad4b7ad332eb3f9ca12c64a35a5802a0eb545b93ebe32af", [:mix], [], "hexpm", "0e6804b0138a71e33e99a2e6423019db01de34fe341f765220bce38830779548"},
"rustler": {:hex, :rustler, "0.21.1", "5299980be32da997c54382e945bacaa015ed97a60745e1e639beaf6a7b278c65", [:mix], [{:toml, "~> 0.5.2", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "6ee1651e10645b2b2f3bb70502bf180341aa058709177e9bc28c105934094bc6"},
"toml": {:hex, :toml, "0.5.2", "e471388a8726d1ce51a6b32f864b8228a1eb8edc907a0edf2bb50eab9321b526", [:mix], [], "hexpm", "f1e3dabef71fb510d015fad18c0e05e7c57281001141504c6b69d94e99750a07"},
}
......@@ -43,7 +43,7 @@ defmodule :fast_html_test do
{"body", [],
[
{"br", [], nil},
{"esi:include", [], nil}
{"esi:include", [], []}
]}
]}
]} = :fast_html.decode("<br><esi:include />", format: [:nil_self_closing])
......@@ -125,24 +125,6 @@ defmodule :fast_html_test do
]} = :fast_html.decode(~s'<a <> asdf', format: [:html_atoms])
end
# Tags inside <svg> are expected to come back prefixed with the "svg:" namespace.
test "namespaced tags" do
  markup = ~s'<svg><path></path><a></a></svg>'

  svg_children = [
    {"svg:path", [], []},
    {"svg:a", [], []}
  ]

  expected_tree = [
    {:html, [],
     [
       {:head, [], []},
       {:body, [], [{"svg:svg", [], svg_children}]}
     ]}
  ]

  assert {:ok, ^expected_tree} = :fast_html.decode(markup, format: [:html_atoms])
end
test "custom namespaced tags" do
assert {:ok,
[
......@@ -151,7 +133,7 @@ defmodule :fast_html_test do
{:head, [], []},
{:body, [],
[
{"esi:include", [], nil}
{"esi:include", [], []}
]}
]}
]} = :fast_html.decode(~s'<esi:include />', format: [:html_atoms, :nil_self_closing])
......@@ -178,6 +160,6 @@ defmodule :fast_html_test do
test "doesn't return attribute name in attribute value when the latter is empty" do
assert :fast_html.decode_fragment("<meta content=\"\"/>") ==
{:ok, [{"html", [], [{"meta", [{"content", ""}], []}]}]}
{:ok, [{"meta", [{"content", ""}], []}]}
end
end