...
 
Commits (3)
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [1.0.0] - 2019-12-02
### Changed
- **BREAKING:** `:fast_html.decode` now returns a list of nodes at the top level, instead of a single node. This was done because a document can have more than one root node; for example, in `<!-- a comment --> <html> </html>` both the comment and the `html` tag are root nodes.
### Fixed
- Worker going into an infinite loop when decoding a document with more than one root node.
......@@ -57,7 +57,7 @@ static void err_term(ei_x_buff * response, const char * error_atom);
static parse_flags_t decode_parse_flags(state_t * state, int arity);
static void decode(state_t * state, ei_x_buff * response, const char * bin_data, size_t bin_size, parse_flags_t parse_flags);
static void build_tree(ei_x_buff * response, myhtml_tree_t * tree, myhtml_tree_node_t * node, parse_flags_t parse_flags);
static void build_tree(ei_x_buff * response, myhtml_tree_t * tree, parse_flags_t parse_flags);
static void prepare_node_attrs(ei_x_buff * response, myhtml_tree_node_t * node);
static inline char * lowercase(char * c);
......@@ -298,8 +298,7 @@ static void decode (state_t * state, ei_x_buff * response, const char * bin_data
}
// build tree
myhtml_tree_node_t * root = myhtml_tree_get_document (tree);
build_tree (response, tree, root->child, parse_flags);
build_tree (response, tree, parse_flags);
myhtml_tree_destroy (tree);
}
......@@ -389,12 +388,15 @@ static void prepare_comment (ei_x_buff * response, const char * node_comment, si
#endif
static void build_tree (ei_x_buff * response, myhtml_tree_t * tree, myhtml_tree_node_t * node, parse_flags_t parse_flags)
static void build_tree (ei_x_buff * response, myhtml_tree_t * tree, parse_flags_t parse_flags)
{
myhtml_tree_node_t * current_node = node;
myhtml_tree_node_t * node = myhtml_tree_get_document (tree);
tstack stack;
tstack_init (&stack, 30);
tstack_push (&stack, node);
myhtml_tree_node_t * current_node = node->child;
// ok we're going to send an actual response so start encoding it
response->index = 0;
......@@ -411,7 +413,6 @@ static void build_tree (ei_x_buff * response, myhtml_tree_t * tree, myhtml_tree_
{
size_t text_len;
const char * node_text = myhtml_node_text (current_node, &text_len);
EMIT_LIST_HDR;
ei_x_encode_binary (response, node_text, text_len);
}
......@@ -453,11 +454,7 @@ static void build_tree (ei_x_buff * response, myhtml_tree_t * tree, myhtml_tree_
strncpy (tag_string, tag_name, sizeof buffer - 1);
}
if (stack.used > 0)
{
EMIT_LIST_HDR;
}
EMIT_LIST_HDR;
prepare_tag_header (response, tag_string, current_node, parse_flags);
if (current_node->child)
......
......@@ -32,36 +32,36 @@ defmodule :fast_html do
## Examples
iex> :fast_html.decode("<h1>Hello world</h1>")
{:ok, {"html", [], [{"head", [], []}, {"body", [], [{"h1", [], ["Hello world"]}]}]}}
{:ok, [{"html", [], [{"head", [], []}, {"body", [], [{"h1", [], ["Hello world"]}]}]}]}
iex> :fast_html.decode("Hello world", timeout: 0)
{:error, :timeout}
iex> :fast_html.decode("<span class='hello'>Hi there</span>")
{:ok, {"html", [],
{:ok, [{"html", [],
[{"head", [], []},
{"body", [], [{"span", [{"class", "hello"}], ["Hi there"]}]}]}}
{"body", [], [{"span", [{"class", "hello"}], ["Hi there"]}]}]}]}
iex> :fast_html.decode("<body><!-- a comment --!></body>")
{:ok, {"html", [], [{"head", [], []}, {"body", [], [comment: " a comment "]}]}}
{:ok, [{"html", [], [{"head", [], []}, {"body", [], [comment: " a comment "]}]}]}
iex> :fast_html.decode("<br>")
{:ok, {"html", [], [{"head", [], []}, {"body", [], [{"br", [], []}]}]}}
{:ok, [{"html", [], [{"head", [], []}, {"body", [], [{"br", [], []}]}]}]}
iex> :fast_html.decode("<h1>Hello world</h1>", format: [:html_atoms])
{:ok, {:html, [], [{:head, [], []}, {:body, [], [{:h1, [], ["Hello world"]}]}]}}
{:ok, [{:html, [], [{:head, [], []}, {:body, [], [{:h1, [], ["Hello world"]}]}]}]}
iex> :fast_html.decode("<br>", format: [:nil_self_closing])
{:ok, {"html", [], [{"head", [], []}, {"body", [], [{"br", [], nil}]}]}}
{:ok, [{"html", [], [{"head", [], []}, {"body", [], [{"br", [], nil}]}]}]}
iex> :fast_html.decode("<body><!-- a comment --!></body>", format: [:comment_tuple3])
{:ok, {"html", [], [{"head", [], []}, {"body", [], [{:comment, [], " a comment "}]}]}}
{:ok, [{"html", [], [{"head", [], []}, {"body", [], [{:comment, [], " a comment "}]}]}]}
iex> html = "<body><!-- a comment --!><unknown /></body>"
iex> :fast_html.decode(html, format: [:html_atoms, :nil_self_closing, :comment_tuple3])
{:ok, {:html, [],
{:ok, [{:html, [],
[{:head, [], []},
{:body, [], [{:comment, [], " a comment "}, {"unknown", [], nil}]}]}}
{:body, [], [{:comment, [], " a comment "}, {"unknown", [], nil}]}]}]}
"""
@spec decode(String.t(), format: [format_flag()]) ::
......
......@@ -4,7 +4,7 @@ defmodule FastHtml.Mixfile do
def project do
[
app: :fast_html,
version: "0.99.4",
version: "1.0.0",
elixir: "~> 1.5",
deps: deps(),
package: package(),
......
......@@ -3,68 +3,78 @@ defmodule :fast_html_test do
doctest :fast_html
test "doesn't segfault when <!----> is encountered" do
assert {:ok, {"html", _attrs, _children}} = :fast_html.decode("<div> <!----> </div>")
assert {:ok, [{"html", _attrs, _children}]} = :fast_html.decode("<div> <!----> </div>")
end
test "builds a tree, formatted like mochiweb by default" do
assert {:ok,
{"html", [],
[
{"head", [], []},
{"body", [],
[
{"br", [], []}
]}
]}} = :fast_html.decode("<br>")
[
{"html", [],
[
{"head", [], []},
{"body", [],
[
{"br", [], []}
]}
]}
]} = :fast_html.decode("<br>")
end
test "builds a tree, html tags as atoms" do
assert {:ok,
{:html, [],
[
{:head, [], []},
{:body, [],
[
{:br, [], []}
]}
]}} = :fast_html.decode("<br>", format: [:html_atoms])
[
{:html, [],
[
{:head, [], []},
{:body, [],
[
{:br, [], []}
]}
]}
]} = :fast_html.decode("<br>", format: [:html_atoms])
end
test "builds a tree, nil self closing" do
assert {:ok,
{"html", [],
[
{"head", [], []},
{"body", [],
[
{"br", [], nil},
{"esi:include", [], nil}
]}
]}} = :fast_html.decode("<br><esi:include />", format: [:nil_self_closing])
[
{"html", [],
[
{"head", [], []},
{"body", [],
[
{"br", [], nil},
{"esi:include", [], nil}
]}
]}
]} = :fast_html.decode("<br><esi:include />", format: [:nil_self_closing])
end
test "builds a tree, multiple format options" do
assert {:ok,
{:html, [],
[
{:head, [], []},
{:body, [],
[
{:br, [], nil}
]}
]}} = :fast_html.decode("<br>", format: [:html_atoms, :nil_self_closing])
[
{:html, [],
[
{:head, [], []},
{:body, [],
[
{:br, [], nil}
]}
]}
]} = :fast_html.decode("<br>", format: [:html_atoms, :nil_self_closing])
end
test "attributes" do
assert {:ok,
{:html, [],
[
{:head, [], []},
{:body, [],
[
{:span, [{"id", "test"}, {"class", "foo garble"}], []}
]}
]}} =
[
{:html, [],
[
{:head, [], []},
{:body, [],
[
{:span, [{"id", "test"}, {"class", "foo garble"}], []}
]}
]}
]} =
:fast_html.decode(~s'<span id="test" class="foo garble"></span>',
format: [:html_atoms]
)
......@@ -72,14 +82,16 @@ defmodule :fast_html_test do
test "single attributes" do
assert {:ok,
{:html, [],
[
{:head, [], []},
{:body, [],
[
{:button, [{"disabled", "disabled"}, {"class", "foo garble"}], []}
]}
]}} =
[
{:html, [],
[
{:head, [], []},
{:body, [],
[
{:button, [{"disabled", "disabled"}, {"class", "foo garble"}], []}
]}
]}
]} =
:fast_html.decode(~s'<button disabled class="foo garble"></span>',
format: [:html_atoms]
)
......@@ -87,66 +99,80 @@ defmodule :fast_html_test do
test "text nodes" do
assert {:ok,
{:html, [],
[
{:head, [], []},
{:body, [],
[
"text node"
]}
]}} = :fast_html.decode(~s'<body>text node</body>', format: [:html_atoms])
[
{:html, [],
[
{:head, [], []},
{:body, [],
[
"text node"
]}
]}
]} = :fast_html.decode(~s'<body>text node</body>', format: [:html_atoms])
end
test "broken input" do
assert {:ok,
{:html, [],
[
{:head, [], []},
{:body, [],
[
{:a, [{"<", "<"}], [" asdf"]}
]}
]}} = :fast_html.decode(~s'<a <> asdf', format: [:html_atoms])
[
{:html, [],
[
{:head, [], []},
{:body, [],
[
{:a, [{"<", "<"}], [" asdf"]}
]}
]}
]} = :fast_html.decode(~s'<a <> asdf', format: [:html_atoms])
end
test "namespaced tags" do
assert {:ok,
{:html, [],
[
{:head, [], []},
{:body, [],
[
{"svg:svg", [],
[
{"svg:path", [], []},
{"svg:a", [], []}
]}
]}
]}} = :fast_html.decode(~s'<svg><path></path><a></a></svg>', format: [:html_atoms])
[
{:html, [],
[
{:head, [], []},
{:body, [],
[
{"svg:svg", [],
[
{"svg:path", [], []},
{"svg:a", [], []}
]}
]}
]}
]} = :fast_html.decode(~s'<svg><path></path><a></a></svg>', format: [:html_atoms])
end
test "custom namespaced tags" do
assert {:ok,
{:html, [],
[
{:head, [], []},
{:body, [],
[
{"esi:include", [], nil}
]}
]}} =
:fast_html.decode(~s'<esi:include />', format: [:html_atoms, :nil_self_closing])
[
{:html, [],
[
{:head, [], []},
{:body, [],
[
{"esi:include", [], nil}
]}
]}
]} = :fast_html.decode(~s'<esi:include />', format: [:html_atoms, :nil_self_closing])
end
test "html comments" do
assert {:ok,
{:html, [],
[
{:head, [], []},
{:body, [],
[
comment: " a comment "
]}
]}} = :fast_html.decode(~s'<body><!-- a comment --></body>', format: [:html_atoms])
[
{:html, [],
[
{:head, [], []},
{:body, [],
[
comment: " a comment "
]}
]}
]} = :fast_html.decode(~s'<body><!-- a comment --></body>', format: [:html_atoms])
end
# Regression test for documents with more than one root node: here a comment
# followed by an <html> element. The changelog records that the worker used to
# go into an infinite loop on such input.
# The result is compared with == (not pattern-matched), so the decoded value
# must be exactly a two-element list: the comment tuple, then the "html" tuple.
# The whitespace around the tags in the input does not appear in the expected tree.
test "doesn't go into an infinite loop when there are more than one root tags" do
assert {:ok, [{:comment, " a comment "}, {"html", [], [{"head", [], []}, {"body", [], []}]}]} ==
:fast_html.decode("<!-- a comment --> <html> </html>")
end
end