Commit 73338a46 authored by rinpatch's avatar rinpatch

Merge branch 'chore/rebrand' into 'master'

dirty rebrand

See merge request pleroma/myhtmlex!6
parents 564532c0 e0c1914a
Pipeline #19613 passed with stage
in 54 seconds
# Compiler and linker flag variables. MYHTMLEX_* and MYHTML_WORKER_* are kept
# in parallel: every flag is appended to both sets.
MIX = mix
MYHTMLEX_CFLAGS = -g -O2 -std=c99 -pedantic -Wcomment -Wextra -Wno-old-style-declaration -Wall
MYHTML_WORKER_CFLAGS = -g -O2 -std=c99 -pedantic -Wcomment -Wextra -Wno-old-style-declaration -Wall
# we need to compile position independent code
MYHTMLEX_CFLAGS += -fpic -DPIC
MYHTML_WORKER_CFLAGS += -fpic -DPIC
# For some reason __erl_errno is undefined unless _REENTRANT is defined
MYHTMLEX_CFLAGS += -D_REENTRANT
MYHTML_WORKER_CFLAGS += -D_REENTRANT
# myhtmlex is using stpcpy, as defined in gnu string.h
# MYHTMLEX_CFLAGS += -D_GNU_SOURCE
# MYHTML_WORKER_CFLAGS += -D_GNU_SOURCE
# base on the same posix c source as myhtml
# MYHTMLEX_CFLAGS += -D_POSIX_C_SOURCE=199309
# MYHTML_WORKER_CFLAGS += -D_POSIX_C_SOURCE=199309
# turn warnings into errors
# MYHTMLEX_CFLAGS += -Werror
# MYHTML_WORKER_CFLAGS += -Werror
# ignore unused variables
# MYHTMLEX_CFLAGS += -Wno-unused-variable
# MYHTML_WORKER_CFLAGS += -Wno-unused-variable
# ignore unused parameter warnings
MYHTMLEX_CFLAGS += -Wno-unused-parameter
MYHTML_WORKER_CFLAGS += -Wno-unused-parameter
# set erlang include path
# (asks the Erlang runtime to print "<code:root_dir()>/erts-<version>")
ERLANG_PATH = $(shell erl -eval 'io:format("~s", [lists:concat([code:root_dir(), "/erts-", erlang:system_info(version)])])' -s init stop -noshell)
MYHTMLEX_CFLAGS += -I$(ERLANG_PATH)/include
MYHTML_WORKER_CFLAGS += -I$(ERLANG_PATH)/include
# expecting myhtml as a submodule in c_src/
# that way we can pin a version and package the whole thing in hex
# hex does not allow for non-app related dependencies.
MYHTML_PATH = c_src/myhtml
MYHTML_STATIC = $(MYHTML_PATH)/lib/libmyhtml_static.a
MYHTMLEX_CFLAGS += -I$(MYHTML_PATH)/include
MYHTML_WORKER_CFLAGS += -I$(MYHTML_PATH)/include
# avoid undefined reference errors to pthread_mutex_trylock
MYHTMLEX_CFLAGS += -lpthread
MYHTML_WORKER_CFLAGS += -lpthread
# that would be used for a dynamically linked build
# MYHTMLEX_CFLAGS += -L$(MYHTML_PATH)/lib
# MYHTML_WORKER_CFLAGS += -L$(MYHTML_PATH)/lib
MYHTMLEX_LDFLAGS = -shared
MYHTML_WORKER_LDFLAGS = -shared
# C-Node
# erl_interface directory located by globbing next to the erts directory
ERL_INTERFACE = $(wildcard $(ERLANG_PATH)/../lib/erl_interface-*)
# NOTE(review): CNODE_CFLAGS is assigned with `=` twice in a row, so the second
# assignment replaces the first; only MYHTML_WORKER_CFLAGS takes effect.
CNODE_CFLAGS = $(MYHTMLEX_CFLAGS)
CNODE_CFLAGS = $(MYHTML_WORKER_CFLAGS)
CNODE_CFLAGS += -L$(ERL_INTERFACE)/lib
CNODE_CFLAGS += -I$(ERL_INTERFACE)/include
......@@ -55,12 +55,12 @@ BUILD_TESTS := $(patsubst %.dockerfile, %.dockerfile.PHONY, $(wildcard ./build-t
# platform specific environment
# UNAME holds the kernel name reported by `uname -s` (e.g. Darwin, Linux).
# The conditional must test this same variable: testing an unset name such as
# UNAME_S always compares the empty string and silently takes the else branch.
UNAME = $(shell uname -s)
ifeq ($(UNAME),Darwin)
# macOS: build a dynamic library and resolve undefined symbols at load time
MYHTMLEX_LDFLAGS += -dynamiclib -undefined dynamic_lookup
MYHTML_WORKER_LDFLAGS += -dynamiclib -undefined dynamic_lookup
else
# myhtmlex is using stpcpy, as defined in gnu string.h
MYHTMLEX_CFLAGS += -D_GNU_SOURCE
MYHTML_WORKER_CFLAGS += -D_GNU_SOURCE
# base on the same posix c source as myhtml
# MYHTMLEX_CFLAGS += -D_POSIX_C_SOURCE=199309
# MYHTML_WORKER_CFLAGS += -D_POSIX_C_SOURCE=199309
endif
.PHONY: all
......
# Myhtmlex
# FastHTML
Bindings for lexborisov's [myhtml](https://github.com/lexborisov/myhtml).
A C Node wrapping lexborisov's [myhtml](https://github.com/lexborisov/myhtml).
Primarily used with [FastSanitize](https://git.pleroma.social/pleroma/fast_sanitize).
* Available as a hex package: `{:myhtmlex, "~> 0.2.0"}`
* [Documentation](https://hexdocs.pm/myhtmlex/Myhtmlex.html)
* Available as a hex package: `{:fast_html, "~> 0.1.0"}`
* [Documentation](https://hexdocs.pm/fast_html/FastHTML.html)
## Example
iex> Myhtmlex.decode("<h1>Hello world</h1>")
iex> :fast_html.decode("<h1>Hello world</h1>")
{"html", [], [{"head", [], []}, {"body", [], [{"h1", [], ["Hello world"]}]}]}
Benchmark results (removed Nif calling mode) on various file sizes on a 2.5 GHz Core i7:
......@@ -31,17 +32,4 @@ Bindings for lexborisov's [myhtml](https://github.com/lexborisov/myhtml).
## Contribution / Bug Reports
* Please make sure you do `git submodule update` after a checkout/pull
* If you have problems building the project, please consider adding a Dockerfile to `build-tests/` to replicate the build error
* The project aims to be fully tested
## Roadmap
The exposed functions on `Myhtmlex` are not subject to change.
This project is under active development.
* [ ] Expose node-retrieval functions
* [x] Parse an HTML document into a tree
* [x] Investigate safety and calling options
* [x] Call as dirty-nif
* [x] Call as C-Node (check branch `c-node`)
# Benchfella benchmarks that decode the bench/w3c_html5.html fixture with
# different `format:` options.
# NOTE(review): calls to both Myhtmlex and :fast_html appear back to back in
# several benches; this looks like old and new lines of a rename shown
# together. Confirm which call is meant to remain.
defmodule BasicHtmlBench do
use Benchfella
# Reads the fixture once and builds the shared context:
# {raw html binary, result of Myhtmlex.open(html)}.
setup_all do
html = File.read!("bench/w3c_html5.html")
context = {html, Myhtmlex.open(html)}
{:ok, context}
end
# Decode the raw HTML with default options.
bench "decode" do
{html, _} = bench_context
Myhtmlex.decode(html)
:fast_html.decode(html)
end
# Decode with the :html_atoms format flag.
bench "decode w/ html_atoms" do
{html, _} = bench_context
Myhtmlex.decode(html, format: [:html_atoms])
:fast_html.decode(html, format: [:html_atoms])
end
# Decode with the :nil_self_closing format flag.
bench "decode w/ nil_self_closing" do
{html, _} = bench_context
Myhtmlex.decode(html, format: [:nil_self_closing])
:fast_html.decode(html, format: [:nil_self_closing])
end
# Decode with both format flags.
bench "decode w/ html_atoms, nil_self_closing" do
{html, _} = bench_context
Myhtmlex.decode(html, format: [:html_atoms, :nil_self_closing])
end
# Decode from the second context element (the value returned by Myhtmlex.open).
# NOTE(review): the last call below uses `html`, but this bench only binds
# `ref`; presumably that line belongs to the previous bench. Confirm.
bench "decode_tree" do
{_, ref} = bench_context
Myhtmlex.decode_tree(ref)
:fast_html.decode(html, format: [:html_atoms, :nil_self_closing])
end
# Same as "decode_tree", with the :html_atoms format flag.
bench "decode_tree w/ html_atoms" do
{_, ref} = bench_context
Myhtmlex.decode_tree(ref, format: [:html_atoms])
end
end
......@@ -14,17 +14,17 @@ defmodule CnodeFileSizesBench do
bench "github_trending_js.html 341k" do
{ref, _, _} = bench_context
Myhtmlex.Safe.decode(ref)
:fast_html.decode(ref)
end
bench "w3c_html5.html 131k" do
{_, ref, _} = bench_context
Myhtmlex.Safe.decode(ref)
:fast_html.decode(ref)
end
bench "wikipedia_hyperlink.html 97k" do
{_, _, ref} = bench_context
Myhtmlex.Safe.decode(ref)
:fast_html.decode(ref)
end
end
......
defmodule FileSizesBench do
  # Benchfella benchmarks measuring Myhtmlex.decode_tree/1 on three fixture
  # files of different sizes.
  use Benchfella

  # Reads and opens each fixture once. The context is a 3-tuple of the values
  # returned by Myhtmlex.open/1, in the order: github, w3c, wikipedia.
  setup_all do
    [github_tree, w3c_tree, wikipedia_tree] =
      Enum.map(
        [
          "bench/github_trending_js.html",
          "bench/w3c_html5.html",
          "bench/wikipedia_hyperlink.html"
        ],
        fn path -> Myhtmlex.open(File.read!(path)) end
      )

    {:ok, {github_tree, w3c_tree, wikipedia_tree}}
  end

  bench "github_trending_js.html 341k" do
    {github_tree, _w3c, _wikipedia} = bench_context
    Myhtmlex.decode_tree(github_tree)
  end

  bench "w3c_html5.html 131k" do
    {_github, w3c_tree, _wikipedia} = bench_context
    Myhtmlex.decode_tree(w3c_tree)
  end

  bench "wikipedia_hyperlink.html 97k" do
    {_github, _w3c, wikipedia_tree} = bench_context
    Myhtmlex.decode_tree(wikipedia_tree)
  end
end
#!/bin/sh
# Packaging smoke test: unpacks the hex package into package-test/, creates a
# fresh mix project that depends on it by path, and checks that
# Myhtmlex.decode/1 returns the expected tree in the default mode and with
# `mode: Myhtmlex.Nif` configured.
# Prints "ok" on success; exits with status 1 on any failure.

# Remember where we started; quoted everywhere it is used so a working
# directory containing whitespace does not get word-split.
start_dir=$(pwd)

# Exit handler while the trap is armed: return to the start directory and
# report failure.
abort()
{
  cd "$start_dir"
  exit 1
}
trap 'abort' 0
# Stop at the first failing command (which then triggers the exit trap).
set -e
make clean
test -d package-test || mkdir package-test
rm -rf package-test/*
mkdir package-test/myhtmlex-local
mix hex.build --unpack -o package-test/myhtmlex-local
cd package-test
mix new myhtmlex_pkg_test
cd myhtmlex_pkg_test
# Default operation
# Replace the generated example dependency line with a path dependency.
sed -i -e 's/^.*dep_from_hexpm.*$/ {:myhtmlex, path: "..\/myhtmlex-local"}/' mix.exs
mix deps.get
mix compile
mix run -e 'IO.inspect {"html", [], [{"head", [], []}, {"body", [], ["foo"]}]} = Myhtmlex.decode("foo")'
# Nif operation
sed -i -e 's/^.*myhtmlex-local.*$/ {:myhtmlex, path: "..\/myhtmlex-local", runtime: false}/' mix.exs
echo "config :myhtmlex, mode: Myhtmlex.Nif" >> config/config.exs
mix run -e 'IO.inspect {"html", [], [{"head", [], []}, {"body", [], ["foo"]}]} = Myhtmlex.decode("foo")'
# Success: disarm the exit trap so the script exits with status 0.
trap : 0
cd "$start_dir"
echo "ok"
# Build test image based on ubuntu:trusty: installs Erlang/Elixir from the
# erlang-solutions package repository, then builds, tests and benchmarks the
# project and finally runs the packaging script.
FROM ubuntu:trusty
RUN mkdir myhtmlex
WORKDIR myhtmlex
# Keep apt from prompting during package installation.
ENV DEBIAN_FRONTEND=noninteractive
# Register the erlang-solutions repository, install the toolchain, install hex
# and write locale defaults.
RUN apt-get update \
&& apt-get install -y curl \
&& curl -LO https://packages.erlang-solutions.com/erlang-solutions_1.0_all.deb \
&& dpkg -i erlang-solutions_1.0_all.deb \
&& apt-get update \
&& apt-get install -y \
git \
esl-erlang \
elixir \
build-essential \
&& mix local.hex --force \
&& echo 'LANG=en_US.UTF-8' > /etc/default/locale \
&& echo 'LANGUAGE=en_US' >> /etc/default/locale
# Copy the build context into the working directory.
COPY . ./
# Test build
RUN mix deps.get \
&& make \
&& mix test \
&& mix bench \
&& make clean
# Test that it works as a dependency
RUN build-test/package.sh
# Build test image based on ubuntu:xenial: installs Erlang/Elixir from the
# erlang-solutions package repository, then builds, tests and benchmarks the
# project and finally runs the packaging script.
FROM ubuntu:xenial
RUN mkdir myhtmlex
WORKDIR myhtmlex
# Keep apt from prompting during package installation.
ENV DEBIAN_FRONTEND=noninteractive
# Register the erlang-solutions repository, install the toolchain, install hex
# and write locale defaults.
RUN apt-get update \
&& apt-get install -y curl \
&& curl -LO https://packages.erlang-solutions.com/erlang-solutions_1.0_all.deb \
&& dpkg -i erlang-solutions_1.0_all.deb \
&& apt-get update \
&& apt-get install -y \
git \
esl-erlang \
elixir \
build-essential \
&& mix local.hex --force \
&& echo 'LANG=en_US.UTF-8' > /etc/default/locale \
&& echo 'LANGUAGE=en_US' >> /etc/default/locale
# Copy the build context into the working directory.
COPY . ./
# Test build
RUN mix deps.get \
&& make \
&& mix test \
&& mix bench \
&& make clean
# Test that it works as a dependency
RUN build-test/package.sh
defmodule Myhtmlex do
defmodule :fast_html do
@moduledoc """
A module to decode html into a tree structure.
......@@ -7,7 +7,7 @@ defmodule Myhtmlex do
## Example
iex> Myhtmlex.decode("<h1>Hello world</h1>")
iex> :fast_html.decode("<h1>Hello world</h1>")
{"html", [], [{"head", [], []}, {"body", [], [{"h1", [], ["Hello world"]}]}]}
Benchmark results (removed Nif calling mode) on various file sizes on a 2,5Ghz Core i7:
......@@ -46,18 +46,18 @@ defmodule Myhtmlex do
## Examples
iex> Myhtmlex.decode("<h1>Hello world</h1>")
iex> :fast_html.decode("<h1>Hello world</h1>")
{"html", [], [{"head", [], []}, {"body", [], [{"h1", [], ["Hello world"]}]}]}
iex> Myhtmlex.decode("<span class='hello'>Hi there</span>")
iex> :fast_html.decode("<span class='hello'>Hi there</span>")
{"html", [],
[{"head", [], []},
{"body", [], [{"span", [{"class", "hello"}], ["Hi there"]}]}]}
iex> Myhtmlex.decode("<body><!-- a comment --!></body>")
iex> :fast_html.decode("<body><!-- a comment --!></body>")
{"html", [], [{"head", [], []}, {"body", [], [comment: " a comment "]}]}
iex> Myhtmlex.decode("<br>")
iex> :fast_html.decode("<br>")
{"html", [], [{"head", [], []}, {"body", [], [{"br", [], []}]}]}
"""
@spec decode(String.t()) :: tree()
......@@ -78,17 +78,17 @@ defmodule Myhtmlex do
## Examples
iex> Myhtmlex.decode("<h1>Hello world</h1>", format: [:html_atoms])
iex> :fast_html.decode("<h1>Hello world</h1>", format: [:html_atoms])
{:html, [], [{:head, [], []}, {:body, [], [{:h1, [], ["Hello world"]}]}]}
iex> Myhtmlex.decode("<br>", format: [:nil_self_closing])
iex> :fast_html.decode("<br>", format: [:nil_self_closing])
{"html", [], [{"head", [], []}, {"body", [], [{"br", [], nil}]}]}
iex> Myhtmlex.decode("<body><!-- a comment --!></body>", format: [:comment_tuple3])
iex> :fast_html.decode("<body><!-- a comment --!></body>", format: [:comment_tuple3])
{"html", [], [{"head", [], []}, {"body", [], [{:comment, [], " a comment "}]}]}
iex> html = "<body><!-- a comment --!><unknown /></body>"
iex> Myhtmlex.decode(html, format: [:html_atoms, :nil_self_closing, :comment_tuple3])
iex> :fast_html.decode(html, format: [:html_atoms, :nil_self_closing, :comment_tuple3])
{:html, [],
[{:head, [], []},
{:body, [], [{:comment, [], " a comment "}, {"unknown", [], nil}]}]}
......
defmodule Myhtmlex.Mixfile do
defmodule FastHTML.Mixfile do
use Mix.Project
def project do
[
app: :myhtmlex,
app: :fast_html,
version: "0.2.1",
elixir: "~> 1.5",
deps: deps(),
package: package(),
compilers: [:myhtmlex_make] ++ Mix.compilers(),
compilers: [:my_html_worker_make] ++ Mix.compilers(),
build_embedded: Mix.env() == :prod,
start_permanent: Mix.env() == :prod,
name: "Myhtmlex",
name: "FastHTML",
description: """
A module to decode HTML into a tree,
porting all properties of the underlying
library myhtml, being fast and correct
in regards to the html spec.
Originally based on Myhtmlex.
""",
docs: docs()
]
......@@ -24,11 +26,11 @@ defmodule Myhtmlex.Mixfile do
def package do
[
maintainers: ["Lukas Rieder"],
maintainers: ["Ariadne Conill"],
licenses: ["GNU LGPL"],
links: %{
"Github" => "https://git.pleroma.social/pleroma/myhtmlex",
"Issues" => "https://git.pleroma.social/pleroma/myhtmlex/issues",
"GitLab" => "https://git.pleroma.social/pleroma/fast_html",
"Issues" => "https://git.pleroma.social/pleroma/fast_html/issues",
"MyHTML" => "https://github.com/lexborisov/myhtml"
},
files: [
......@@ -71,12 +73,12 @@ defmodule Myhtmlex.Mixfile do
defp docs do
[
main: "Myhtmlex"
main: "FastHTML"
]
end
end
defmodule Mix.Tasks.Compile.MyhtmlexMake do
defmodule Mix.Tasks.Compile.MyHtmlWorkerMake do
@artifacts [
"priv/myhtml_worker"
]
......
defmodule Myhtmlex.SafeTest do
defmodule :fast_html_test do
use ExUnit.Case
doctest Myhtmlex
doctest :fast_html
test "doesn't segfault when <!----> is encountered" do
assert {"html", _attrs, _children} = Myhtmlex.decode("<div> <!----> </div>")
assert {"html", _attrs, _children} = :fast_html.decode("<div> <!----> </div>")
end
test "builds a tree, formatted like mochiweb by default" do
......@@ -14,7 +14,7 @@ defmodule Myhtmlex.SafeTest do
[
{"br", [], []}
]}
]} = Myhtmlex.decode("<br>")
]} = :fast_html.decode("<br>")
end
test "builds a tree, html tags as atoms" do
......@@ -25,7 +25,7 @@ defmodule Myhtmlex.SafeTest do
[
{:br, [], []}
]}
]} = Myhtmlex.decode("<br>", format: [:html_atoms])
]} = :fast_html.decode("<br>", format: [:html_atoms])
end
test "builds a tree, nil self closing" do
......@@ -37,7 +37,7 @@ defmodule Myhtmlex.SafeTest do
{"br", [], nil},
{"esi:include", [], nil}
]}
]} = Myhtmlex.decode("<br><esi:include />", format: [:nil_self_closing])
]} = :fast_html.decode("<br><esi:include />", format: [:nil_self_closing])
end
test "builds a tree, multiple format options" do
......@@ -48,7 +48,7 @@ defmodule Myhtmlex.SafeTest do
[
{:br, [], nil}
]}
]} = Myhtmlex.decode("<br>", format: [:html_atoms, :nil_self_closing])
]} = :fast_html.decode("<br>", format: [:html_atoms, :nil_self_closing])
end
test "attributes" do
......@@ -60,7 +60,7 @@ defmodule Myhtmlex.SafeTest do
{:span, [{"id", "test"}, {"class", "foo garble"}], []}
]}
]} =
Myhtmlex.decode(~s'<span id="test" class="foo garble"></span>',
:fast_html.decode(~s'<span id="test" class="foo garble"></span>',
format: [:html_atoms]
)
end
......@@ -74,7 +74,7 @@ defmodule Myhtmlex.SafeTest do
{:button, [{"disabled", "disabled"}, {"class", "foo garble"}], []}
]}
]} =
Myhtmlex.decode(~s'<button disabled class="foo garble"></span>',
:fast_html.decode(~s'<button disabled class="foo garble"></span>',
format: [:html_atoms]
)
end
......@@ -87,7 +87,7 @@ defmodule Myhtmlex.SafeTest do
[
"text node"
]}
]} = Myhtmlex.decode(~s'<body>text node</body>', format: [:html_atoms])
]} = :fast_html.decode(~s'<body>text node</body>', format: [:html_atoms])
end
test "broken input" do
......@@ -98,7 +98,7 @@ defmodule Myhtmlex.SafeTest do
[
{:a, [{"<", "<"}], [" asdf"]}
]}
]} = Myhtmlex.decode(~s'<a <> asdf', format: [:html_atoms])
]} = :fast_html.decode(~s'<a <> asdf', format: [:html_atoms])
end
test "namespaced tags" do
......@@ -113,7 +113,7 @@ defmodule Myhtmlex.SafeTest do
{"svg:a", [], []}
]}
]}
]} = Myhtmlex.decode(~s'<svg><path></path><a></a></svg>', format: [:html_atoms])
]} = :fast_html.decode(~s'<svg><path></path><a></a></svg>', format: [:html_atoms])
end
test "custom namespaced tags" do
......@@ -124,7 +124,7 @@ defmodule Myhtmlex.SafeTest do
[
{"esi:include", [], nil}
]}
]} = Myhtmlex.decode(~s'<esi:include />', format: [:html_atoms, :nil_self_closing])
]} = :fast_html.decode(~s'<esi:include />', format: [:html_atoms, :nil_self_closing])
end
test "html comments" do
......@@ -135,6 +135,6 @@ defmodule Myhtmlex.SafeTest do
[
comment: " a comment "
]}
]} = Myhtmlex.decode(~s'<body><!-- a comment --></body>', format: [:html_atoms])
]} = :fast_html.decode(~s'<body><!-- a comment --></body>', format: [:html_atoms])
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment