Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Pleroma
pleroma
Commits
47213221
Commit
47213221
authored
Feb 16, 2020
by
rinpatch
Browse files
Use floki's new APIs for parsing fragments
parent
0b5a2bbe
Pipeline
#22902
passed with stages
in 5 minutes and 37 seconds
Changes
8
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
lib/pleroma/html.ex
View file @
47213221
...
...
@@ -108,6 +108,7 @@ def extract_first_external_url(object, content) do
Cachex
.
fetch!
(
:scrubber_cache
,
key
,
fn
_key
->
result
=
content
|>
Floki
.
parse_fragment!
()
|>
Floki
.
filter_out
(
"a.mention,a.hashtag,a[rel~=
\"
tag
\"
]"
)
|>
Floki
.
attribute
(
"a"
,
"href"
)
|>
Enum
.
at
(
0
)
...
...
lib/pleroma/web/activity_pub/mrf/anti_link_spam_policy.ex
View file @
47213221
...
...
@@ -17,6 +17,7 @@ defp old_user?(%User{} = u) do
# does the post contain links?
defp
contains_links?
(%{
"content"
=>
content
}
=
_object
)
do
content
|>
Floki
.
parse_fragment!
()
|>
Floki
.
filter_out
(
"a.mention,a.hashtag,a[rel~=
\"
tag
\"
],a.zrl"
)
|>
Floki
.
attribute
(
"a"
,
"href"
)
|>
length
()
>
0
...
...
lib/pleroma/web/metadata/rel_me.ex
View file @
47213221
...
...
@@ -8,8 +8,10 @@ defmodule Pleroma.Web.Metadata.Providers.RelMe do
@impl
Provider
def
build_tags
(%{
user:
user
})
do
(
Floki
.
attribute
(
user
.
bio
,
"link[rel~=me]"
,
"href"
)
++
Floki
.
attribute
(
user
.
bio
,
"a[rel~=me]"
,
"href"
))
bio_tree
=
Floki
.
parse_fragment!
(
user
.
bio
)
(
Floki
.
attribute
(
bio_tree
,
"link[rel~=me]"
,
"href"
)
++
Floki
.
attribute
(
bio_tree
,
"a[rel~=me]"
,
"href"
))
|>
Enum
.
map
(
fn
link
->
{
:link
,
[
rel:
"me"
,
href:
link
],
[]}
end
)
...
...
lib/pleroma/web/rel_me.ex
View file @
47213221
...
...
@@ -27,9 +27,10 @@ def parse(_), do: {:error, "No URL provided"}
defp
parse_url
(
url
)
do
with
{
:ok
,
%
Tesla
.
Env
{
body:
html
,
status:
status
}}
when
status
in
200
..
299
<-
Pleroma
.
HTTP
.
get
(
url
,
[],
adapter:
@hackney_options
),
{
:ok
,
html_tree
}
<-
Floki
.
parse_document
(
html
),
data
<-
Floki
.
attribute
(
html
,
"link[rel~=me]"
,
"href"
)
++
Floki
.
attribute
(
html
,
"a[rel~=me]"
,
"href"
)
do
Floki
.
attribute
(
html
_tree
,
"link[rel~=me]"
,
"href"
)
++
Floki
.
attribute
(
html
_tree
,
"a[rel~=me]"
,
"href"
)
do
{
:ok
,
data
}
end
rescue
...
...
lib/pleroma/web/rich_media/parser.ex
View file @
47213221
...
...
@@ -81,18 +81,18 @@ defp parse_url(url) do
{
:ok
,
%
Tesla
.
Env
{
body:
html
}}
=
Pleroma
.
HTTP
.
get
(
url
,
[],
adapter:
@hackney_options
)
html
|>
parse_html
|>
parse_html
()
|>
maybe_parse
()
|>
Map
.
put
(
:url
,
url
)
|>
clean_parsed_data
()
|>
check_parsed_data
()
rescue
e
->
{
:error
,
"Parsing error:
#{
inspect
(
e
)
}
"
}
{
:error
,
"Parsing error:
#{
inspect
(
e
)
}
#{
inspect
(
__STACKTRACE__
)
}
"
}
end
end
defp
parse_html
(
html
),
do
:
Floki
.
parse
(
html
)
defp
parse_html
(
html
),
do
:
Floki
.
parse
_document!
(
html
)
defp
maybe_parse
(
html
)
do
Enum
.
reduce_while
(
parsers
(),
%{},
fn
parser
,
acc
->
...
...
mix.exs
View file @
47213221
...
...
@@ -139,8 +139,8 @@ defp deps do
{
:phoenix_swoosh
,
"~> 0.2"
},
{
:gen_smtp
,
"~> 0.13"
},
{
:websocket_client
,
git:
"https://github.com/jeremyong/websocket_client.git"
,
only:
:test
},
{
:floki
,
"~> 0.23.0"
},
{
:ex_syslogger
,
"~> 1.4"
},
{
:floki
,
"~> 0.25"
},
{
:timex
,
"~> 3.5"
},
{
:ueberauth
,
"~> 0.4"
},
{
:auto_linker
,
...
...
mix.lock
View file @
47213221
This diff is collapsed.
Click to expand it.
test/web/rich_media/parsers/twitter_card_test.exs
View file @
47213221
...
...
@@ -7,11 +7,14 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
alias
Pleroma
.
Web
.
RichMedia
.
Parsers
.
TwitterCard
test
"returns error when html not contains twitter card"
do
assert
TwitterCard
.
parse
(
""
,
%{})
==
{
:error
,
"No twitter card metadata found"
}
assert
TwitterCard
.
parse
([{
"html"
,
[],
[{
"head"
,
[],
[]},
{
"body"
,
[],
[]}]}],
%{})
==
{
:error
,
"No twitter card metadata found"
}
end
test
"parses twitter card with only name attributes"
do
html
=
File
.
read!
(
"test/fixtures/nypd-facial-recognition-children-teenagers3.html"
)
html
=
File
.
read!
(
"test/fixtures/nypd-facial-recognition-children-teenagers3.html"
)
|>
Floki
.
parse_document!
()
assert
TwitterCard
.
parse
(
html
,
%{})
==
{
:ok
,
...
...
@@ -26,7 +29,9 @@ test "parses twitter card with only name attributes" do
end
test
"parses twitter card with only property attributes"
do
html
=
File
.
read!
(
"test/fixtures/nypd-facial-recognition-children-teenagers2.html"
)
html
=
File
.
read!
(
"test/fixtures/nypd-facial-recognition-children-teenagers2.html"
)
|>
Floki
.
parse_document!
()
assert
TwitterCard
.
parse
(
html
,
%{})
==
{
:ok
,
...
...
@@ -45,7 +50,9 @@ test "parses twitter card with only property attributes" do
end
test
"parses twitter card with name & property attributes"
do
html
=
File
.
read!
(
"test/fixtures/nypd-facial-recognition-children-teenagers.html"
)
html
=
File
.
read!
(
"test/fixtures/nypd-facial-recognition-children-teenagers.html"
)
|>
Floki
.
parse_document!
()
assert
TwitterCard
.
parse
(
html
,
%{})
==
{
:ok
,
...
...
@@ -73,7 +80,8 @@ test "respect only first title tag on the page" do
"YTQ5MF9EQVIgZXhodW1hdGlvbiBvZiBNYXJnYXJldCBDb3JiaW4gZ3JhdmUgMTkyNi5qcGciXSxbInAiLCJjb252ZXJ0IiwiIl0sWyJwIiwiY29udmVydCIsIi1xdWFsaXR5IDgxIC1hdXRvLW9"
<>
"yaWVudCJdLFsicCIsInRodW1iIiwiNjAweD4iXV0/DAR%20exhumation%20of%20Margaret%20Corbin%20grave%201926.jpg"
html
=
File
.
read!
(
"test/fixtures/margaret-corbin-grave-west-point.html"
)
html
=
File
.
read!
(
"test/fixtures/margaret-corbin-grave-west-point.html"
)
|>
Floki
.
parse_document!
()
assert
TwitterCard
.
parse
(
html
,
%{})
==
{
:ok
,
...
...
@@ -87,7 +95,9 @@ test "respect only first title tag on the page" do
end
test
"takes first founded title in html head if there is html markup error"
do
html
=
File
.
read!
(
"test/fixtures/nypd-facial-recognition-children-teenagers4.html"
)
html
=
File
.
read!
(
"test/fixtures/nypd-facial-recognition-children-teenagers4.html"
)
|>
Floki
.
parse_document!
()
assert
TwitterCard
.
parse
(
html
,
%{})
==
{
:ok
,
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment