Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Pleroma
pleroma
Commits
1e49bfa9
Commit
1e49bfa9
authored
Jun 15, 2020
by
lain
Browse files
Merge branch 'merge-ogp-twitter-parsers' into 'develop'
Merge OGP parser with TwitterCard Closes
#1835
See merge request
!2642
parents
448e93ce
bd63089a
Pipeline
#27246
passed with stages
in 57 minutes and 22 seconds
Changes
9
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
CHANGELOG.md
View file @
1e49bfa9
...
...
@@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
### Changed
-
MFR policy to set global expiration for all local Create activities
-
OGP rich media parser merged with TwitterCard
<details>
<summary>
API Changes
</summary>
-
**Breaking:**
Emoji API: changed methods and renamed routes.
...
...
config/config.exs
View file @
1e49bfa9
...
...
@@ -387,7 +387,6 @@
ignore_tld:
[
"local"
,
"localdomain"
,
"lan"
],
parsers:
[
Pleroma
.
Web
.
RichMedia
.
Parsers
.
TwitterCard
,
Pleroma
.
Web
.
RichMedia
.
Parsers
.
OGP
,
Pleroma
.
Web
.
RichMedia
.
Parsers
.
OEmbed
],
ttl_setters:
[
Pleroma
.
Web
.
RichMedia
.
Parser
.
TTL
.
AwsSignedUrl
]
...
...
config/description.exs
View file @
1e49bfa9
...
...
@@ -2104,9 +2104,7 @@
description:
"List of Rich Media parsers. Module names are shortened (removed leading `Pleroma.Web.RichMedia.Parsers.` part), but on adding custom module you need to use full name."
,
suggestions:
[
Pleroma
.
Web
.
RichMedia
.
Parsers
.
MetaTagsParser
,
Pleroma
.
Web
.
RichMedia
.
Parsers
.
OEmbed
,
Pleroma
.
Web
.
RichMedia
.
Parsers
.
OGP
,
Pleroma
.
Web
.
RichMedia
.
Parsers
.
TwitterCard
]
},
...
...
lib/pleroma/web/rich_media/parser.ex
View file @
1e49bfa9
...
...
@@ -105,8 +105,8 @@ defp parse_html(html), do: Floki.parse_document!(html)
defp
maybe_parse
(
html
)
do
Enum
.
reduce_while
(
parsers
(),
%{},
fn
parser
,
acc
->
case
parser
.
parse
(
html
,
acc
)
do
{
:ok
,
data
}
->
{
:halt
,
data
}
{
:error
,
_msg
}
->
{
:cont
,
acc
}
data
when
data
!=
%{
}
->
{
:halt
,
data
}
_
->
{
:cont
,
acc
}
end
end
)
end
...
...
lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
View file @
1e49bfa9
...
...
@@ -3,22 +3,15 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule
Pleroma
.
Web
.
RichMedia
.
Parsers
.
MetaTagsParser
do
def
parse
(
html
,
data
,
prefix
,
error_message
,
key_name
,
value_name
\\
"content"
)
do
meta_data
=
html
|>
get_elements
(
key_name
,
prefix
)
|>
Enum
.
reduce
(
data
,
fn
el
,
acc
->
attributes
=
normalize_attributes
(
el
,
prefix
,
key_name
,
value_name
)
Map
.
merge
(
acc
,
attributes
)
end
)
|>
maybe_put_title
(
html
)
if
Enum
.
empty?
(
meta_data
)
do
{
:error
,
error_message
}
else
{
:ok
,
meta_data
}
end
def
parse
(
data
,
html
,
prefix
,
key_name
,
value_name
\\
"content"
)
do
html
|>
get_elements
(
key_name
,
prefix
)
|>
Enum
.
reduce
(
data
,
fn
el
,
acc
->
attributes
=
normalize_attributes
(
el
,
prefix
,
key_name
,
value_name
)
Map
.
merge
(
acc
,
attributes
)
end
)
|>
maybe_put_title
(
html
)
end
defp
get_elements
(
html
,
key_name
,
prefix
)
do
...
...
lib/pleroma/web/rich_media/parsers/oembed_parser.ex
View file @
1e49bfa9
...
...
@@ -7,9 +7,9 @@ def parse(html, _data) do
with
elements
=
[
_
|
_
]
<-
get_discovery_data
(
html
),
oembed_url
when
is_binary
(
oembed_url
)
<-
get_oembed_url
(
elements
),
{
:ok
,
oembed_data
}
<-
get_oembed_data
(
oembed_url
)
do
{
:ok
,
oembed_data
}
oembed_data
else
_e
->
{
:error
,
"No OEmbed data found"
}
_e
->
%
{}
end
end
...
...
lib/pleroma/web/rich_media/parsers/ogp.ex
View file @
1e49bfa9
...
...
@@ -3,13 +3,8 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule
Pleroma
.
Web
.
RichMedia
.
Parsers
.
OGP
do
def
parse
(
html
,
data
)
do
Pleroma
.
Web
.
RichMedia
.
Parsers
.
MetaTagsParser
.
parse
(
html
,
data
,
"og"
,
"No OGP metadata found"
,
"property"
)
@deprecated
"OGP parser is deprecated. Use TwitterCard instead."
def
parse
(
_html
,
_data
)
do
%{}
end
end
lib/pleroma/web/rich_media/parsers/twitter_card.ex
View file @
1e49bfa9
...
...
@@ -5,18 +5,11 @@
defmodule
Pleroma
.
Web
.
RichMedia
.
Parsers
.
TwitterCard
do
alias
Pleroma
.
Web
.
RichMedia
.
Parsers
.
MetaTagsParser
@spec
parse
(
String
.
t
(),
map
())
::
{
:ok
,
map
()}
|
{
:error
,
String
.
t
()}
@spec
parse
(
lis
t
(),
map
())
::
map
()
def
parse
(
html
,
data
)
do
data
|>
parse_name_attrs
(
html
)
|>
parse_property_attrs
(
html
)
end
defp
parse_name_attrs
(
data
,
html
)
do
MetaTagsParser
.
parse
(
html
,
data
,
"twitter"
,
%{},
"name"
)
end
defp
parse_property_attrs
({
_
,
data
},
html
)
do
MetaTagsParser
.
parse
(
html
,
data
,
"twitter"
,
"No twitter card metadata found"
,
"property"
)
|>
MetaTagsParser
.
parse
(
html
,
"og"
,
"property"
)
|>
MetaTagsParser
.
parse
(
html
,
"twitter"
,
"name"
)
|>
MetaTagsParser
.
parse
(
html
,
"twitter"
,
"property"
)
end
end
test/web/rich_media/parsers/twitter_card_test.exs
View file @
1e49bfa9
...
...
@@ -7,8 +7,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
alias
Pleroma
.
Web
.
RichMedia
.
Parsers
.
TwitterCard
test
"returns error when html not contains twitter card"
do
assert
TwitterCard
.
parse
([{
"html"
,
[],
[{
"head"
,
[],
[]},
{
"body"
,
[],
[]}]}],
%{})
==
{
:error
,
"No twitter card metadata found"
}
assert
TwitterCard
.
parse
([{
"html"
,
[],
[{
"head"
,
[],
[]},
{
"body"
,
[],
[]}]}],
%{})
==
%{}
end
test
"parses twitter card with only name attributes"
do
...
...
@@ -17,15 +16,21 @@ test "parses twitter card with only name attributes" do
|>
Floki
.
parse_document!
()
assert
TwitterCard
.
parse
(
html
,
%{})
==
{
:ok
,
%{
"app:id:googleplay"
=>
"com.nytimes.android"
,
"app:name:googleplay"
=>
"NYTimes"
,
"app:url:googleplay"
=>
"nytimes://reader/id/100000006583622"
,
"site"
=>
nil
,
"title"
=>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
}}
%{
"app:id:googleplay"
=>
"com.nytimes.android"
,
"app:name:googleplay"
=>
"NYTimes"
,
"app:url:googleplay"
=>
"nytimes://reader/id/100000006583622"
,
"site"
=>
nil
,
"description"
=>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers."
,
"image"
=>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg"
,
"type"
=>
"article"
,
"url"
=>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
,
"title"
=>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
}
end
test
"parses twitter card with only property attributes"
do
...
...
@@ -34,19 +39,19 @@ test "parses twitter card with only property attributes" do
|>
Floki
.
parse_document!
()
assert
TwitterCard
.
parse
(
html
,
%{})
==
{
:ok
,
%{
"card"
=>
"summary_large_image"
,
"description"
=>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers."
,
"image"
=>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg
"
,
"image:alt"
=>
""
,
"title"
=>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
,
"url"
=>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html
"
}
}
%
{
"card"
=>
"summary_large_image"
,
"description"
=>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers."
,
"image"
=>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg"
,
"image:alt"
=>
"
"
,
"title"
=>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
,
"url"
=>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
,
"type"
=>
"article
"
}
end
test
"parses twitter card with name & property attributes"
do
...
...
@@ -55,23 +60,23 @@ test "parses twitter card with name & property attributes" do
|>
Floki
.
parse_document!
()
assert
TwitterCard
.
parse
(
html
,
%{})
==
{
:ok
,
%{
"app:
id
:googleplay"
=>
"
com.nytimes.android
"
,
"app:
name
:googleplay"
=>
"
NYT
imes"
,
"app:url:googleplay"
=>
"nytimes://reader/id/100000006583622
"
,
"card"
=>
"summary_large_image"
,
"description"
=>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers."
,
"image"
=>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg
"
,
"image:alt
"
=>
""
,
"s
ite"
=>
nil
,
"title"
=>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
,
"url"
=>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html
"
}
}
%
{
"app:id:googleplay"
=>
"com.nytimes.android"
,
"app:
name
:googleplay"
=>
"
NYTimes
"
,
"app:
url
:googleplay"
=>
"
nyt
imes
://reader/id/100000006583622
"
,
"card"
=>
"summary_large_image
"
,
"description"
=>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers."
,
"image"
=>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg"
,
"image:alt"
=>
"
"
,
"site
"
=>
nil
,
"t
it
l
e"
=>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
,
"url"
=>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
,
"type"
=>
"article
"
}
end
test
"respect only first title tag on the page"
do
...
...
@@ -84,14 +89,17 @@ test "respect only first title tag on the page" do
File
.
read!
(
"test/fixtures/margaret-corbin-grave-west-point.html"
)
|>
Floki
.
parse_document!
()
assert
TwitterCard
.
parse
(
html
,
%{})
==
{
:ok
,
%{
"site"
=>
"@atlasobscura"
,
"title"
=>
"The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura"
,
"card"
=>
"summary_large_image"
,
"image"
=>
image_path
}}
%{
"site"
=>
"@atlasobscura"
,
"title"
=>
"The Missing Grave of Margaret Corbin, Revolutionary War Veteran"
,
"card"
=>
"summary_large_image"
,
"image"
=>
image_path
,
"description"
=>
"She's the only woman veteran honored with a monument at West Point. But where was she buried?"
,
"site_name"
=>
"Atlas Obscura"
,
"type"
=>
"article"
,
"url"
=>
"http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point"
}
end
test
"takes first founded title in html head if there is html markup error"
do
...
...
@@ -100,14 +108,20 @@ test "takes first founded title in html head if there is html markup error" do
|>
Floki
.
parse_document!
()
assert
TwitterCard
.
parse
(
html
,
%{})
==
{
:ok
,
%{
"site"
=>
nil
,
"title"
=>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
,
"app:id:googleplay"
=>
"com.nytimes.android"
,
"app:name:googleplay"
=>
"NYTimes"
,
"app:url:googleplay"
=>
"nytimes://reader/id/100000006583622"
}}
%{
"site"
=>
nil
,
"title"
=>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
,
"app:id:googleplay"
=>
"com.nytimes.android"
,
"app:name:googleplay"
=>
"NYTimes"
,
"app:url:googleplay"
=>
"nytimes://reader/id/100000006583622"
,
"description"
=>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers."
,
"image"
=>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg"
,
"type"
=>
"article"
,
"url"
=>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
}
end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment