FastHTML failed to extract <title>
No rich media preview is rendered for the following URL:
https://royalsocietypublishing.org/doi/10.1098/rsos.160254
The page does have a title element:
However, the title element is missing entirely from the AST returned by Floki.parse_document:
iex(1)> url = "https://royalsocietypublishing.org/doi/10.1098/rsos.160254"
"https://royalsocietypublishing.org/doi/10.1098/rsos.160254"
iex(2)> {:ok, %Tesla.Env{body: html}} = Pleroma.Web.RichMedia.Helpers.rich_media_get(url)
{:ok, %Tesla.Env{...}}
iex(3)> {:ok, html} = Floki.parse_document(html)
{:ok,
[
{"html",
[
{"lang", "en"},
{"class", "pb-page"},
{"data-request-id", "e13702cd-1628-4965-91b9-897820085093"}
],
[
{"head", [{"data-pb-dropzone", "head"}],
[
{"meta",
[
{"name", "pbContext"},
{"content",
";website:website:rsj-site;wgroup:string:Publication Websites;page:string:Cookie Absent"}
], []},
"\n ",
{"link",
[
{"type", "text/css"},
{"rel", "stylesheet"},
{"href", "/pb-assets/css/pbCss-1595579284627.css"}
], []},
"\n\n\n\n\n\n\n\n\n",
{"meta", [{"charset", "UTF-8"}], []},
"\n\n\n\n\n",
{"meta",
[{"name", "robots"}, {"content", "noarchive,noindex,nofollow"}], []},
"\n",
{"meta", [{"name", "pb-robots-disabled"}], []},
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n ",
{"meta", [{"property", "og:url"}, {"content", ""}], []},
"\n \n\n\n\n\n\n\n ",
{"meta",
[
{"name", "viewport"},
{"content", "width=device-width,initial-scale=1"}
], []},
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n \n\n \n \n \n ",
{"link",
[
{"rel", "stylesheet"},
{"type", "text/css"},
{"href", "/wro/k6we~article-metrics-phase3.css"}
], []},
"\n \n\n\n",
{"link",
[
{"rel", "stylesheet"},
{"type", "text/css"},
{"href", "/wro/k6we~product.css"}
], []},
"\n\n",
{"link",
[
{"rel", "stylesheet"},
{"href",
"/products/rsj/releasedAssets/css/build-6ead77cc1fb988c27b05.css"}
], []},
{"link",
[
{"rel", "stylesheet"},
{"href",
"/products/rsj/releasedAssets/css/print-6ead77cc1fb988c27b05.css"},
{"media", "print"}
], []},
"\n\n\n\n\n",
{"meta", [{"http-equiv", "X-UA-Compatible"}, {"content", "IE=edge"}],
[]},
"\n\n ",
{:comment, " Google Tag Manager "},
"\n \n ",
{"script", [],
["(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src='https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M8ZZ5LD');"]},
"\n ",
{:comment, " End Google Tag Manager "},
"\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n \n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n \n \n \n ",
{"link",
[
{"rel", "canonical"},
{"href", "https://royalsocietypublishing.org/action/cookieAbsent"}
], []},
"\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n \n ",
{"script",
[
{"src",
"https://js.braintreegateway.com/web/dropin/1.17.2/js/dropin.min.js"}
], []},
"\n ",
{"script",
[
{"src",
"https://js.braintreegateway.com/web/3.44.2-beta-3ds.6/js/three-d-secure.min.js"}
], []},
"\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n"
]},
"\n",
{"body", [{"class", "pb-ui"}],
[
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n ",
{:comment, " Google Tag Manager (noscript) "},
"\n ",
{"noscript", [],
[
{"iframe",
[
{"src",
"https://www.googletagmanager.com/ns.html?id=GTM-M8ZZ5LD"},
{"height", "0"},
{"width", "0"},
{"style", "display:none;visibility:hidden"}
], []}
]},
"\n ",
{:comment, " End Google Tag Manager (noscript) "},
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
{"script", [{"type", "text/javascript"}],
["\n\n if(false) {\n document.getElementById(\"skipNavigationLink\").onclick =function skipElement () {\n var element = document.getElementById('');\n if(element == null || element == undefined) {\n element = document.getElementsByClassName('').item(0);\n }\n element.setAttribute('tabindex','0');\n element.focus();\n\n }\n\n }\n\n"]},
"\n\n\n\n\n\n ",
{"div", [{"id", "pb-page-content"}, {"data-ng-non-bindable", ""}],
[
"\n ",
{"div",
[{"data-pb-dropzone", "main"}, {"data-pb-dropzone-name", "Main"}],
[
"\n\n\n\n\n \n ",
{"div", [],
[
"\n\n\n\n \n ",
{"div", [],
[
"\n\n\n\n \n \n\n\n\n\n\n\n\n\n\n\n\n\n\n",
{"div", [{"class", "popup login-popup hidden"}],
["\n ", {"div", [{...}], [...]}, "\n"]},
"\n\n\n\n\n \n \n\n\n\n\n\n\n",
{"div", [{"class", "popup change-password-drawer hidden"}],
["\n ", {"a", ...}, "\n ", ...]},
"\n\n\n\n\n\n \n \n\n\n\n\n\n",
{"div", [{"class", ...}], ["\n ", ...]},
"\n\n\n\n\n \n \n\n\n\n",
{"div", [...], ...},
"\n\n\n\n\n \n \n\n\n\n\n",
{...},
...
]},
"\n\n\n\n\n \n ",
{"header",
[
{"data-db-parent-of", "sb1"},
{"class", "header fixed base pageHeader"}
],
[
{"div", [{"class", "institution--res"}],
[{"div", [{...}], [...]}]},
{"div", [{"class", "header--first-row"}],
[{"div", [...], ...}]},
{"div", [{"class", "header--second-row"}], [{"div", ...}]}
]},
"\n"
]},
"\n\n\n\n\n \n ",
{"main", [{"class", "content"}],
[
"\n\n\n\n \n ",
{"div", [{"class", "container"}],
[{"div", [{"class", "row"}], [{"div", [], [...]}]}]},
"\n"
]},
"\n\n\n\n\n \n ",
{"div", [],
[
"\n\n\n\n \n ",
{"footer",
[
{"data-accordion-vport", "screen-sm"},
{"data-accordion-option", "with-arrow"}
],
[
{"div", [{"class", "footer-top"}], [{"div", ...}]},
{"div", [{"class", ...}], [{...}]}
]},
"\n"
]},
"\n "
]},
"\n "
]},
"\n\n\n\n\n\n\t\n ",
{"script",
[
{"src",
"/products/rsj/releasedAssets/js/build.lazyload.bundle-77839196e532a2a939b3.js"}
], []},
{"script",
[
{"src",
"/products/rsj/releasedAssets/js/main.bundle-d4bbe8421415dbe345a3.js"}
], []},
"\n\n\n\n\n\n\n \n \n \n \n ",
{"script",
[
{"type", "text/javascript"},
{"src", "/wro/k6we~article-metrics-phase2.js"}
], []},
"\n \n\n\n",
{"script",
[{"type", "text/javascript"}, {"src", "/wro/k6we~product.js"}], []},
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
]}
]}
]}
iex(4)> html |> Floki.find("title")
[]