mirror of
https://git.pleroma.social/pleroma/pleroma.git
synced 2026-02-15 17:16:57 +00:00
Refactor rich media parser to store %Embed{} instead of %Card{}
This commit is contained in:
parent
8c4599c1dd
commit
ebeb9c6bc9
5 changed files with 108 additions and 80 deletions
|
|
@ -20,6 +20,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
|
|||
alias Pleroma.Web.MastodonAPI.StatusView
|
||||
alias Pleroma.Web.MediaProxy
|
||||
alias Pleroma.Web.PleromaAPI.EmojiReactionController
|
||||
alias Pleroma.Web.RichMedia.Parser.Card
|
||||
alias Pleroma.Web.RichMedia.Parser.Embed
|
||||
|
||||
import Pleroma.Web.ActivityPub.Visibility, only: [get_visibility: 1, visible_for_user?: 2]
|
||||
|
||||
|
|
@ -367,10 +369,13 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
|
|||
nil
|
||||
end
|
||||
|
||||
def render("card.json", %{rich_media: rich_media, page_url: _page_url}) do
|
||||
rich_media
|
||||
def render("card.json", %Embed{url: _, meta: _} = embed) do
|
||||
embed
|
||||
|> Card.parse()
|
||||
|> Card.to_map()
|
||||
end
|
||||
|
||||
def render("card.json", %Card{} = card), do: Card.to_map(card)
|
||||
def render("card.json", _), do: nil
|
||||
|
||||
def render("attachment.json", %{attachment: attachment}) do
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|
|||
alias Pleroma.HTML
|
||||
alias Pleroma.Object
|
||||
alias Pleroma.Web.RichMedia.Parser
|
||||
alias Pleroma.Web.RichMedia.Parser.Card
|
||||
alias Pleroma.Web.RichMedia.Parser.Embed
|
||||
|
||||
@options [
|
||||
pool: :media,
|
||||
|
|
@ -58,26 +58,15 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|
|||
|> hd
|
||||
end
|
||||
|
||||
# Converts a card into a plain map whose keys are strings.
#
# A %Card{} struct is first reduced to a bare map (dropping :__struct__) and
# then handled by the plain-map clause; any other map has each of its atom
# keys turned into the equivalent string, with values left untouched.
defp strip_card(%Card{} = card), do: card |> Map.from_struct() |> strip_card()

defp strip_card(%{} = card) do
  Map.new(card, fn {key, value} -> {Atom.to_string(key), value} end)
end
|
||||
|
||||
def fetch_data_for_object(object) do
|
||||
with true <- Config.get([:rich_media, :enabled]),
|
||||
{:ok, page_url} <-
|
||||
HTML.extract_first_external_url_from_object(object),
|
||||
:ok <- validate_page_url(page_url),
|
||||
{:ok, rich_media} <- Parser.parse(page_url),
|
||||
rich_media <- strip_card(rich_media) do
|
||||
%{page_url: page_url, rich_media: rich_media}
|
||||
{:ok, %Embed{} = embed} <- Parser.parse(page_url) do
|
||||
embed
|
||||
else
|
||||
_ -> %{}
|
||||
_ -> nil
|
||||
end
|
||||
end
|
||||
|
||||
|
|
@ -86,7 +75,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|
|||
%Object{} = object <- Object.normalize(activity, fetch: false) do
|
||||
fetch_data_for_object(object)
|
||||
else
|
||||
_ -> %{}
|
||||
_ -> nil
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
defmodule Pleroma.Web.RichMedia.Parser do
|
||||
require Logger
|
||||
alias Pleroma.Web.RichMedia.Parser.Card
|
||||
alias Pleroma.Web.RichMedia.Parser.Embed
|
||||
|
||||
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
|
||||
|
||||
|
|
@ -133,7 +134,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
|
|||
|
||||
def parse_url(url) do
|
||||
case maybe_fetch_oembed(url) do
|
||||
{:ok, %Card{} = card} -> {:ok, card}
|
||||
{:ok, %Embed{} = embed} -> {:ok, embed}
|
||||
_ -> fetch_document(url)
|
||||
end
|
||||
end
|
||||
|
|
@ -143,8 +144,9 @@ defmodule Pleroma.Web.RichMedia.Parser do
|
|||
{:ok, %Tesla.Env{body: json}} <-
|
||||
Pleroma.Web.RichMedia.Helpers.oembed_get(oembed_url),
|
||||
{:ok, data} <- Jason.decode(json),
|
||||
%Card{} = card <- Card.from_oembed(data, url) do
|
||||
{:ok, card}
|
||||
embed <- %Embed{url: url, oembed: data},
|
||||
{:ok, %Card{}} <- Card.validate(embed) do
|
||||
{:ok, embed}
|
||||
else
|
||||
{:error, error} -> {:error, error}
|
||||
error -> {:error, error}
|
||||
|
|
@ -153,36 +155,19 @@ defmodule Pleroma.Web.RichMedia.Parser do
|
|||
|
||||
defp fetch_document(url) do
|
||||
with {:ok, %Tesla.Env{body: html}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url),
|
||||
{:ok, html} <- Floki.parse_document(html) do
|
||||
html
|
||||
|> maybe_parse()
|
||||
|> Map.put("url", url)
|
||||
|> clean_parsed_data()
|
||||
|> Card.from_discovery(url)
|
||||
|> check_card()
|
||||
{:ok, html} <- Floki.parse_document(html),
|
||||
%Embed{} = embed <- parse_embed(html, url),
|
||||
{:ok, %Card{}} <- Card.validate(embed) do
|
||||
{:ok, embed}
|
||||
else
|
||||
{:error, error} -> {:error, error}
|
||||
error -> {:error, error}
|
||||
end
|
||||
end
|
||||
|
||||
defp maybe_parse(html) do
|
||||
Enum.reduce(parsers(), %{}, fn parser, acc ->
|
||||
defp parse_embed(html, url) do
|
||||
Enum.reduce(parsers(), %Embed{url: url}, fn parser, acc ->
|
||||
parser.parse(html, acc)
|
||||
end)
|
||||
end
|
||||
|
||||
# Accepts a card only when it is a %Card{} whose title is a non-empty binary.
#
# Returns {:ok, card} in that case; anything else (wrong struct, nil, blank or
# non-binary title) is reported as {:error, {:invalid_metadata, card}}.
defp check_card(card) do
  case card do
    %Card{title: title} when is_binary(title) and title != "" ->
      {:ok, card}

    _ ->
      {:error, {:invalid_metadata, card}}
  end
end
|
||||
|
||||
# Keeps only the key/value pairs of `data` that can be JSON-encoded.
#
# Each pair is test-encoded on its own as a one-entry map; pairs for which
# Jason.encode/1 does not return {:ok, _} are dropped. The survivors are
# collected back into a map.
defp clean_parsed_data(data) do
  encodable? = fn {key, val} ->
    match?({:ok, _}, Jason.encode(%{key => val}))
  end

  Map.new(Enum.filter(data, encodable?))
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
defmodule Pleroma.Web.RichMedia.Parser.Card do
|
||||
alias Pleroma.Web.RichMedia.Parser.Card
|
||||
alias Pleroma.Web.RichMedia.Parser.Embed
|
||||
|
||||
@types ["link", "photo", "video", "rich"]
|
||||
|
||||
# https://docs.joinmastodon.org/entities/card/
|
||||
|
|
@ -21,12 +24,13 @@ defmodule Pleroma.Web.RichMedia.Parser.Card do
|
|||
embed_url: "",
|
||||
blurhash: nil
|
||||
|
||||
def from_oembed(%{"type" => type, "title" => title} = oembed, url) when type in @types do
|
||||
%__MODULE__{
|
||||
def parse(%{url: url, oembed: %{"type" => type, "title" => title} = oembed} = embed)
|
||||
when type in @types do
|
||||
%Card{
|
||||
url: url,
|
||||
title: title,
|
||||
description: "",
|
||||
type: type,
|
||||
description: get_description(embed),
|
||||
type: oembed["type"],
|
||||
author_name: oembed["author_name"],
|
||||
author_url: oembed["author_url"],
|
||||
provider_name: oembed["provider_name"],
|
||||
|
|
@ -39,39 +43,74 @@ defmodule Pleroma.Web.RichMedia.Parser.Card do
|
|||
}
|
||||
end
|
||||
|
||||
def from_oembed(_oembed, _url), do: nil
|
||||
def parse(%{url: url} = embed) do
|
||||
title = get_title(embed)
|
||||
|
||||
def from_discovery(%{"type" => "link"} = rich_media, page_url) do
|
||||
page_url_data = URI.parse(page_url)
|
||||
|
||||
page_url_data =
|
||||
if is_binary(rich_media["url"]) do
|
||||
URI.merge(page_url_data, URI.parse(rich_media["url"]))
|
||||
else
|
||||
page_url_data
|
||||
end
|
||||
|
||||
page_url = page_url_data |> to_string
|
||||
|
||||
image_url =
|
||||
if is_binary(rich_media["image"]) do
|
||||
URI.merge(page_url_data, URI.parse(rich_media["image"]))
|
||||
|> to_string
|
||||
end
|
||||
|
||||
%__MODULE__{
|
||||
type: "link",
|
||||
provider_name: page_url_data.host,
|
||||
provider_url: page_url_data.scheme <> "://" <> page_url_data.host,
|
||||
url: page_url,
|
||||
image: image_url |> proxy(),
|
||||
title: rich_media["title"] || "",
|
||||
description: rich_media["description"] || ""
|
||||
}
|
||||
if is_binary(title) do
|
||||
%Card{
|
||||
url: url,
|
||||
title: title,
|
||||
description: get_description(embed),
|
||||
type: "link",
|
||||
image: get_image(embed) |> proxy()
|
||||
}
|
||||
else
|
||||
nil
|
||||
end
|
||||
end
|
||||
|
||||
def from_discovery(rich_media, url), do: from_oembed(rich_media, url)
|
||||
def parse(_), do: nil
|
||||
|
||||
# Picks the card title from an embed. Candidates are tried in this order:
#
#   1. the "twitter:title" entry of :meta
#   2. the "og:title" entry of :meta
#   3. a top-level :title key
#
# A candidate only counts when it is a non-empty binary. When none qualifies
# the result is "" (never nil), so callers must treat "" as "no title found".
defp get_title(%{meta: %{"twitter:title" => title}}) when is_binary(title) and title != "",
  do: title

defp get_title(%{meta: %{"og:title" => title}}) when is_binary(title) and title != "",
  do: title

defp get_title(%{title: title}) when is_binary(title) and title != "",
  do: title

defp get_title(_embed), do: ""
|
||||
|
||||
# Picks the card description from the embed's :meta entries, preferring
# "twitter:description", then "og:description", then "description".
#
# Only non-empty binaries are accepted. If :meta holds no usable entry
# (including when :meta is nil) the result is "". The argument must be a map
# with a :meta key; anything else raises FunctionClauseError.
defp get_description(%{meta: %{"twitter:description" => desc}})
     when is_binary(desc) and desc != "",
     do: desc

defp get_description(%{meta: %{"og:description" => desc}})
     when is_binary(desc) and desc != "",
     do: desc

defp get_description(%{meta: %{"description" => desc}})
     when is_binary(desc) and desc != "",
     do: desc

defp get_description(%{meta: _meta}), do: ""
|
||||
|
||||
# Picks the card image URL from the embed's :meta entries, preferring
# "twitter:image" over "og:image".
#
# Only non-empty binaries are accepted. If neither entry is usable (including
# when :meta is nil) the result is "". The argument must be a map with a :meta
# key; anything else raises FunctionClauseError.
defp get_image(%{meta: %{"twitter:image" => image}}) when is_binary(image) and image != "",
  do: image

defp get_image(%{meta: %{"og:image" => image}}) when is_binary(image) and image != "",
  do: image

defp get_image(%{meta: _meta}), do: ""
|
||||
|
||||
@doc """
Converts a card into a plain map with string keys.

A `%Card{}` is first stripped of its struct tag; any other map is used as-is.
In both cases every atom key is replaced by its string form and the values are
left unchanged.
"""
def to_map(%Card{} = card), do: card |> Map.from_struct() |> stringify_keys()

def to_map(%{} = card), do: stringify_keys(card)

# Rebuilds `map` with each atom key converted to a string.
defp stringify_keys(%{} = map) do
  for {key, value} <- map, into: %{}, do: {Atom.to_string(key), value}
end
|
||||
|
||||
# Routes a binary URL through Pleroma.Web.MediaProxy.url/1; any non-binary
# input (e.g. nil) yields nil without calling the proxy.
defp proxy(url) do
  if is_binary(url) do
    Pleroma.Web.MediaProxy.url(url)
  else
    nil
  end
end
|
||||
|
||||
@doc """
Checks that a card, or the card derived from an embed, is usable.

  * `%Card{}` - returns `{:ok, card}` when its type is one of `@types` and its
    title is a non-empty binary.
  * `%Embed{}` - the embed is run through `Card.parse/1` and the resulting card
    is validated by the rule above.

Every other outcome returns `{:error, {:invalid_metadata, value}}`, where
`value` is the rejected card, the non-card result of parsing, or the original
argument.
"""
def validate(%Card{type: type, title: title} = card)
    when type in @types and is_binary(title) and title != "",
    do: {:ok, card}

def validate(%Embed{} = embed) do
  with %Card{} = card <- Card.parse(embed) do
    validate(card)
  else
    not_a_card -> {:error, {:invalid_metadata, not_a_card}}
  end
end

def validate(other), do: {:error, {:invalid_metadata, other}}
|
||||
end
|
||||
|
|
|
|||
10
lib/pleroma/web/rich_media/parser/embed.ex
Normal file
10
lib/pleroma/web/rich_media/parser/embed.ex
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
# Pleroma: A lightweight social networking server
|
||||
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
|
||||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
defmodule Pleroma.Web.RichMedia.Parser.Embed do
  @moduledoc """
  Represents embedded content, including scraped markup and OEmbed.

  Fields (all default to `nil`):

    * `url` - the URL the embed was built for.
    * `title` - a plain title for the content. The card builder falls back to
      this when no `twitter:title` / `og:title` meta entry is usable.
      NOTE(review): which parser is expected to fill this in is not visible
      here - confirm against the parser modules.
    * `meta` - scraped metadata keyed by string names such as `"og:title"` or
      `"twitter:description"`.
    * `oembed` - the decoded OEmbed document, when one was fetched.
  """

  # `title` is declared so that the `%{title: title}` fallback used when
  # building a card can actually match an %Embed{}; without the field that
  # pattern could never succeed for this struct.
  defstruct url: nil, title: nil, meta: nil, oembed: nil
end
|
||||
Loading…
Add table
Add a link
Reference in a new issue