1
0
Fork 0
mirror of https://git.pleroma.social/pleroma/pleroma.git synced 2026-02-15 17:16:57 +00:00

Refactor rich media parser to store %Embed{} instead of %Card{}

This commit is contained in:
Alex Gleason 2021-05-04 15:42:24 -05:00
parent 8c4599c1dd
commit ebeb9c6bc9
No known key found for this signature in database
GPG key ID: 7211D1F99744FBB7
5 changed files with 108 additions and 80 deletions

View file

@ -20,6 +20,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
alias Pleroma.Web.MastodonAPI.StatusView
alias Pleroma.Web.MediaProxy
alias Pleroma.Web.PleromaAPI.EmojiReactionController
alias Pleroma.Web.RichMedia.Parser.Card
alias Pleroma.Web.RichMedia.Parser.Embed
import Pleroma.Web.ActivityPub.Visibility, only: [get_visibility: 1, visible_for_user?: 2]
@ -367,10 +369,13 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
nil
end
def render("card.json", %{rich_media: rich_media, page_url: _page_url}) do
rich_media
def render("card.json", %Embed{url: _, meta: _} = embed) do
embed
|> Card.parse()
|> Card.to_map()
end
def render("card.json", %Card{} = card), do: Card.to_map(card)
def render("card.json", _), do: nil
def render("attachment.json", %{attachment: attachment}) do

View file

@ -8,7 +8,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
alias Pleroma.HTML
alias Pleroma.Object
alias Pleroma.Web.RichMedia.Parser
alias Pleroma.Web.RichMedia.Parser.Card
alias Pleroma.Web.RichMedia.Parser.Embed
@options [
pool: :media,
@ -58,26 +58,15 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|> hd
end
# Converts a card into a plain map whose keys are strings instead of atoms.
# A %Card{} is first unwrapped into a bare map and then handled by the
# generic map clause below.
defp strip_card(%Card{} = card), do: card |> Map.from_struct() |> strip_card()

defp strip_card(%{} = card) do
  for {field, value} <- card, into: %{} do
    {Atom.to_string(field), value}
  end
end
def fetch_data_for_object(object) do
with true <- Config.get([:rich_media, :enabled]),
{:ok, page_url} <-
HTML.extract_first_external_url_from_object(object),
:ok <- validate_page_url(page_url),
{:ok, rich_media} <- Parser.parse(page_url),
rich_media <- strip_card(rich_media) do
%{page_url: page_url, rich_media: rich_media}
{:ok, %Embed{} = embed} <- Parser.parse(page_url) do
embed
else
_ -> %{}
_ -> nil
end
end
@ -86,7 +75,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
%Object{} = object <- Object.normalize(activity, fetch: false) do
fetch_data_for_object(object)
else
_ -> %{}
_ -> nil
end
end

View file

@ -5,6 +5,7 @@
defmodule Pleroma.Web.RichMedia.Parser do
require Logger
alias Pleroma.Web.RichMedia.Parser.Card
alias Pleroma.Web.RichMedia.Parser.Embed
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
@ -133,7 +134,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
# Resolves rich media for `url`.
#
# The oEmbed lookup (`maybe_fetch_oembed/1`) is tried first; when it succeeds
# with a %Card{} or an %Embed{}, that value is returned wrapped in `{:ok, _}`.
def parse_url(url) do
case maybe_fetch_oembed(url) do
{:ok, %Card{} = card} -> {:ok, card}
{:ok, %Embed{} = embed} -> {:ok, embed}
# Any other result (error tuple or unexpected shape) falls back to `fetch_document/1`.
_ -> fetch_document(url)
end
end
@ -143,8 +144,9 @@ defmodule Pleroma.Web.RichMedia.Parser do
{:ok, %Tesla.Env{body: json}} <-
Pleroma.Web.RichMedia.Helpers.oembed_get(oembed_url),
{:ok, data} <- Jason.decode(json),
%Card{} = card <- Card.from_oembed(data, url) do
{:ok, card}
embed <- %Embed{url: url, oembed: data},
{:ok, %Card{}} <- Card.validate(embed) do
{:ok, embed}
else
{:error, error} -> {:error, error}
error -> {:error, error}
@ -153,36 +155,19 @@ defmodule Pleroma.Web.RichMedia.Parser do
defp fetch_document(url) do
with {:ok, %Tesla.Env{body: html}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url),
{:ok, html} <- Floki.parse_document(html) do
html
|> maybe_parse()
|> Map.put("url", url)
|> clean_parsed_data()
|> Card.from_discovery(url)
|> check_card()
{:ok, html} <- Floki.parse_document(html),
%Embed{} = embed <- parse_embed(html, url),
{:ok, %Card{}} <- Card.validate(embed) do
{:ok, embed}
else
{:error, error} -> {:error, error}
error -> {:error, error}
end
end
defp maybe_parse(html) do
Enum.reduce(parsers(), %{}, fn parser, acc ->
defp parse_embed(html, url) do
Enum.reduce(parsers(), %Embed{url: url}, fn parser, acc ->
parser.parse(html, acc)
end)
end
# Accepts a card only when it is a %Card{} carrying a non-empty binary title;
# everything else is reported as invalid metadata together with the rejected value.
defp check_card(card) do
  case card do
    %Card{title: title} when is_binary(title) and title != "" ->
      {:ok, card}

    _ ->
      {:error, {:invalid_metadata, card}}
  end
end
# Keeps only the key/value pairs that Jason can encode, returning them as a map.
defp clean_parsed_data(data) do
  encodable? = fn {key, val} ->
    match?({:ok, _}, Jason.encode(%{key => val}))
  end

  data
  |> Enum.filter(encodable?)
  |> Map.new()
end
end

View file

@ -3,6 +3,9 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser.Card do
alias Pleroma.Web.RichMedia.Parser.Card
alias Pleroma.Web.RichMedia.Parser.Embed
@types ["link", "photo", "video", "rich"]
# https://docs.joinmastodon.org/entities/card/
@ -21,12 +24,13 @@ defmodule Pleroma.Web.RichMedia.Parser.Card do
embed_url: "",
blurhash: nil
def from_oembed(%{"type" => type, "title" => title} = oembed, url) when type in @types do
%__MODULE__{
def parse(%{url: url, oembed: %{"type" => type, "title" => title} = oembed} = embed)
when type in @types do
%Card{
url: url,
title: title,
description: "",
type: type,
description: get_description(embed),
type: oembed["type"],
author_name: oembed["author_name"],
author_url: oembed["author_url"],
provider_name: oembed["provider_name"],
@ -39,39 +43,74 @@ defmodule Pleroma.Web.RichMedia.Parser.Card do
}
end
def from_oembed(_oembed, _url), do: nil
def parse(%{url: url} = embed) do
title = get_title(embed)
def from_discovery(%{"type" => "link"} = rich_media, page_url) do
page_url_data = URI.parse(page_url)
page_url_data =
if is_binary(rich_media["url"]) do
URI.merge(page_url_data, URI.parse(rich_media["url"]))
else
page_url_data
end
page_url = page_url_data |> to_string
image_url =
if is_binary(rich_media["image"]) do
URI.merge(page_url_data, URI.parse(rich_media["image"]))
|> to_string
end
%__MODULE__{
type: "link",
provider_name: page_url_data.host,
provider_url: page_url_data.scheme <> "://" <> page_url_data.host,
url: page_url,
image: image_url |> proxy(),
title: rich_media["title"] || "",
description: rich_media["description"] || ""
}
if is_binary(title) do
%Card{
url: url,
title: title,
description: get_description(embed),
type: "link",
image: get_image(embed) |> proxy()
}
else
nil
end
end
def from_discovery(rich_media, url), do: from_oembed(rich_media, url)
def parse(_), do: nil
# Picks the title for an embed. Precedence: the "twitter:title" meta entry,
# then "og:title", then the embed's own `:title` key. Only non-empty binaries
# count; when none qualifies the result is "".
defp get_title(embed) do
  from_meta =
    Enum.find_value(["twitter:title", "og:title"], fn key ->
      case embed do
        %{meta: %{^key => title}} when is_binary(title) and title != "" -> title
        _ -> nil
      end
    end)

  cond do
    from_meta -> from_meta
    match?(%{title: t} when is_binary(t) and t != "", embed) -> embed.title
    true -> ""
  end
end
# Picks the description from the embed's meta. Precedence:
# "twitter:description", "og:description", then "description". Only non-empty
# binaries count; when none qualifies (or meta is not a map) the result is "".
defp get_description(%{meta: meta}) do
  keys = ["twitter:description", "og:description", "description"]

  Enum.find_value(keys, "", fn key ->
    case meta do
      %{^key => desc} when is_binary(desc) and desc != "" -> desc
      _ -> nil
    end
  end)
end
# Picks the preview image from the embed's meta. Precedence: "twitter:image",
# then "og:image". Only non-empty binaries count; when none qualifies (or meta
# is not a map) the result is "".
defp get_image(%{meta: meta}) do
  Enum.find_value(["twitter:image", "og:image"], "", fn key ->
    case meta do
      %{^key => image} when is_binary(image) and image != "" -> image
      _ -> nil
    end
  end)
end
@doc """
Converts a card into a plain map with string keys.

A `%Card{}` is unwrapped from its struct first; any other map is converted
directly. Keys are expected to be atoms.
"""
def to_map(%Card{} = card), do: stringify_keys(Map.from_struct(card))
def to_map(%{} = card), do: stringify_keys(card)

# Rebuilds the map with every atom key replaced by its string form.
defp stringify_keys(%{} = map) do
  for {key, value} <- map, into: %{} do
    {Atom.to_string(key), value}
  end
end
# Routes a binary URL through the media proxy; any non-binary value yields nil.
defp proxy(url) do
  if is_binary(url) do
    Pleroma.Web.MediaProxy.url(url)
  else
    nil
  end
end
@doc """
Validates a card, or an embed by first parsing it into a card.

Returns `{:ok, card}` when the card has a known type and a non-empty binary
title. Returns `{:error, {:invalid_metadata, value}}` otherwise, where `value`
is the rejected card (or whatever `parse/1` produced for the embed).
"""
def validate(%Card{type: type, title: title} = card)
    when is_binary(title) and title != "" and type in @types,
    do: {:ok, card}

def validate(%Embed{} = embed) do
  with %Card{} = card <- Card.parse(embed) do
    validate(card)
  else
    other -> {:error, {:invalid_metadata, other}}
  end
end

def validate(card), do: {:error, {:invalid_metadata, card}}
end

View file

@ -0,0 +1,10 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser.Embed do
  @moduledoc """
  Represents embedded content, including scraped markup and OEmbed.

  ## Fields

    * `:url` - the URL the content was fetched for.
    * `:title` - a plain title for the content. `Card` falls back to this key
      when no `"twitter:title"` / `"og:title"` meta entry is present, so it is
      declared here to make that fallback reachable for `%Embed{}` values.
      Defaults to `nil`.
    * `:meta` - scraped metadata, looked up by string keys such as
      `"og:title"` or `"twitter:image"`.
    * `:oembed` - the decoded OEmbed JSON document, when one was fetched.
  """

  @type t :: %__MODULE__{
          url: String.t() | nil,
          title: String.t() | nil,
          meta: map() | nil,
          oembed: map() | nil
        }

  defstruct url: nil, title: nil, meta: nil, oembed: nil
end