Add collection fetching module

This commit is contained in:
FloatingGhost 2022-07-03 19:20:59 +01:00
parent 0a3a552696
commit 05081cd81b
9 changed files with 328 additions and 1 deletions

View file

@ -363,7 +363,8 @@ config :pleroma, :activitypub,
follow_handshake_timeout: 500,
note_replies_output_limit: 5,
sign_object_fetches: true,
authorized_fetch_mode: false,
max_collection_objects: 50
config :pleroma, :streamer,
workers: 3,

View file

@ -0,0 +1,68 @@
# Akkoma: The cooler fediverse server
# Copyright © 2022- Akkoma Authors <https://akkoma.dev/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Akkoma.Collections.Fetcher do
  @moduledoc """
  Activitypub Collections fetching functions
  see: https://www.w3.org/TR/activitystreams-core/#paging

  A collection either embeds its items directly (`items` / `orderedItems`)
  or points at a `first` page; pages are then followed through their `next`
  links. Paging stops once the number of gathered items reaches the
  `[:activitypub, :max_collection_objects]` setting, when a page yields no
  items, or when a page cannot be fetched.
  """
  alias Pleroma.Object.Fetcher
  alias Pleroma.Config
  require Logger

  @doc """
  Fetches the collection identified by `ap_id` and returns its items.

  Returns `{:ok, items}` when the collection document itself could be
  fetched. `items` is an empty list when the document has no recognisable
  item array or `first` page. Any other result of fetching the collection
  document is returned unchanged.

  Failures while fetching individual pages are logged and end the paging;
  the items gathered up to that point are still returned.
  """
  def fetch_collection_by_ap_id(ap_id) when is_binary(ap_id) do
    fetch_collection(ap_id)
  end

  # Fetches the root collection document and extracts its items. A non-matching
  # fetch result falls through the `with` and is returned as-is.
  defp fetch_collection(ap_id) do
    with {:ok, page} <- Fetcher.fetch_and_contain_remote_object_from_id(ap_id) do
      {:ok, objects_from_collection(page)}
    end
  end

  # Items embedded in a (page of a) collection.
  defp items_in_page(%{"type" => type, "orderedItems" => items})
       when is_list(items) and type in ["OrderedCollection", "OrderedCollectionPage"],
       do: items

  defp items_in_page(%{"type" => type, "items" => items})
       when is_list(items) and type in ["Collection", "CollectionPage"],
       do: items

  # Remote data is untrusted: anything that does not look like a page with an
  # item array simply contributes no items instead of raising.
  defp items_in_page(_page), do: []

  # Collection with the items embedded directly in the document.
  defp objects_from_collection(%{"type" => "OrderedCollection", "orderedItems" => items})
       when is_list(items),
       do: items

  defp objects_from_collection(%{"type" => "Collection", "items" => items}) when is_list(items),
    do: items

  # Collection referencing its first page by id.
  defp objects_from_collection(%{"type" => type, "first" => first})
       when is_binary(first) and type in ["Collection", "OrderedCollection"] do
    fetch_page_items(first)
  end

  # Collection embedding its first page as an object; the page is fetched by
  # its id, exactly like a referenced page.
  defp objects_from_collection(%{"type" => type, "first" => %{"id" => id}})
       when is_binary(id) and type in ["Collection", "OrderedCollection"] do
    fetch_page_items(id)
  end

  # E.g. an empty collection that carries neither items nor a `first` page.
  defp objects_from_collection(_collection), do: []

  # Fetches the page `id` and appends its items to `items`, following `next`
  # links until the configured limit is reached. The limit is checked before
  # each fetch, so the final list may exceed it by part of one page.
  defp fetch_page_items(id, items \\ []) do
    if length(items) >= Config.get([:activitypub, :max_collection_objects]) do
      items
    else
      case Fetcher.fetch_and_contain_remote_object_from_id(id) do
        {:ok, page} ->
          case items_in_page(page) do
            # An empty page ends the paging even if it advertises a `next`.
            [] -> items
            objects -> maybe_next_page(page, items ++ objects)
          end

        error ->
          # Best effort: keep what was gathered so far instead of crashing on
          # a single unreachable or invalid page.
          Logger.error("Could not fetch collection page #{id} - #{inspect(error)}")
          items
      end
    end
  end

  # Continues with the next page when the current one links to it by id.
  defp maybe_next_page(%{"next" => id}, items) when is_binary(id) do
    fetch_page_items(id, items)
  end

  defp maybe_next_page(_, items), do: items
end

View file

@ -0,0 +1,19 @@
{
"@context": "https://www.w3.org/ns/activitystreams",
"id": "https://example.com/collection/ordered_array",
"summary": "Object history",
"type": "OrderedCollection",
"totalItems": 2,
"orderedItems": [
{
"type": "Create",
"actor": "http://www.test.example/sally",
"object": "http://example.org/foo"
},
{
"type": "Like",
"actor": "http://www.test.example/joe",
"object": "http://example.org/foo"
}
]
}

View file

@ -0,0 +1,19 @@
{
"@context": "https://www.w3.org/ns/activitystreams",
"id": "https://example.com/collection/unordered_array",
"summary": "Object history",
"type": "Collection",
"totalItems": 2,
"items": [
{
"type": "Create",
"actor": "http://www.test.example/sally",
"object": "http://example.org/foo"
},
{
"type": "Like",
"actor": "http://www.test.example/joe",
"object": "http://example.org/foo"
}
]
}

View file

@ -0,0 +1,20 @@
{
"@context": "https://www.w3.org/ns/activitystreams",
"summary": "Sally's recent activities",
"type": "Collection",
"id": "http://example.org/foo",
"totalItems": 10,
"first": {
"type": "CollectionPage",
"id": "http://example.org/foo?page=1",
"partOf": "http://example.org/foo",
"next": "http://example.org/foo?page=2",
"items": [
{
"type": "Create",
"actor": "http://www.test.example/sally",
"object": "http://example.org/foo"
}
]
}
}

View file

@ -0,0 +1,13 @@
{
"type": "CollectionPage",
"id": "https://example.com/collection/unordered_page_reference?page=1",
"partOf": "https://example.com/collection/unordered_page_reference",
"next": "https://example.com/collection/unordered_page_reference?page=2",
"items": [
{
"type": "Create",
"actor": "http://www.test.example/sally",
"object": "http://example.org/foo"
}
]
}

View file

@ -0,0 +1,8 @@
{
"@context": "https://www.w3.org/ns/activitystreams",
"summary": "Sally's recent activities",
"type": "Collection",
"id": "https://example.com/collection/unordered_page_reference",
"totalItems": 10,
"first": "https://example.com/collection/unordered_page_reference?page=1"
}

View file

@ -0,0 +1,12 @@
{
"type": "CollectionPage",
"id": "https://example.com/collection/unordered_page_reference?page=2",
"partOf": "https://example.com/collection/unordered_page_reference",
"items": [
{
"type": "Like",
"actor": "http://www.test.example/sally",
"object": "http://example.org/foo"
}
]
}

View file

@ -0,0 +1,167 @@
defmodule Akkoma.Collections.FetcherTest do
  use Pleroma.DataCase
  use Oban.Testing, repo: Pleroma.Repo

  alias Akkoma.Collections.Fetcher

  import Tesla.Mock

  @fixture_dir "test/fixtures/collections"

  @paged_collection_id "https://example.com/collection/unordered_page_reference"
  @first_page_id "https://example.com/collection/unordered_page_reference?page=1"
  @second_page_id "https://example.com/collection/unordered_page_reference?page=2"

  setup do
    mock(fn env -> apply(HttpRequestMock, :request, [env]) end)
    :ok
  end

  test "it should extract items from an embedded array in a Collection" do
    # The URL matches the `id` declared inside unordered_array.json.
    ap_id = "https://example.com/collection/unordered_array"
    mock_fixtures(%{ap_id => "unordered_array.json"})

    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
  end

  test "it should extract items from an embedded array in an OrderedCollection" do
    ap_id = "https://example.com/collection/ordered_array"
    mock_fixtures(%{ap_id => "ordered_array.json"})

    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
  end

  test "it should extract items from a referenced first page in a Collection" do
    mock_paged_collection()

    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(@paged_collection_id)
    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
  end

  test "it should stop fetching when we hit :max_collection_objects" do
    clear_config([:activitypub, :max_collection_objects], 1)
    mock_paged_collection()

    # The first page already holds one item, so the second page is not followed.
    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(@paged_collection_id)
    assert [%{"type" => "Create"}] = objects
  end

  # Serves the two-page unordered collection: the root document references
  # page 1, which links to page 2 through `next`.
  defp mock_paged_collection do
    mock_fixtures(%{
      @paged_collection_id => "unordered_page_reference.json",
      @first_page_id => "unordered_page_first.json",
      @second_page_id => "unordered_page_second.json"
    })
  end

  # Replaces the Tesla mock with one answering GET requests for the given
  # URLs with the matching fixture file as an ActivityPub JSON document.
  # A request for any other URL raises, which fails the test.
  defp mock_fixtures(fixtures_by_url) do
    bodies =
      Map.new(fixtures_by_url, fn {url, file} ->
        {url, File.read!(Path.join(@fixture_dir, file))}
      end)

    mock(fn %{method: :get, url: url} ->
      %Tesla.Env{
        status: 200,
        body: Map.fetch!(bodies, url),
        headers: [{"content-type", "application/activity+json"}]
      }
    end)
  end
end