kopia lustrzana https://github.com/fediversespace/fediverse.space
add mastodon tests, handle auth'd timelines
rodzic
8f4193e43f
commit
a4eaf75c70
|
@ -14,6 +14,9 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
|
|||
# We might already know that this is a Pleroma instance from nodeinfo
|
||||
if result != nil do
|
||||
cond do
|
||||
# for pleroma and smithereen, the instance_type will get overwritten
|
||||
# with the correct value -- but we still want to return true here
|
||||
# since they are compatible with the mastodon API
|
||||
Map.get(result, :instance_type) == :pleroma -> true
|
||||
Map.get(result, :instance_type) == :smithereen -> true
|
||||
Map.get(result, :instance_type) == :mastodon -> true
|
||||
|
@ -97,16 +100,7 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
|
|||
interactions \\ %{},
|
||||
statuses_seen \\ 0
|
||||
) do
|
||||
# If `statuses_seen == 0`, it's the first call of this function, which means we want to query the database for the
|
||||
# most recent status we have.
|
||||
min_timestamp =
|
||||
if statuses_seen == 0 do
|
||||
get_last_crawl_timestamp(domain)
|
||||
else
|
||||
min_timestamp
|
||||
end
|
||||
|
||||
endpoint = "https://#{domain}/api/v1/timelines/public?local=true"
|
||||
endpoint = "https://#{domain}/api/v1/timelines/public?local=true&limit=40"
|
||||
|
||||
endpoint =
|
||||
if max_id do
|
||||
|
@ -117,7 +111,26 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
|
|||
|
||||
Logger.debug("Crawling #{endpoint}")
|
||||
|
||||
statuses = http_client().get_and_decode!(endpoint)
|
||||
case http_client().get_and_decode(endpoint) do
|
||||
{:ok, statuses} ->
|
||||
handle_statuses(statuses, domain, min_timestamp, interactions, statuses_seen)
|
||||
|
||||
# if there's an error (e.g. because the timeline prevents unauthenticated access)
|
||||
# then stop here
|
||||
{:error, _} ->
|
||||
{interactions, statuses_seen}
|
||||
end
|
||||
end
|
||||
|
||||
defp handle_statuses(statuses, domain, min_timestamp, interactions, statuses_seen) do
|
||||
# If `statuses_seen == 0`, it's the first call of this function, which means we want to query the database for the
|
||||
# most recent status we have.
|
||||
min_timestamp =
|
||||
if statuses_seen == 0 do
|
||||
get_last_crawl_timestamp(domain)
|
||||
else
|
||||
min_timestamp
|
||||
end
|
||||
|
||||
filtered_statuses =
|
||||
statuses
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
defmodule Backend.Crawler.Crawlers.MastodonTest do
  # Exercises the Mastodon crawler against a Mox-backed HTTP client (`HttpMock`),
  # using the JSON fixtures under `mastodon/` as canned API responses.
  use Backend.DataCase

  alias Backend.Crawler.ApiCrawler
  alias Backend.Crawler.Crawlers.Mastodon
  alias Backend.HttpBehaviour

  import Mox

  # Fail any test that leaves a queued expectation unconsumed.
  setup :verify_on_exit!

  describe "is_instance_type?/2" do
    test "returns true for pleroma and smithereen" do
      # No HTTP expectation is queued here: an already-known instance type must be
      # enough to answer without hitting the network.
      pleroma = %{instance_type: :pleroma}
      smithereen = %{instance_type: :smithereen}

      assert Mastodon.is_instance_type?("example.com", pleroma)
      assert Mastodon.is_instance_type?("example.com", smithereen)
    end

    test "returns true for mastodon instance" do
      # With no prior result, the crawler is expected to probe the instance endpoint
      # through the non-raising client call.
      expect(HttpMock, :get_and_decode, fn "https://example.com/api/v1/instance" ->
        {:ok, TestHelpers.load_json("mastodon/instance.json")}
      end)

      assert Mastodon.is_instance_type?("example.com", nil)
    end
  end

  describe "crawl/2" do
    test "does nothing for small instances" do
      # Shrink the fixture to a single user; no peers/timeline expectations are
      # queued, so any further request would fail the test.
      expect_instance_fetch(&Map.merge(&1, %{"stats" => %{"user_count" => 1}}))

      result = Mastodon.crawl("example.com", ApiCrawler.get_default())

      expected = Map.merge(ApiCrawler.get_default(), %{instance_type: :mastodon, user_count: 1})
      assert result == expected
    end

    test "crawls large instance" do
      expect_instance_fetch()
      expect_peers_fetch()

      first_page = "https://example.com/api/v1/timelines/public?local=true&limit=40"
      next_page = "https://example.com/api/v1/timelines/public?local=true&limit=40&max_id=123"

      # The timeline fixture holds a single status with id "123", so every follow-up
      # request carries `max_id=123`. One initial page plus four follow-ups lines up
      # with the `statuses_seen: 5` expected below.
      HttpMock
      |> expect(:get_and_decode, fn ^first_page -> timeline_page() end)
      |> expect(:get_and_decode, 4, fn ^next_page -> timeline_page() end)

      result = Mastodon.crawl("example.com", ApiCrawler.get_default())

      assert result == expected_crawl(5)
    end

    test "handles timelines that require auth" do
      expect_instance_fetch()
      expect_peers_fetch()

      # The very first timeline request is rejected; the crawl should still return
      # the instance metadata and peers, with zero statuses seen.
      auth_error = %HttpBehaviour.Error{
        message: "HTTP request failed with status code 422",
        status_code: 422,
        body: "{\"error\":\"This method requires an authenticated user\"}"
      }

      expect(
        HttpMock,
        :get_and_decode,
        fn "https://example.com/api/v1/timelines/public?local=true&limit=40" ->
          {:error, auth_error}
        end
      )

      result = Mastodon.crawl("example.com", ApiCrawler.get_default())

      assert result == expected_crawl(0)
    end
  end

  # Queues one raising-variant request for the instance metadata. `adjust` lets a
  # test tweak the decoded fixture before it is handed back to the crawler.
  defp expect_instance_fetch(adjust \\ & &1) do
    expect(HttpMock, :get_and_decode!, fn "https://example.com/api/v1/instance" ->
      adjust.(TestHelpers.load_json("mastodon/instance.json"))
    end)
  end

  # Queues one successful request for the peers list.
  defp expect_peers_fetch do
    expect(HttpMock, :get_and_decode, fn "https://example.com/api/v1/instance/peers" ->
      {:ok, TestHelpers.load_json("mastodon/peers.json")}
    end)
  end

  # A successful timeline response built from the fixture.
  defp timeline_page, do: {:ok, TestHelpers.load_json("mastodon/timeline.json")}

  # The crawl result expected for the unmodified instance fixture; only the number
  # of statuses seen differs between the scenarios above.
  defp expected_crawl(statuses_seen) do
    %{
      description: "long description",
      federation_restrictions: [],
      instance_type: :mastodon,
      interactions: %{},
      peers: ["other.com"],
      user_count: 100,
      status_count: 100,
      statuses_seen: statuses_seen,
      version: "1.2.3"
    }
  end
end
|
|
@ -0,0 +1,137 @@
|
|||
{
|
||||
"uri": "mastodon.social",
|
||||
"title": "Mastodon",
|
||||
"short_description": "short description",
|
||||
"description": "long description",
|
||||
"email": "staff@mastodon.social",
|
||||
"version": "1.2.3",
|
||||
"urls": {
|
||||
"streaming_api": "wss://streaming.mastodon.social"
|
||||
},
|
||||
"stats": {
|
||||
"user_count": 100,
|
||||
"status_count": 100,
|
||||
"domain_count": 55958
|
||||
},
|
||||
"thumbnail": "https://files.mastodon.social/site_uploads/files/000/000/001/@1x/57c12f441d083cde.png",
|
||||
"languages": ["en"],
|
||||
"registrations": true,
|
||||
"approval_required": false,
|
||||
"invites_enabled": true,
|
||||
"configuration": {
|
||||
"accounts": {
|
||||
"max_featured_tags": 10
|
||||
},
|
||||
"statuses": {
|
||||
"max_characters": 500,
|
||||
"max_media_attachments": 4,
|
||||
"characters_reserved_per_url": 23
|
||||
},
|
||||
"media_attachments": {
|
||||
"supported_mime_types": [
|
||||
"image/jpeg",
|
||||
"image/png",
|
||||
"image/gif",
|
||||
"image/heic",
|
||||
"image/heif",
|
||||
"image/webp",
|
||||
"image/avif",
|
||||
"video/webm",
|
||||
"video/mp4",
|
||||
"video/quicktime",
|
||||
"video/ogg",
|
||||
"audio/wave",
|
||||
"audio/wav",
|
||||
"audio/x-wav",
|
||||
"audio/x-pn-wave",
|
||||
"audio/vnd.wave",
|
||||
"audio/ogg",
|
||||
"audio/vorbis",
|
||||
"audio/mpeg",
|
||||
"audio/mp3",
|
||||
"audio/webm",
|
||||
"audio/flac",
|
||||
"audio/aac",
|
||||
"audio/m4a",
|
||||
"audio/x-m4a",
|
||||
"audio/mp4",
|
||||
"audio/3gpp",
|
||||
"video/x-ms-asf"
|
||||
],
|
||||
"image_size_limit": 16777216,
|
||||
"image_matrix_limit": 33177600,
|
||||
"video_size_limit": 103809024,
|
||||
"video_frame_rate_limit": 120,
|
||||
"video_matrix_limit": 8294400
|
||||
},
|
||||
"polls": {
|
||||
"max_options": 4,
|
||||
"max_characters_per_option": 50,
|
||||
"min_expiration": 300,
|
||||
"max_expiration": 2629746
|
||||
}
|
||||
},
|
||||
"contact_account": {
|
||||
"id": "13179",
|
||||
"username": "Mastodon",
|
||||
"acct": "Mastodon",
|
||||
"display_name": "Mastodon",
|
||||
"locked": false,
|
||||
"bot": false,
|
||||
"discoverable": true,
|
||||
"group": false,
|
||||
"created_at": "2016-11-23T00:00:00.000Z",
|
||||
"note": "<p>Official account of the Mastodon project. News, releases, announcements! Learn more on our website!</p>",
|
||||
"url": "https://mastodon.social/@Mastodon",
|
||||
"avatar": "https://files.mastodon.social/accounts/avatars/000/013/179/original/b4ceb19c9c54ec7e.png",
|
||||
"avatar_static": "https://files.mastodon.social/accounts/avatars/000/013/179/original/b4ceb19c9c54ec7e.png",
|
||||
"header": "https://files.mastodon.social/accounts/headers/000/013/179/original/878f382e7dd9fb84.png",
|
||||
"header_static": "https://files.mastodon.social/accounts/headers/000/013/179/original/878f382e7dd9fb84.png",
|
||||
"followers_count": 778859,
|
||||
"following_count": 8,
|
||||
"statuses_count": 237,
|
||||
"last_status_at": "2023-05-13",
|
||||
"noindex": false,
|
||||
"emojis": [],
|
||||
"roles": [],
|
||||
"fields": [
|
||||
{
|
||||
"name": "Homepage",
|
||||
"value": "<a href=\"https://joinmastodon.org\" target=\"_blank\" rel=\"nofollow noopener noreferrer me\"><span class=\"invisible\">https://</span><span class=\"\">joinmastodon.org</span><span class=\"invisible\"></span></a>",
|
||||
"verified_at": "2018-10-31T04:11:00.076+00:00"
|
||||
},
|
||||
{
|
||||
"name": "Patreon",
|
||||
"value": "<a href=\"https://patreon.com/mastodon\" target=\"_blank\" rel=\"nofollow noopener noreferrer me\"><span class=\"invisible\">https://</span><span class=\"\">patreon.com/mastodon</span><span class=\"invisible\"></span></a>",
|
||||
"verified_at": null
|
||||
},
|
||||
{
|
||||
"name": "GitHub",
|
||||
"value": "<a href=\"https://github.com/mastodon\" target=\"_blank\" rel=\"nofollow noopener noreferrer me\"><span class=\"invisible\">https://</span><span class=\"\">github.com/mastodon</span><span class=\"invisible\"></span></a>",
|
||||
"verified_at": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"rules": [
|
||||
{
|
||||
"id": "1",
|
||||
"text": "Sexually explicit or violent media must be marked as sensitive when posting"
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"text": "No racism, sexism, homophobia, transphobia, xenophobia, or casteism"
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"text": "No incitement of violence or promotion of violent ideologies"
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"text": "No harassment, dogpiling or doxxing of other users"
|
||||
},
|
||||
{
|
||||
"id": "7",
|
||||
"text": "Do not share intentionally false or misleading information"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
["other.com"]
|
|
@ -0,0 +1,55 @@
|
|||
[
|
||||
{
|
||||
"id": "123",
|
||||
"created_at": "2023-06-10T18:59:36.207Z",
|
||||
"in_reply_to_id": null,
|
||||
"in_reply_to_account_id": null,
|
||||
"sensitive": false,
|
||||
"spoiler_text": "",
|
||||
"visibility": "public",
|
||||
"language": "de",
|
||||
"uri": "https://mastodon.social/users/someuser/statuses/110521455489577427",
|
||||
"url": "https://mastodon.social/@someuser/110521455489577427",
|
||||
"replies_count": 0,
|
||||
"reblogs_count": 0,
|
||||
"favourites_count": 0,
|
||||
"edited_at": null,
|
||||
"content": "<p>New post</p>",
|
||||
"reblog": null,
|
||||
"application": {
|
||||
"name": "IFTTT",
|
||||
"website": "https://www.ifttt.com"
|
||||
},
|
||||
"account": {
|
||||
"id": "108265572384945996",
|
||||
"username": "someuser",
|
||||
"acct": "someuser",
|
||||
"display_name": "Some User",
|
||||
"locked": false,
|
||||
"bot": false,
|
||||
"discoverable": true,
|
||||
"group": false,
|
||||
"created_at": "2022-05-08T00:00:00.000Z",
|
||||
"note": "<p>My account</p>",
|
||||
"url": "https://mastodon.social/@someuser",
|
||||
"avatar": "https://example.com/picture.jpg",
|
||||
"avatar_static": "https://example.com/picture.jpg",
|
||||
"header": "https://example.com/picture.jpg",
|
||||
"header_static": "https://example.com/picture.jpg",
|
||||
"followers_count": 7,
|
||||
"following_count": 73,
|
||||
"statuses_count": 256,
|
||||
"last_status_at": "2023-06-10",
|
||||
"noindex": false,
|
||||
"emojis": [],
|
||||
"roles": [],
|
||||
"fields": []
|
||||
},
|
||||
"media_attachments": [],
|
||||
"mentions": [],
|
||||
"tags": [],
|
||||
"emojis": [],
|
||||
"card": {},
|
||||
"poll": null
|
||||
}
|
||||
]
|
Ładowanie…
Reference in New Issue