diff --git a/backend/config/test.exs b/backend/config/test.exs index d533a8f..c45f422 100644 --- a/backend/config/test.exs +++ b/backend/config/test.exs @@ -18,3 +18,5 @@ config :backend, Backend.Repo, pool: Ecto.Adapters.SQL.Sandbox config :appsignal, :config, active: false + +config :backend, :crawler, status_count_limit: 5 diff --git a/backend/lib/backend/crawler/crawlers/misskey.ex b/backend/lib/backend/crawler/crawlers/misskey.ex index 9ee734b..092d48f 100644 --- a/backend/lib/backend/crawler/crawlers/misskey.ex +++ b/backend/lib/backend/crawler/crawlers/misskey.ex @@ -98,7 +98,7 @@ defmodule Backend.Crawler.Crawlers.Misskey do endpoint = "https://#{domain}/api/notes/local-timeline" params = %{ - limit: 20 + limit: 100 } params = @@ -110,7 +110,7 @@ defmodule Backend.Crawler.Crawlers.Misskey do Logger.debug("Crawling #{endpoint} with untilId=#{until_id}") - statuses = http_client().post_and_decode!(endpoint, Jason.encode!(params)) + statuses = http_client().post_and_decode!(endpoint, params) filtered_statuses = statuses diff --git a/backend/lib/backend/http.ex b/backend/lib/backend/http.ex index 876de0f..7ad7db4 100644 --- a/backend/lib/backend/http.ex +++ b/backend/lib/backend/http.ex @@ -23,7 +23,7 @@ defmodule Backend.Http do ) do {:ok, %HTTPoison.Response{body: body, status_code: status_code}} when status_code >= 200 and status_code <= 299 -> - Jason.decode(body) + decode_body(body) {:ok, %HTTPoison.Response{body: body, status_code: status_code}} -> if not is_nil(default) do @@ -50,11 +50,18 @@ defmodule Backend.Http do end end + @doc """ + POSTs to the given URL with the given body and returns the JSON-decoded response. + The given body is JSON-encoded before sending. + """ @impl true - def post_and_decode(url, body \\ nil) do - case HTTPoison.post(url, body, [{"User-Agent", get_config(:user_agent)}]) do + def post_and_decode(url, body \\ %{}) do + case HTTPoison.post(url, Jason.encode!(body), [ + {"User-Agent", get_config(:user_agent)}, + {"Content-Type", "application/json"} + ]) do {:ok, %HTTPoison.Response{body: body}} -> - Jason.decode(body) + decode_body(body) {:error, %HTTPoison.Error{} = error} -> {:error, %Error{message: HTTPoison.Error.message(error)}} @@ -62,10 +69,25 @@ defmodule Backend.Http do end @impl true - def post_and_decode!(url, body \\ nil) do + def post_and_decode!(url, body \\ %{}) do case post_and_decode(url, body) do - {:ok, decoded} -> decoded - {:error, error} -> raise error + {:ok, decoded} -> + decoded + + {:error, error} -> + raise error + end + end + + defp decode_body(body) do + with {:ok, decoded} <- Jason.decode(body) do + if is_map(decoded) and (Map.has_key?(decoded, "errors") or Map.has_key?(decoded, "error")) do + {:error, %Error{message: "API error: " <> body}} + else + {:ok, decoded} + end + else + {:error, error} -> {:error, error} end end end diff --git a/backend/test/backend/crawler/crawlers/misskey_test.exs b/backend/test/backend/crawler/crawlers/misskey_test.exs new file mode 100644 index 0000000..0a971b8 --- /dev/null +++ b/backend/test/backend/crawler/crawlers/misskey_test.exs @@ -0,0 +1,68 @@ +defmodule Backend.Crawler.Crawlers.MisskeyTest do + use Backend.DataCase + + alias Backend.Crawler.Crawlers.Misskey + alias Backend.Crawler.ApiCrawler + import Mox + + setup :verify_on_exit! + + describe "is_instance_type?/2" do + test "returns true for misskey instance" do + expect(HttpMock, :post_and_decode, fn "https://example.com/api/meta" -> + {:ok, TestHelpers.load_json("misskey/meta.json")} + end) + + assert Misskey.is_instance_type?("example.com", nil) + end + end + + describe "crawl/2" do + test "does nothing for small instances" do + expect(HttpMock, :post_and_decode, fn "https://example.com/api/stats" -> + stats = + TestHelpers.load_json("misskey/stats.json") |> Map.merge(%{"originalUsersCount" => 1}) + + {:ok, stats} + end) + + result = Misskey.crawl("example.com", ApiCrawler.get_default()) + + assert result == ApiCrawler.get_default() |> Map.merge(%{type: :misskey, user_count: 1}) + end + + test "crawls large instances" do + expect(HttpMock, :post_and_decode, fn "https://example.com/api/stats" -> + {:ok, TestHelpers.load_json("misskey/stats.json")} + end) + + expect(HttpMock, :post_and_decode, fn "https://example.com/api/meta" -> + {:ok, TestHelpers.load_json("misskey/meta.json")} + end) + + expect(HttpMock, :get_and_decode, fn "https://example.com/api/v1/instance/peers" -> + {:ok, TestHelpers.load_json("misskey/peers.json")} + end) + + # status_count_limit is 5, response has 1 post per page, so we expect 5 requests + expect(HttpMock, :post_and_decode!, 5, fn "https://example.com/api/notes/local-timeline", + %{limit: 100} -> + TestHelpers.load_json("misskey/notes.json") + end) + + result = Misskey.crawl("example.com", ApiCrawler.get_default()) + + assert result == %{ + description: "some description", + federation_restrictions: [], + instance_type: :misskey, + interactions: %{}, + peers: ["other.com"], + status_count: 20, + statuses_seen: 5, + user_count: 20, + version: "13.12.2" + } + end + end +end diff --git a/backend/test/backend/crawler/crawlers/nodeinfo_test.exs b/backend/test/backend/crawler/crawlers/nodeinfo_test.exs index 92fe547..a6ca1f5 100644 --- a/backend/test/backend/crawler/crawlers/nodeinfo_test.exs +++ b/backend/test/backend/crawler/crawlers/nodeinfo_test.exs @@ -1,5 +1,5 @@ defmodule Backend.Crawler.Crawlers.NodeinfoTest do - use Backend.DataCase + use ExUnit.Case alias Backend.Crawler.Crawlers.Nodeinfo import Mox @@ -134,5 +134,25 @@ defmodule Backend.Crawler.Crawlers.NodeinfoTest do peers: [] } end + + test "handles non-200 response" do + expect(HttpMock, :get_and_decode, fn "https://mastodon.social/.well-known/nodeinfo" -> + {:error, %Backend.HttpBehaviour.Error{status_code: 401}} + end) + + result = Nodeinfo.crawl("mastodon.social", %{}) + + assert result == %{ + description: nil, + user_count: nil, + status_count: nil, + statuses_seen: 0, + instance_type: nil, + version: nil, + federation_restrictions: [], + interactions: %{}, + peers: [] + } + end end end diff --git a/backend/test/support/data/json/misskey/meta.json b/backend/test/support/data/json/misskey/meta.json new file mode 100644 index 0000000..7e05d14 --- /dev/null +++ b/backend/test/support/data/json/misskey/meta.json @@ -0,0 +1,170 @@ +{ + "maintainerName": "MisskeyHQ", + "maintainerEmail": "https://go.misskey.io/support", + "version": "13.12.2", + "name": "Misskey.io", + "uri": "https://misskey.io", + "description": "some description", + "langs": ["ja", "en", "zh", "ko", "fr", "de"], + "tosUrl": "http://go.misskey.io/tos", + "repositoryUrl": "https://github.com/syuilo/misskey", + "feedbackUrl": "https://github.com/syuilo/misskey/issues/new", + "disableRegistration": false, + "emailRequiredForSignup": true, + "enableHcaptcha": false, + "hcaptchaSiteKey": "95d75440-7e37-4419-a693-8f52c377f1c5", + "enableRecaptcha": false, + "recaptchaSiteKey": "6LfW8qQUAAAAAI_1WMThmcj6zO39laasAoEJHfFF", + "enableTurnstile": true, + "turnstileSiteKey": "0x4AAAAAAACJmZyh3LCvo-uf", + "swPublickey": "BHqCPVsCM8pMUo26Fenl6fuLPfuqQTNeo2Rpvt6KFxFEKznKAXZBHI2nk1aAanlJ1Me_PSr-MVkW3ho4RaYmZpk", + "themeColor": "#86b300", + "mascotImageUrl": "/assets/ai.png", + "bannerUrl": "https://s3.arkjp.net/misskey/65b25d3c-2ae4-474f-b1c0-050c8c8962e1.jpg", + "errorImageUrl": "https://s3.arkjp.net/misskey/94aab3c5-0b26-42a7-9fa9-83a69d7253cd.png", + "iconUrl": "https://s3.arkjp.net/misskey/webpublic-0c66b1ca-b8c0-4eaa-9827-47674f4a1580.png", + "backgroundImageUrl": "https://s3.arkjp.net/misskey/e23f6837-c477-4f40-bbc7-b8a06e3bc1cc.jpg", + "logoImageUrl": "https://s3.arkjp.net/misskey/31240fa8-98fa-4750-bfd4-767753d1c48d.png", + "maxNoteTextLength": 3000, + "defaultLightTheme": null, + "defaultDarkTheme": null, + "ads": [ + { + "id": "8riz9d7mt0", + "url": "http://go.misskey.io/nextdns", + "place": "horizontal", + "ratio": 3, + "imageUrl": "https://s3.arkjp.net/misskey/03992473-790d-4e50-9f70-f12ed1a5aabb.png" + }, + { + "id": "8rkte84ghf", + "url": "https://go.misskey.io/vultr", + "place": "horizontal", + "ratio": 3, + "imageUrl": "https://s3.arkjp.net/misskey/fa1421c3-fabc-4dbc-a688-52dfb7491660.webp" + }, + { + "id": "97crkngnt7", + "url": "https://go.misskey.io/ads", + "place": "horizontal", + "ratio": 1, + "imageUrl": "https://s3.arkjp.net/misskey/d85e0e31-522b-4779-8479-b7acb65c86dc.png" + }, + { + "id": "97crxz63al", + "url": "https://go.misskey.io/ads", + "place": "horizontal", + "ratio": 1, + "imageUrl": "https://s3.arkjp.net/misskey/d73e0b21-5910-42f7-9f1b-6983026ee1db.png" + }, + { + "id": "97cspskh3f", + "url": "https://go.misskey.io/ads", + "place": "horizontal", + "ratio": 1, + "imageUrl": "https://s3.arkjp.net/misskey/36fe91e6-5d11-4f12-8bc8-426ab0ebd885.png" + }, + { + "id": "9clihjru6p", + "url": "https://go.misskey.io/maZC", + "place": "horizontal", + "ratio": 40, + "imageUrl": "https://s3.arkjp.net/misskey/ee74935a-a1ec-4385-bb36-2377387118b8.png" + }, + { + "id": "9e5idmskrv", + "url": "https://go.misskey.io/LfNP", + "place": "horizontal-big", + "ratio": 20, + "imageUrl": "https://s3.arkjp.net/misskey/4e03fca5-b25f-4950-974a-313a7d958b6d.png" + }, + { + "id": "9eo9im87s6", + "url": "https://go.misskey.io/jHVi", + "place": "horizontal-big", + "ratio": 40, + "imageUrl": "https://s3.arkjp.net/misskey/7e903951-e7e1-4277-badf-0ea5bc9ab07a.png" + }, + { + "id": "9f00tkswhg", + "url": "https://go.misskey.io/wD6e", + "place": "horizontal-big", + "ratio": 20, + "imageUrl": "https://s3.arkjp.net/misskey/015a3335-b21e-4897-a958-d6879b2a82f1.png" + }, + { + "id": "9f3stos3s7", + "url": "https://go.misskey.io/iMxB", + "place": "horizontal-big", + "ratio": 40, + "imageUrl": "https://s3.arkjp.net/misskey/098ceb69-8238-4c3c-8f99-f9752294cb96.png" + }, + { + "id": "9fgxdm8mpd", + "url": "https://go.misskey.io/dwRP", + "place": "horizontal-big", + "ratio": 60, + "imageUrl": "https://s3.arkjp.net/misskey/c4650ddc-687e-46c0-932a-c1f5ca8c9f83.png" + }, + { + "id": "9fhdbdyevw", + "url": "https://go.misskey.io/pwYv", + "place": "horizontal", + "ratio": 40, + "imageUrl": "https://s3.arkjp.net/misskey/e2f6c692-0d16-4fe9-90c4-25eac1b31731.png" + }, + { + "id": "9fgjjfdr3s", + "url": "https://go.misskey.io/VwEm", + "place": "horizontal-big", + "ratio": 60, + "imageUrl": "https://s3.arkjp.net/misskey/951bda53-3707-480e-ab5a-0000ca9c7578.png" + }, + { + "id": "9fnpd9tsgs", + "url": "https://go.misskey.io/QwId", + "place": "horizontal-big", + "ratio": 80, + "imageUrl": "https://s3.arkjp.net/misskey/261f7323-7a5e-4734-92bc-6ad69a4226df.jpg" + }, + { + "id": "9fmru4ok7f", + "url": "https://go.misskey.io/pwYv", + "place": "horizontal-big", + "ratio": 20, + "imageUrl": "https://s3.arkjp.net/misskey/563a709e-6d5e-4952-9cb5-ac6897f80990.png" + }, + { + "id": "9ew2fhwyfc", + "url": "https://go.misskey.io/sjxJ", + "place": "horizontal-big", + "ratio": 60, + "imageUrl": "https://s3.arkjp.net/misskey/8dff6f2d-444f-459f-80ff-02cad454be91.png" + } + ], + "enableEmail": true, + "enableServiceWorker": true, + "translatorAvailable": true, + "serverRules": [], + "policies": { + "gtlAvailable": true, + "ltlAvailable": true, + "canPublicNote": true, + "canInvite": false, + "canManageCustomEmojis": false, + "canSearchNotes": false, + "canHideAds": false, + "driveCapacityMb": 10240, + "alwaysMarkNsfw": false, + "pinLimit": 3, + "antennaLimit": 5, + "wordMuteLimit": 200, + "webhookLimit": 3, + "clipLimit": 10, + "noteEachClipsLimit": 50, + "userListLimit": 5, + "userEachUserListsLimit": 20, + "rateLimitFactor": 2 + }, + "mediaProxy": "https://nos3.arkjp.net" +} diff --git a/backend/test/support/data/json/misskey/notes.json b/backend/test/support/data/json/misskey/notes.json new file mode 100644 index 0000000..f61387a --- /dev/null +++ b/backend/test/support/data/json/misskey/notes.json @@ -0,0 +1,33 @@ +[ + { + "id": "9ftvwz5kx8no7aeb", + "createdAt": "2023-06-10T18:18:57.656Z", + "userId": "9badbj0vp9", + "user": { + "id": "9badbj0vp9", + "name": "Some Name", + "username": "username", + "host": null, + "avatarUrl": "https://example.com/image.png", + "avatarBlurhash": "foobar", + "avatarColor": null, + "speakAsCat": true, + "emojis": [], + "onlineStatus": "online", + "driveCapacityOverrideMb": null + }, + "text": "My post", + "cw": null, + "visibility": "public", + "renoteCount": 0, + "repliesCount": 0, + "reactions": {}, + "reactionEmojis": [], + "emojis": [], + "tags": ["post"], + "fileIds": [], + "files": [], + "replyId": null, + "renoteId": null + } +] diff --git a/backend/test/support/data/json/misskey/peers.json b/backend/test/support/data/json/misskey/peers.json new file mode 100644 index 0000000..af04900 --- /dev/null +++ b/backend/test/support/data/json/misskey/peers.json @@ -0,0 +1 @@ +["other.com"] diff --git a/backend/test/support/data/json/misskey/stats.json b/backend/test/support/data/json/misskey/stats.json new file mode 100644 index 0000000..9d80c51 --- /dev/null +++ b/backend/test/support/data/json/misskey/stats.json @@ -0,0 +1,10 @@ +{ + "notesCount": 10, + "originalNotesCount": 20, + "usersCount": 10, + "originalUsersCount": 20, + "reactionsCount": 64569657, + "instances": 21184, + "driveUsageLocal": 0, + "driveUsageRemote": 0 +} diff --git a/backend/test/test_helper.exs b/backend/test/test_helper.exs index 292fc2d..287116a 100644 --- a/backend/test/test_helper.exs +++ b/backend/test/test_helper.exs @@ -3,3 +3,12 @@ Application.put_env(:backend, :http, HttpMock) ExUnit.start() Ecto.Adapters.SQL.Sandbox.mode(Backend.Repo, :manual) + +defmodule TestHelpers do + @spec load_json(String.t()) :: any() + def load_json(path) do + Path.join([__DIR__, "support", "data", "json", path]) + |> File.read!() + |> Jason.decode!() + end +end