fix nodeinfo and add basic tests

main
Tao Bojlén 2023-06-10 18:53:10 +01:00
rodzic 0970e39dea
commit 12b035780e
16 zmienionych plików z 295 dodań i 96 usunięć

Wyświetl plik

@ -16,10 +16,12 @@ config :backend, BackendWeb.Endpoint,
secret_key_base: System.get_env("SECRET_KEY_BASE"),
render_errors: [view: BackendWeb.ErrorView, accepts: ~w(json)]
config :backend, :http, Backend.Http
config :backend, Backend.Repo, queue_target: 5000
config :backend, Backend.Elasticsearch.Cluster,
url: "http://elastic:9200",
url: "http://localhost:9200",
api: Elasticsearch.API.HTTP,
json_library: Jason

Wyświetl plik

@ -31,9 +31,10 @@ defmodule Backend.Application do
]
children =
case Enum.member?(["true", 1, "1"], System.get_env("SKIP_CRAWL")) do
true -> children
false -> children ++ [Backend.Crawler.StaleInstanceManager]
if Enum.member?(["true", 1, "1"], System.get_env("SKIP_CRAWL")) or Mix.env() == :test do
children
else
children ++ [Backend.Crawler.StaleInstanceManager]
end
add_appsignal_probes()

Wyświetl plik

@ -117,8 +117,8 @@ defmodule Backend.Crawler do
try do
%Crawler{state | result: curr.crawl(domain, result), found_api?: true}
rescue
e in HTTPoison.Error ->
Map.put(state, :error, "HTTPoison error: " <> HTTPoison.Error.message(e))
e in Backend.HttpBehaviour.Error ->
Map.put(state, :error, "HTTP error: " <> e.message)
e in Jason.DecodeError ->
Map.put(state, :error, "Jason DecodeError: " <> Jason.DecodeError.message(e))

Wyświetl plik

@ -51,7 +51,7 @@ defmodule Backend.Crawler.Crawlers.Friendica do
|> Map.merge(nodeinfo_result)
peers =
case get_and_decode("https://#{domain}/poco/@server") do
case http_client().get_and_decode("https://#{domain}/poco/@server") do
{:ok, p} -> p
{:error, _err} -> []
end
@ -71,7 +71,7 @@ defmodule Backend.Crawler.Crawlers.Friendica do
end
defp get_statistics(domain) do
get_and_decode("https://#{domain}/statistics.json")
http_client().get_and_decode("https://#{domain}/statistics.json")
end
defp to_domain(url) do

Wyświetl plik

@ -14,7 +14,7 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do
if nodeinfo_result != nil do
Map.get(nodeinfo_result, :instance_type) == :gnusocial
else
case get_and_decode("https://#{domain}/api/statuses/public_timeline.json") do
case http_client().get_and_decode("https://#{domain}/api/statuses/public_timeline.json") do
{:ok, statuses} -> is_list(statuses)
{:error, _other} -> false
end
@ -86,7 +86,7 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do
Logger.debug("Crawling #{endpoint}")
statuses = get_and_decode!(endpoint)
statuses = http_client().get_and_decode!(endpoint)
# Filter to statuses that are in the correct timeframe
filtered_statuses =

Wyświetl plik

@ -21,7 +21,7 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
true -> false
end
else
case get_and_decode("https://#{domain}/api/v1/instance") do
case http_client().get_and_decode("https://#{domain}/api/v1/instance") do
{:ok, %{"title" => _title}} -> true
_other -> false
end
@ -41,7 +41,7 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
@impl ApiCrawler
def crawl(domain, nodeinfo) do
instance = get_and_decode!("https://#{domain}/api/v1/instance")
instance = http_client().get_and_decode!("https://#{domain}/api/v1/instance")
user_count = get_in(instance, ["stats", "user_count"])
if is_above_user_threshold?(user_count) or has_opted_in?(domain) do
@ -117,7 +117,7 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
Logger.debug("Crawling #{endpoint}")
statuses = get_and_decode!(endpoint)
statuses = http_client().get_and_decode!(endpoint)
filtered_statuses =
statuses
@ -161,7 +161,7 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
defp get_peers(domain) do
# servers may not publish peers
case get_and_decode("https://#{domain}/api/v1/instance/peers") do
case http_client().get_and_decode("https://#{domain}/api/v1/instance/peers") do
{:ok, peers} -> peers
{:error, _err} -> []
end

Wyświetl plik

@ -3,6 +3,7 @@ defmodule Backend.Crawler.Crawlers.Misskey do
Crawler for Misskey servers.
"""
alias Backend.Crawler.ApiCrawler
alias Backend.Http
@behaviour ApiCrawler
import Backend.Crawler.Util
@ -37,7 +38,7 @@ defmodule Backend.Crawler.Crawlers.Misskey do
@impl ApiCrawler
def crawl(domain, nodeinfo) do
with {:ok, %{"originalUsersCount" => user_count, "originalNotesCount" => status_count}} <-
post_and_decode("https://#{domain}/api/stats") do
http_client().post_and_decode("https://#{domain}/api/stats") do
if is_above_user_threshold?(user_count) or has_opted_in?(domain) do
Map.merge(nodeinfo, crawl_large_instance(domain, user_count, status_count))
else
@ -109,7 +110,7 @@ defmodule Backend.Crawler.Crawlers.Misskey do
Logger.debug("Crawling #{endpoint} with untilId=#{until_id}")
statuses = post_and_decode!(endpoint, Jason.encode!(params))
statuses = http_client().post_and_decode!(endpoint, Jason.encode!(params))
filtered_statuses =
statuses
@ -153,9 +154,9 @@ defmodule Backend.Crawler.Crawlers.Misskey do
end
@spec get_version_and_description(String.t()) ::
{:ok, {String.t(), String.t()}} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
{:ok, {String.t(), String.t()}} | {:error, Jason.DecodeError.t() | Http.Error.t()}
defp get_version_and_description(domain) do
case post_and_decode("https://#{domain}/api/meta") do
case http_client().post_and_decode("https://#{domain}/api/meta") do
{:ok, %{"version" => version, "description" => description}} ->
{:ok, {version, description}}
@ -166,7 +167,7 @@ defmodule Backend.Crawler.Crawlers.Misskey do
@spec get_peers(String.t()) :: {:ok, [String.t()]} | {:error, Jason.DecodeError.t()}
defp get_peers(domain) do
case get_and_decode("https://#{domain}/api/v1/instance/peers") do
case http_client().get_and_decode("https://#{domain}/api/v1/instance/peers") do
{:ok, peers} -> {:ok, peers}
{:error, _} -> {:ok, []}
end

Wyświetl plik

@ -5,6 +5,7 @@ defmodule Backend.Crawler.Crawlers.Nodeinfo do
"""
alias Backend.Crawler.ApiCrawler
alias Backend.Http
require Logger
import Backend.Util
import Backend.Crawler.Util
@ -13,7 +14,7 @@ defmodule Backend.Crawler.Crawlers.Nodeinfo do
@impl ApiCrawler
def allows_crawling?(domain) do
[
".well-known/nodeinfo"
"/.well-known/nodeinfo"
]
|> Enum.map(fn endpoint -> "https://#{domain}#{endpoint}" end)
|> urls_are_crawlable?()
@ -36,26 +37,40 @@ defmodule Backend.Crawler.Crawlers.Nodeinfo do
end
@spec get_nodeinfo_url(String.t()) ::
{:ok, String.t()} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
{:ok, String.t()} | {:error, Jason.DecodeError.t() | Http.Error.t() | :invalid_body}
defp get_nodeinfo_url(domain) do
case get_and_decode("https://#{domain}/.well-known/nodeinfo") do
{:ok, response} -> {:ok, process_nodeinfo_url(response)}
{:error, err} -> {:error, err}
with {:ok, response} <-
http_client().get_and_decode("https://#{domain}/.well-known/nodeinfo"),
{:ok, nodeinfo_url} <- process_nodeinfo_url(response) do
{:ok, nodeinfo_url}
else
{:error, error} -> {:error, error}
:error -> {:error, :invalid_body}
end
end
@spec process_nodeinfo_url(any()) :: String.t()
@spec process_nodeinfo_url(any()) :: {:ok, String.t()} | :error
defp process_nodeinfo_url(response) do
response
|> Map.get("links")
|> Enum.filter(fn %{"rel" => rel} -> is_compatible_nodeinfo_version?(rel) end)
|> Kernel.hd()
|> Map.get("href")
links =
response
|> Map.get("links", [])
|> Enum.filter(fn %{"rel" => rel} -> is_compatible_nodeinfo_version?(rel) end)
if Enum.empty?(links) do
:error
else
href =
links
|> Kernel.hd()
|> Map.get("href")
{:ok, href}
end
end
@spec get_nodeinfo(String.t()) :: ApiCrawler.t()
defp get_nodeinfo(nodeinfo_url) do
case get_and_decode(nodeinfo_url) do
case http_client().get_and_decode(nodeinfo_url) do
{:ok, nodeinfo} -> {:ok, process_nodeinfo(nodeinfo)}
{:error, err} -> {:error, err}
end

Wyświetl plik

@ -0,0 +1,71 @@
defmodule Backend.Http do
  @moduledoc """
  A wrapper around HTTPoison. Using this wrapper makes it easy for us
  to mock web responses in tests, and we can easily switch out HTTPoison for
  another library if we want to.

  GET and POST requests share one response-handling path: 2xx bodies are
  JSON-decoded, any other status becomes a `Backend.HttpBehaviour.Error`
  carrying the status code and raw body, and transport failures become a
  `Backend.HttpBehaviour.Error` carrying HTTPoison's message.
  """
  @behaviour Backend.HttpBehaviour
  alias Backend.HttpBehaviour.Error
  import Backend.Util

  # Connect/receive timeout (in ms) for POST requests. Matches the default
  # timeout of `get_and_decode/4`.
  @post_timeout 15_000

  @doc """
  GETs from the given URL and returns the JSON-decoded response.

  If the response is unsuccessful and a default value is given, this returns the default value.
  Otherwise, unsuccessful responses return an error.

  ## Parameters

    * `url` - the URL to request.
    * `pool` - the hackney connection pool to use.
    * `timeout` - connect and receive timeout in milliseconds.
    * `default` - value returned as `{:ok, default}` when the server answers with a
      non-2xx status. Transport errors are still returned as errors.

  ## Returns

    * `{:ok, decoded}` for a 2xx response with a valid JSON body.
    * `{:error, %Jason.DecodeError{}}` for a 2xx response whose body is not valid JSON.
    * `{:error, %Backend.HttpBehaviour.Error{}}` for a non-2xx response (without a
      default) or a transport failure.
  """
  @impl true
  def get_and_decode(url, pool \\ :default, timeout \\ 15_000, default \\ nil) do
    url
    |> HTTPoison.get(user_agent_headers(),
      hackney: [pool: pool],
      recv_timeout: timeout,
      timeout: timeout
    )
    |> decode_response(default)
  end

  @doc """
  Same as `get_and_decode/4`, but returns the decoded value directly and raises
  the error that `get_and_decode/4` would have returned.
  """
  @impl true
  def get_and_decode!(url, pool \\ :default, timeout \\ 15_000, default \\ nil) do
    case get_and_decode(url, pool, timeout, default) do
      {:ok, decoded} -> decoded
      {:error, error} -> raise error
    end
  end

  @doc """
  POSTs `body` to the given URL and returns the JSON-decoded response.

  A `nil` body is sent as an empty body, because HTTPoison's request body type
  does not include `nil`. Non-2xx responses are returned as
  `{:error, %Backend.HttpBehaviour.Error{}}` instead of being decoded, so that
  an error payload is never mistaken for a successful result.
  """
  @impl true
  def post_and_decode(url, body \\ nil) do
    url
    |> HTTPoison.post(body || "", user_agent_headers(),
      recv_timeout: @post_timeout,
      timeout: @post_timeout
    )
    |> decode_response(nil)
  end

  @doc """
  Same as `post_and_decode/2`, but returns the decoded value directly and raises
  the error that `post_and_decode/2` would have returned.
  """
  @impl true
  def post_and_decode!(url, body \\ nil) do
    case post_and_decode(url, body) do
      {:ok, decoded} -> decoded
      {:error, error} -> raise error
    end
  end

  # Headers sent with every outgoing request.
  defp user_agent_headers, do: [{"User-Agent", get_config(:user_agent)}]

  # Turns an HTTPoison result into the tagged tuple returned to callers.
  defp decode_response({:ok, %HTTPoison.Response{body: body, status_code: status_code}}, _default)
       when status_code >= 200 and status_code <= 299 do
    Jason.decode(body)
  end

  # Unsuccessful status, but the caller supplied a fallback value.
  defp decode_response({:ok, %HTTPoison.Response{}}, default) when not is_nil(default) do
    {:ok, default}
  end

  defp decode_response({:ok, %HTTPoison.Response{body: body, status_code: status_code}}, _default) do
    {:error,
     %Error{
       message: "HTTP request failed with status code #{status_code}",
       status_code: status_code,
       body: body
     }}
  end

  defp decode_response({:error, %HTTPoison.Error{} = error}, _default) do
    {:error, %Error{message: HTTPoison.Error.message(error)}}
  end
end

Wyświetl plik

@ -0,0 +1,23 @@
defmodule Backend.HttpBehaviour do
  @moduledoc """
  This module defines the behavior for HTTP requests.

  Implementations fetch a URL, JSON-decode the response body and report
  failures as `Backend.HttpBehaviour.Error` or `Jason.DecodeError` values.
  The bang (`!`) callbacks return the decoded value directly and raise instead
  of returning an error tuple.
  """

  defmodule Error do
    @moduledoc """
    Error describing a failed HTTP request.

    This is an exception (not a plain struct) so that it can be passed to
    `raise/1` and matched with `rescue e in Backend.HttpBehaviour.Error`.
    `status_code` and `body` are only set when the server actually responded.
    """
    defexception message: nil, status_code: nil, body: nil

    @type t :: %__MODULE__{message: String.t(), status_code: integer | nil, body: term | nil}
  end

  @typedoc "A JSON-decoded response body."
  @type decoded :: any()

  @type response :: {:ok, decoded()} | {:error, __MODULE__.Error.t() | Jason.DecodeError.t()}

  # get_and_decode(url, pool, timeout, default)
  @callback get_and_decode(String.t()) :: response
  @callback get_and_decode(String.t(), atom(), integer(), any()) :: response

  @callback get_and_decode!(String.t()) :: decoded()
  @callback get_and_decode!(String.t(), atom(), integer(), any()) :: decoded()

  # post_and_decode(url, body)
  @callback post_and_decode(String.t()) :: response()
  @callback post_and_decode(String.t(), String.t()) :: response()

  @callback post_and_decode!(String.t()) :: decoded()
  @callback post_and_decode!(String.t(), String.t()) :: decoded()
end

Wyświetl plik

@ -143,69 +143,12 @@ defmodule Backend.Util do
map |> Map.new(fn {k, v} -> {String.to_atom(k), v} end)
end
@doc """
Gets and decodes a HTTP response.
"""
@spec get_and_decode(String.t(), Atom.t(), Integer.t()) ::
{:ok, any()} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
def get_and_decode(url, pool \\ :crawler, timeout \\ 15_000) do
case HTTPoison.get(url, [{"User-Agent", get_config(:user_agent)}],
hackney: [pool: pool],
recv_timeout: timeout,
timeout: timeout
) do
{:ok, %{status_code: 200, body: body}} ->
Jason.decode(body)
{:ok, %{status_code: 401}} ->
Jason.decode("[]")
{:ok, %{status_code: 404}} ->
Jason.decode("[]")
{:ok, %{body: body}} ->
{:error, %HTTPoison.Error{reason: "Non-200 response. Body: #{body}"}}
{:error, err} ->
{:error, err}
end
end
@spec get_and_decode!(String.t()) :: any()
def get_and_decode!(url) do
case get_and_decode(url) do
{:ok, decoded} -> decoded
{:error, error} -> raise error
end
end
@doc """
POSTS to a HTTP endpoint and decodes the JSON response.
"""
@spec post_and_decode(String.t(), String.t()) ::
{:ok, any()} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
def post_and_decode(url, body \\ "") do
case HTTPoison.post(url, body, [{"User-Agent", get_config(:user_agent)}],
hackney: [pool: :crawler],
recv_timeout: 15_000,
timeout: 15_000
) do
{:ok, %{status_code: 200, body: response_body}} -> Jason.decode(response_body)
{:ok, _} -> {:error, %HTTPoison.Error{reason: "Non-200 response"}}
{:error, err} -> {:error, err}
end
end
@spec post_and_decode!(String.t(), String.t()) :: any()
def post_and_decode!(url, body \\ "") do
case post_and_decode(url, body) do
{:ok, decoded} -> decoded
{:error, error} -> raise error
end
end
@spec is_valid_domain?(String.t()) :: boolean
def is_valid_domain?(domain) do
Regex.match?(~r/^[\pL\d\.\-_]+\.[a-zA-Z]+$/, domain)
end
def http_client() do
Application.get_env(:backend, :http, Backend.Http)
end
end

Wyświetl plik

@ -5,7 +5,7 @@ defmodule BackendWeb.AdminLoginController do
alias Backend.Mailer.UserEmail
alias Mastodon.Messenger
action_fallback BackendWeb.FallbackController
action_fallback(BackendWeb.FallbackController)
@doc """
Given an instance, looks up the login types (email or admin account) and returns them. The user can then
@ -24,7 +24,7 @@ defmodule BackendWeb.AdminLoginController do
[error: "It is only possible to administer Mastodon and Pleroma instances."]
true ->
case get_and_decode("https://#{cleaned_domain}/api/v1/instance") do
case http_client().get_and_decode("https://#{cleaned_domain}/api/v1/instance") do
{:ok, instance_data} ->
[instance_data: instance_data, cleaned_domain: cleaned_domain]
@ -40,7 +40,7 @@ defmodule BackendWeb.AdminLoginController do
cleaned_domain = clean_domain(domain)
{data_state, instance_data} =
get_and_decode("https://#{cleaned_domain}/api/v1/instance",
http_client().get_and_decode("https://#{cleaned_domain}/api/v1/instance",
pool: :admin_login,
timeout: 20_000
)

Wyświetl plik

@ -72,7 +72,8 @@ defmodule Backend.MixProject do
{:scrivener_ecto, "~> 2.2"},
{:recase, "~> 0.7"},
{:ex_rated, "~> 2.1"},
{:html_sanitize_ex, "~> 1.4"}
{:html_sanitize_ex, "~> 1.4"},
{:mox, "~> 1.0", only: [:test]}
]
end

Wyświetl plik

@ -42,6 +42,7 @@
"mime": {:hex, :mime, "2.0.5", "dc34c8efd439abe6ae0343edbb8556f4d63f178594894720607772a041b04b02", [:mix], [], "hexpm", "da0d64a365c45bc9935cc5c8a7fc5e49a0e0f9932a761c55d6c52b142780a05c"},
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm", "f278585650aa581986264638ebf698f8bb19df297f66ad91b18910dfc6e19323"},
"mochiweb": {:hex, :mochiweb, "2.22.0", "f104d6747c01a330c38613561977e565b788b9170055c5241ac9dd6e4617cba5", [:rebar3], [], "hexpm", "cbbd1fd315d283c576d1c8a13e0738f6dafb63dc840611249608697502a07655"},
"mox": {:hex, :mox, "1.0.2", "dc2057289ac478b35760ba74165b4b3f402f68803dd5aecd3bfd19c183815d64", [:mix], [], "hexpm", "f9864921b3aaf763c8741b5b8e6f908f44566f1e427b2630e89e9a73b981fef2"},
"nebulex": {:hex, :nebulex, "2.4.2", "b3d2d86d57b15896fb8e6d6dd49b4a9dee2eedd6eddfb3b69bfdb616a09c2817", [:mix], [{:decorator, "~> 1.4", [hex: :decorator, repo: "hexpm", optional: true]}, {:shards, "~> 1.0", [hex: :shards, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm", "c9f888e5770fd47614c95990d0a02c3515216d51dc72e3c830eaf28f5649ba52"},
"parse_trans": {:hex, :parse_trans, "3.3.1", "16328ab840cc09919bd10dab29e431da3af9e9e7e7e6f0089dd5a2d2820011d8", [:rebar3], [], "hexpm", "07cd9577885f56362d414e8c4c4e6bdf10d43a8767abb92d24cbe8b24c54888b"},
"phoenix": {:hex, :phoenix, "1.7.3", "4d8eca2c020c9ed81a28e7a8c60e0a4f6f9f7f6e12eb91dfd01301eac07424c1", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 2.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.6", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:plug_crypto, "~> 1.2", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:websock_adapter, "~> 0.4", [hex: :websock_adapter, repo: "hexpm", optional: false]}], "hexpm", "6b1bc308758f95ecf3e0d795389440a2ca88a903e0fda1f921c780918c16d640"},

Wyświetl plik

@ -0,0 +1,138 @@
defmodule Backend.Crawler.Crawlers.NodeinfoTest do
  use Backend.DataCase
  alias Backend.Crawler.Crawlers.Nodeinfo
  import Mox

  setup :verify_on_exit!

  describe "crawl/2" do
    test "handles valid nodeinfo" do
      expect_discovery_document(discovery_document())
      expect_nodeinfo_document(%{"total" => 100, "activeMonth" => 1, "activeHalfYear" => 2})

      expected =
        Map.merge(blank_result(), %{
          user_count: 100,
          status_count: 3,
          instance_type: :mastodon,
          version: "1.2.3"
        })

      result = Nodeinfo.crawl("mastodon.social", %{})
      assert result == expected
    end

    test "handles small instances" do
      expect_discovery_document(discovery_document())
      expect_nodeinfo_document(%{"total" => 1, "activeMonth" => 1, "activeHalfYear" => 1})

      # Only the user count is expected to be populated here.
      expected = Map.put(blank_result(), :user_count, 1)

      result = Nodeinfo.crawl("mastodon.social", %{})
      assert result == expected
    end

    test "handles missing nodeinfo" do
      # The discovery document has no "links", so no second request is expected.
      expect_discovery_document(%{})

      result = Nodeinfo.crawl("mastodon.social", %{})
      assert result == blank_result()
    end
  end

  # Expects one GET of the well-known discovery URL, answered with `body`.
  defp expect_discovery_document(body) do
    expect(HttpMock, :get_and_decode, fn "https://mastodon.social/.well-known/nodeinfo" ->
      {:ok, body}
    end)
  end

  # Expects one GET of the nodeinfo 2.0 document, answered with a Mastodon
  # payload whose "usage" => "users" section is `users`.
  defp expect_nodeinfo_document(users) do
    expect(HttpMock, :get_and_decode, fn "https://mastodon.social/nodeinfo/2.0" ->
      {:ok, nodeinfo_document(users)}
    end)
  end

  # Discovery document advertising a single nodeinfo 2.0 link.
  defp discovery_document do
    %{
      "links" => [
        %{
          "rel" => "http://nodeinfo.diaspora.software/ns/schema/2.0",
          "href" => "https://mastodon.social/nodeinfo/2.0"
        }
      ]
    }
  end

  # Nodeinfo 2.0 payload for a Mastodon 1.2.3 instance with 3 local posts.
  defp nodeinfo_document(users) do
    %{
      "version" => "2.0",
      "software" => %{
        "name" => "Mastodon",
        "version" => "1.2.3"
      },
      "protocols" => ["activitypub"],
      "services" => %{
        "inbound" => [],
        "outbound" => []
      },
      "usage" => %{
        "users" => users,
        "localPosts" => 3
      },
      "openRegistrations" => true,
      "metadata" => %{}
    }
  end

  # Crawl result with every optional field unset.
  defp blank_result do
    %{
      description: nil,
      user_count: nil,
      status_count: nil,
      statuses_seen: 0,
      instance_type: nil,
      version: nil,
      federation_restrictions: [],
      interactions: %{},
      peers: []
    }
  end
end

Wyświetl plik

@ -1,2 +1,5 @@
# Test bootstrap script.

# Define HttpMock as a Mox mock implementing the Backend.HttpBehaviour callbacks.
Mox.defmock(HttpMock, for: Backend.HttpBehaviour)
# Store the mock under the :backend / :http config key, which is the key
# Backend.Util.http_client/0 reads, so code under test gets HttpMock.
Application.put_env(:backend, :http, HttpMock)
ExUnit.start()
# Put the repo's SQL sandbox in :manual mode.
Ecto.Adapters.SQL.Sandbox.mode(Backend.Repo, :manual)