fediverse.space/backend/lib/backend/crawler/api_crawler.ex

defmodule Backend.Crawler.ApiCrawler do
  @moduledoc """
  Behaviour specification that the crawler for every instance type must implement.

  Besides the callbacks declared here, this module defines the result struct (`t:t/0`)
  that crawlers produce and `get_default/0`, which returns an empty set of results.

  Implementations must respect the following rules:

  * Adhere to these configuration values:
    * `:status_age_limit_days` - only crawl statuses from the most recent N days
    * `:status_count_limit` - the maximum number of statuses to crawl in one go
    * `:personal_instance_threshold` - instances with fewer than this number of users
      should not be crawled (unless `:opt_in` is true)
  * Profiles with the string "nobot" (case insensitive) in their profile must not be
    included in any stats.
  * Check the most recent crawl of the instance so that old statuses are not re-crawled.
  """

  alias Backend.Crawler.Crawlers.Nodeinfo

  # Map of mentioned domain => number of interactions with it.
  @type instance_interactions :: %{String.t() => integer}

  # {domain, type}, e.g. {"gab.com", "reject"}
  @type federation_restriction :: {String.t(), String.t()}

  @type instance_type ::
          :mastodon | :pleroma | :gab | :misskey | :gnusocial | :smithereen | :friendica

  defstruct [
    :version,
    :description,
    :user_count,
    :status_count,
    :peers,
    :interactions,
    :statuses_seen,
    :instance_type,
    :federation_restrictions
  ]

  @type t() :: %__MODULE__{
          version: String.t() | nil,
          description: String.t() | nil,
          user_count: integer | nil,
          status_count: integer | nil,
          peers: [String.t()],
          interactions: instance_interactions,
          statuses_seen: integer,
          instance_type: instance_type | nil,
          federation_restrictions: [federation_restriction]
        }

  # Same keys as the struct, but deliberately kept as a bare map: this is the value
  # that `get_default/0` has always returned, so its shape must not change.
  @empty_result %{
    version: nil,
    description: nil,
    user_count: nil,
    status_count: nil,
    peers: [],
    interactions: %{},
    statuses_seen: 0,
    instance_type: nil,
    federation_restrictions: []
  }

  @doc """
  Check whether the instance at the given domain is of the type that this ApiCrawler implements.

  Arguments are the instance domain and the nodeinfo results.
  """
  # `t()` refers to this module's own result type. The previous `ApiCrawler.t()` was
  # an unaliased reference and therefore pointed at a non-existent top-level module.
  @callback is_instance_type?(String.t(), t()) :: boolean()

  @doc """
  Check whether the instance allows crawling according to its robots.txt or otherwise.

  The argument is the instance domain.
  """
  @callback allows_crawling?(String.t()) :: boolean()

  @doc """
  Crawl the instance at the given domain.

  Takes two arguments: the domain to crawl and the existing results (from nodeinfo).
  Returns the crawl results.
  """
  @callback crawl(String.t(), Nodeinfo.t()) :: t()

  @doc """
  Returns the default, empty state.

  The result is a plain map (not a `%#{inspect(__MODULE__)}{}` struct) with every scalar
  field set to `nil`, `peers` and `federation_restrictions` set to `[]`, `interactions`
  set to `%{}` and `statuses_seen` set to `0`.
  """
  @spec get_default() :: map()
  def get_default do
    @empty_result
  end
end
refactor/elixir backend 2019-07-14 11:47:06 +00:00			`defmodule Backend.Crawler.ApiCrawler do`
			`@moduledoc """`
			`This module is a specification. Crawlers for all instance types must implement its behaviour.`

			`Make sure to respect the following:`
			`* You must adhere to the following configuration values:`
			* `:status_age_limit_days` specifies that you must only crawl statuses from the most recent N days
			* `:status_count_limit` specifies the max number of statuses to crawl in one go
feature/administration 2019-07-26 14:34:23 +00:00			* `:personal_instance_threshold` specifies that instances with fewer than this number of users should not be crawled (unless :opt_in is true)
refactor/elixir backend 2019-07-14 11:47:06 +00:00			`* profiles with the string "nobot" (case insensitive) in their profile must not be included in any stats`
			`* Make sure to check the most recent crawl of the instance so you don't re-crawl old statuses`
			`"""`

Revert "add metadata endpoint" This reverts commit 82153b283b3be8e7a48da92a6d02d05ef28e98c5. 2019-08-09 16:59:51 +00:00			`alias Backend.Crawler.Crawlers.Nodeinfo`

refactor/elixir backend 2019-07-14 11:47:06 +00:00			`# {domain_mentioned, count}`
			`@type instance_interactions :: %{String.t() => integer}`
display federation restrictions 2019-08-29 16:54:34 +00:00			`# {domain, type} e.g. {"gab.com", "reject"}`
			`@type federation_restriction :: {String.t(), String.t()}`
refactor/elixir backend 2019-07-14 11:47:06 +00:00
update backend 2023-04-29 15:52:25 +00:00			`@type instance_type ::`
			`:mastodon \| :pleroma \| :gab \| :misskey \| :gnusocial \| :smithereen \| :friendica`
Color code graph 2019-07-24 15:51:44 +00:00
refactor/elixir backend 2019-07-14 11:47:06 +00:00			`defstruct [`
			`:version,`
			`:description,`
			`:user_count,`
			`:status_count,`
			`:peers,`
			`:interactions,`
Color code graph 2019-07-24 15:51:44 +00:00			`:statuses_seen,`
improved edges 2019-08-27 13:50:16 +00:00			`:instance_type,`
display federation restrictions 2019-08-29 16:54:34 +00:00			`:federation_restrictions`
refactor/elixir backend 2019-07-14 11:47:06 +00:00			`]`

			`@type t() :: %__MODULE__{`
improved edges 2019-08-27 13:50:16 +00:00			`version: String.t() \| nil,`
			`description: String.t() \| nil,`
Revert "add metadata endpoint" This reverts commit 82153b283b3be8e7a48da92a6d02d05ef28e98c5. 2019-08-09 16:59:51 +00:00			`user_count: integer \| nil,`
			`status_count: integer \| nil,`
refactor/elixir backend 2019-07-14 11:47:06 +00:00			`peers: [String.t()],`
			`interactions: instance_interactions,`
Color code graph 2019-07-24 15:51:44 +00:00			`statuses_seen: integer,`
improved edges 2019-08-27 13:50:16 +00:00			`instance_type: instance_type \| nil,`
display federation restrictions 2019-08-29 16:54:34 +00:00			`federation_restrictions: [federation_restriction]`
refactor/elixir backend 2019-07-14 11:47:06 +00:00			`}`

improved edges 2019-08-27 13:50:16 +00:00			`@empty_result %{`
			`version: nil,`
			`description: nil,`
			`user_count: nil,`
			`status_count: nil,`
			`peers: [],`
			`interactions: %{},`
			`statuses_seen: 0,`
			`instance_type: nil,`
display federation restrictions 2019-08-29 16:54:34 +00:00			`federation_restrictions: []`
improved edges 2019-08-27 13:50:16 +00:00			`}`

refactor/elixir backend 2019-07-14 11:47:06 +00:00			`@doc """`
			`Check whether the instance at the given domain is of the type that this ApiCrawler implements.`
Revert "add metadata endpoint" This reverts commit 82153b283b3be8e7a48da92a6d02d05ef28e98c5. 2019-08-09 16:59:51 +00:00			`Arguments are the instance domain and the nodeinfo results.`
refactor/elixir backend 2019-07-14 11:47:06 +00:00			`"""`
improved edges 2019-08-27 13:50:16 +00:00			`@callback is_instance_type?(String.t(), ApiCrawler.t()) :: boolean()`
check robots.txt for permission to crawl 2019-07-19 20:00:28 +00:00
			`@doc """`
			`Check whether the instance allows crawling according to its robots.txt or otherwise.`
			`"""`
			`@callback allows_crawling?(String.t()) :: boolean()`

refactor/elixir backend 2019-07-14 11:47:06 +00:00			`@doc """`
			`Crawl the instance at the given domain.`
Revert "add metadata endpoint" This reverts commit 82153b283b3be8e7a48da92a6d02d05ef28e98c5. 2019-08-09 16:59:51 +00:00			`Takes two arguments: the domain to crawl and the existing results (from nodeinfo).`
refactor/elixir backend 2019-07-14 11:47:06 +00:00			`"""`
Revert "add metadata endpoint" This reverts commit 82153b283b3be8e7a48da92a6d02d05ef28e98c5. 2019-08-09 16:59:51 +00:00			`@callback crawl(String.t(), Nodeinfo.t()) :: t()`
improved edges 2019-08-27 13:50:16 +00:00
			`@doc """`
			`Returns the default, empty state`
			`"""`
			`def get_default do`
			`@empty_result`
			`end`
refactor/elixir backend 2019-07-14 11:47:06 +00:00			`end`