diff --git a/backend/config/config.exs b/backend/config/config.exs
index be41b33..d652545 100644
--- a/backend/config/config.exs
+++ b/backend/config/config.exs
@@ -18,6 +18,11 @@ config :backend, BackendWeb.Endpoint,
 config :backend, Backend.Repo,
   queue_target: 5000
 
+config :backend, Backend.Elasticsearch.Cluster,
+  url: "http://elastic:9200",
+  api: Elasticsearch.API.HTTP,
+  json_library: Jason
+
 # Configures Elixir's Logger
 config :logger, :console,
   format: "$time $metadata[$level] $message\n",
diff --git a/backend/config/releases.exs b/backend/config/releases.exs
index dfd3c76..bd80c1e 100644
--- a/backend/config/releases.exs
+++ b/backend/config/releases.exs
@@ -14,6 +14,9 @@ config :backend, Backend.Repo,
   pool_size: String.to_integer(System.get_env("POOL_SIZE") || "10"),
   ssl: ssl
 
+config :backend, Backend.Elasticsearch.Cluster,
+  url: System.get_env("ELASTICSEARCH_URL") || "http://localhost:9200"
+
 config :appsignal, :config,
   otp_app: :backend,
   revision: System.get_env("GIT_REV")
diff --git a/backend/lib/backend/api.ex b/backend/lib/backend/api.ex
index 9c6df70..cabd619 100644
--- a/backend/lib/backend/api.ex
+++ b/backend/lib/backend/api.ex
@@ -148,10 +148,78 @@ defmodule Backend.Api do
   end
 
   def search_instances(query, filters, from \\ 0) do
-    # TODO: implement w. postgres FTS
+    page_size = 50
+
+    search_response =
+      Elasticsearch.post(
+        Backend.Elasticsearch.Cluster,
+        "/instances/_search",
+        build_es_query(query, filters, page_size, from)
+      )
+
+    with {:ok, result} <- search_response do
+      hits =
+        get_in(result, ["hits", "hits"])
+        |> Enum.map(fn h -> h |> Map.get("_source") |> convert_keys_to_atoms() end)
+
+      next =
+        if length(hits) < page_size do
+          nil
+        else
+          from + page_size
+        end
+
+      %{
+        hits: hits,
+        next: next
+      }
+    end
+  end
+
+  defp build_es_query(query, filters, page_size, from) do
+    opt_out_filter = %{"term" => %{"opt_out" => "false"}}
+    filters = [opt_out_filter | filters]
+
     %{
-      hits: [],
-      next: nil
+      "sort" => "_score",
+      "from" => from,
+      "size" => page_size,
+      # This must be >0, otherwise all documents will be returned
+      "min_score" => 1,
+      "query" => %{
+        "bool" => %{
+          "filter" => filters,
+          "should" => [
+            %{
+              "multi_match" => %{
+                "query" => query,
+                "fields" => [
+                  "description.*",
+                  "domain.english"
+                ]
+              }
+            },
+            %{
+              # If the query exactly matches a domain, that instance should always be the first result.
+              "wildcard" => %{
+                "domain.keyword" => %{
+                  "value" => query,
+                  "boost" => 100
+                }
+              }
+            },
+            %{
+              # Give substring matches in domains a large boost, too.
+ "wildcard" => %{ + "domain.keyword" => %{ + "value" => "*#{query}*", + "boost" => 10 + } + } + } + ] + } + } } end end diff --git a/backend/lib/backend/application.ex b/backend/lib/backend/application.ex index 055f6f6..2d8ec13 100644 --- a/backend/lib/backend/application.ex +++ b/backend/lib/backend/application.ex @@ -26,6 +26,7 @@ defmodule Backend.Application do ), Supervisor.child_spec({Task, fn -> HTTPoison.start() end}, id: :start_httpoison), Backend.Scheduler, + Backend.Elasticsearch.Cluster, Graph.Cache ] diff --git a/backend/lib/backend/crawler/crawler.ex b/backend/lib/backend/crawler/crawler.ex index fb21015..9f69866 100644 --- a/backend/lib/backend/crawler/crawler.ex +++ b/backend/lib/backend/crawler/crawler.ex @@ -189,6 +189,8 @@ defmodule Backend.Crawler do conflict_target: :domain ) + Elasticsearch.put_document!(Backend.Elasticsearch.Cluster, instance, "instances/_doc") + ## Save details of a new crawl ## curr_crawl = Repo.insert!(%Crawl{ diff --git a/backend/lib/backend/elasticsearch/cluster.ex b/backend/lib/backend/elasticsearch/cluster.ex new file mode 100644 index 0000000..d34a9c8 --- /dev/null +++ b/backend/lib/backend/elasticsearch/cluster.ex @@ -0,0 +1,22 @@ +defmodule Backend.Elasticsearch.Cluster do + @moduledoc false + use Elasticsearch.Cluster, otp_app: :backend + + def init(config) do + indexes = %{ + instances: %{ + settings: Application.app_dir(:backend, "priv/elasticsearch/instances.json"), + store: Backend.Elasticsearch.Store, + sources: [Backend.Instance], + bulk_page_size: 1000, + bulk_wait_interval: 1000 + } + } + + config = + config + |> Map.put(:indexes, indexes) + + {:ok, config} + end +end diff --git a/backend/lib/backend/elasticsearch/store.ex b/backend/lib/backend/elasticsearch/store.ex new file mode 100644 index 0000000..aa5bc98 --- /dev/null +++ b/backend/lib/backend/elasticsearch/store.ex @@ -0,0 +1,17 @@ +defmodule Backend.Elasticsearch.Store do + @moduledoc false + @behaviour Elasticsearch.Store + + alias Backend.Repo + + @impl true + def stream(schema) do + Repo.stream(schema) + end + + @impl true + def transaction(fun) do + {:ok, result} = Repo.transaction(fun, timeout: :infinity) + result + end +end diff --git a/backend/lib/backend/instance.ex b/backend/lib/backend/instance.ex index f690e3c..fa351f5 100644 --- a/backend/lib/backend/instance.ex +++ b/backend/lib/backend/instance.ex @@ -62,4 +62,20 @@ defmodule Backend.Instance do |> validate_required([:domain]) |> put_assoc(:peers, attrs.peers) end + + defimpl Elasticsearch.Document, for: Backend.Instance do + def id(instance), do: instance.id + def routing(_), do: false + + def encode(instance) do + # Make sure this corresponds with priv/elasticseach/instances.json + %{ + domain: instance.domain, + description: instance.description, + type: instance.type, + user_count: instance.user_count, + opt_out: instance.opt_out + } + end + end end diff --git a/backend/lib/backend/release.ex b/backend/lib/backend/release.ex index 7f01eb5..cb5eaaa 100644 --- a/backend/lib/backend/release.ex +++ b/backend/lib/backend/release.ex @@ -9,11 +9,20 @@ defmodule Backend.Release do :ssl, :postgrex, :ecto, + :elasticsearch, @app ] + # Ecto repos to start, if any + @repos Application.compile_env(:backend, :ecto_repos, []) + # Elasticsearch clusters to start + @clusters [Backend.Elasticsearch.Cluster] + # Elasticsearch indexes to build + @indexes [:instances] + def run_all do migrate() + build_elasticsearch_indexes() end def migrate do @@ -26,6 +35,28 @@ defmodule Backend.Release do {:ok, _, _} = 
       Ecto.Migrator.with_repo(repo, &Ecto.Migrator.run(&1, :down, to: version))
   end
+  def build_elasticsearch_indexes do
+    start_services()
+    IO.puts("Building indexes...")
+    Enum.each(@indexes, &Elasticsearch.Index.hot_swap(Backend.Elasticsearch.Cluster, &1))
+    stop_services()
+  end
+
+  # Ensure that all OTP apps, repos used by your Elasticsearch store,
+  # and your Elasticsearch Cluster(s) are started
+  defp start_services do
+    IO.puts("Starting dependencies...")
+    Enum.each(@start_apps, &Application.ensure_all_started/1)
+    IO.puts("Starting repos...")
+    Enum.each(@repos, & &1.start_link(pool_size: 1))
+    IO.puts("Starting clusters...")
+    Enum.each(@clusters, & &1.start_link())
+  end
+
+  defp stop_services do
+    :init.stop()
+  end
+
   defp repos do
     Application.load(@app)
     Application.fetch_env!(@app, :ecto_repos)
diff --git a/backend/lib/backend_web/controllers/admin_controller.ex b/backend/lib/backend_web/controllers/admin_controller.ex
index 276c416..66ff536 100644
--- a/backend/lib/backend_web/controllers/admin_controller.ex
+++ b/backend/lib/backend_web/controllers/admin_controller.ex
@@ -19,6 +19,14 @@ defmodule BackendWeb.AdminController do
     with {:ok, domain} <- Auth.verify_token(token) do
       %{"optIn" => opt_in, "optOut" => opt_out} = params
 
+      # Make sure to update Elasticsearch so that the instance is no longer returned in search results
+      es_instance =
+        Api.get_instance(domain)
+        |> Map.put(:opt_in, opt_in)
+        |> Map.put(:opt_out, opt_out)
+
+      Elasticsearch.put_document!(Backend.Elasticsearch.Cluster, es_instance, "instances/_doc")
+
       ecto_instance = %Instance{
         domain: domain,
         opt_in: opt_in,
diff --git a/backend/mix.exs b/backend/mix.exs
index 239270f..1ba757e 100644
--- a/backend/mix.exs
+++ b/backend/mix.exs
@@ -23,6 +23,7 @@ defmodule Backend.MixProject do
       :logger,
       :runtime_tools,
       :gollum,
+      :elasticsearch,
       :appsignal,
       :swoosh,
       :gen_smtp
@@ -63,6 +64,7 @@ defmodule Backend.MixProject do
       {:public_suffix, git: "https://github.com/axelson/publicsuffix-elixir"},
       {:swoosh, "~> 1.0"},
       {:gen_smtp, "~> 1.2"},
+      {:elasticsearch, "~> 1.0"},
       {:appsignal_phoenix, "~> 2.3"},
       {:credo, "~> 1.7", only: [:dev, :test], runtime: false},
       {:nebulex, "~> 2.4.2"},
diff --git a/backend/mix.lock b/backend/mix.lock
index 00e8fd0..b8805b9 100644
--- a/backend/mix.lock
+++ b/backend/mix.lock
@@ -18,6 +18,7 @@
   "decorator": {:hex, :decorator, "1.4.0", "a57ac32c823ea7e4e67f5af56412d12b33274661bb7640ec7fc882f8d23ac419", [:mix], [], "hexpm", "0a07cedd9083da875c7418dea95b78361197cf2bf3211d743f6f7ce39656597f"},
   "ecto": {:hex, :ecto, "3.10.1", "c6757101880e90acc6125b095853176a02da8f1afe056f91f1f90b80c9389822", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "d2ac4255f1601bdf7ac74c0ed971102c6829dc158719b94bd30041bbad77f87a"},
   "ecto_sql": {:hex, :ecto_sql, "3.10.1", "6ea6b3036a0b0ca94c2a02613fd9f742614b5cfe494c41af2e6571bb034dd94c", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.10.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.6.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.16.0 or ~> 0.17.0 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "f6a25bdbbd695f12c8171eaff0851fa4c8e72eec1e98c7364402dda9ce11c56b"},
"f6a25bdbbd695f12c8171eaff0851fa4c8e72eec1e98c7364402dda9ce11c56b"}, + "elasticsearch": {:hex, :elasticsearch, "1.0.1", "8339538d90af6b280f10ecd02b1eae372f09373e629b336a13461babf7366495", [:mix], [{:httpoison, ">= 0.0.0", [hex: :httpoison, repo: "hexpm", optional: false]}, {:poison, ">= 0.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:sigaws, "~> 0.7", [hex: :sigaws, repo: "hexpm", optional: true]}, {:vex, "~> 0.6", [hex: :vex, repo: "hexpm", optional: false]}], "hexpm", "83e7d8b8bee3e7e19a06ab4d357d24845ac1da894e79678227fd52c0b7f71867"}, "ex2ms": {:hex, :ex2ms, "1.6.1", "66d472eb14da43087c156e0396bac3cc7176b4f24590a251db53f84e9a0f5f72", [:mix], [], "hexpm", "a7192899d84af03823a8ec2f306fa858cbcce2c2e7fd0f1c49e05168fb9c740e"}, "ex_rated": {:hex, :ex_rated, "2.1.0", "d40e6fe35097b10222df2db7bb5dd801d57211bac65f29063de5f201c2a6aebc", [:mix], [{:ex2ms, "~> 1.5", [hex: :ex2ms, repo: "hexpm", optional: false]}], "hexpm", "936c155337253ed6474f06d941999dd3a9cf0fe767ec99a59f2d2989dc2cc13f"}, "expo": {:hex, :expo, "0.4.1", "1c61d18a5df197dfda38861673d392e642649a9cef7694d2f97a587b2cfb319b", [:mix], [], "hexpm", "2ff7ba7a798c8c543c12550fa0e2cbc81b95d4974c65855d8d15ba7b37a1ce47"}, diff --git a/docker-compose.yaml b/docker-compose.yaml index 8e70009..b302256 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -5,22 +5,32 @@ networks: external: false services: - web: - image: ghcr.io/fediverse-space/fediverse.space:main + phoenix: + build: backend restart: unless-stopped networks: - space depends_on: - db + gephi: + build: gephi + db: image: postgres:15-alpine restart: unless-stopped environment: - - POSTGRES_PASSWORD=${DB_PASSWORD} - - POSTGRES_USER=${DB_USER} - - POSTGRES_DB=${DB_NAME} + - POSTGRES_PASSWORD=postgres + - POSTGRES_USER=postgres networks: - space volumes: - /var/lib/postgresql/data + + elastic: + image: elasticsearch:8.7.0 + restart: unless-stopped + environment: + - discovery.type=single-node + networks: + - space diff --git a/example.env b/example.env index e2d9579..3ae6590 100644 --- a/example.env +++ b/example.env @@ -1,4 +1,4 @@ DATABASE_URL="postgres://postgres:postgres@localhost:5432/backend_dev" PORT=4000 BACKEND_HOSTNAME=localhost -SECRET_KEY_BASE=jLqbBjtQTyZj+1yLwDV8xgZYvZKIBx1MBWbcC2a0mZqB5ivYKQ7GOqNR91g6YnR8 +SECRET_KEY_BASE=jLqbBjtQTyZj+1yLwDV8xgZYvZKIBx1MBWbcC2a0mZqB5ivYKQ7GOqNR91g6YnR8 \ No newline at end of file