Replaced postgresql by elastic search

main
Štěpán Škorpil 2022-09-14 21:16:04 +02:00
rodzic 7eb28e5d84
commit f6f9d758a1
81 zmienionych plików z 5764 dodań i 8406 usunięć

Wyświetl plik

@ -1,20 +1,22 @@
FROM node:16-bullseye AS build
ENV POSTGRES_URL='postgresql://fedisearch:passwd@postgres:5432/fedisearch?schema=public' \
ENV ELASTIC_URL='http://elastic:9200' \
ELASTIC_USER='elastic' \
ELASTIC_PASSWORD='' \
SEED_NODE_DOMAIN='mastodon.social' \
REATTEMPT_MINUTES='60' \
REFRESH_HOURS='120' \
WAIT_FOR_JOB_MINUTES='60' \
DEFAULT_TIMEOUT_MILLISECONDS='10000' \
BANNED_DOMAINS='' \
TZ='UTC'
WORKDIR /srv
COPY application/package*.json ./
COPY application/prisma ./prisma/
RUN npm install
COPY application/. .
RUN npm run build
FROM build AS dev
CMD npm run dev
CMD npx tsc --watch
FROM node:16-bullseye AS prod
RUN groupadd -g 1001 nodejs
@ -22,7 +24,6 @@ RUN useradd -u 1001 -g 1001 nextjs
WORKDIR /srv
USER nextjs
COPY --from=build /srv/node_modules ./node_modules
COPY --from=build /srv/prisma ./prisma
COPY --from=build /srv/package*.json ./
COPY --from=build /srv/dist ./dist
CMD npm run start:deploy

Wyświetl plik

@ -24,15 +24,18 @@ Data providers for more apps will be probably added soon (Pull requests are welc
Configuration is done using environmental variables:
| Variable | Description | Default value / Example value |
|--------------------------------|--------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------|
| `POSTGRES_URL` | Postgres database uri | `postgresql://fedisearch:passwd@postgres:5432/fedisearch?schema=public` |
| `SEED_NODE_DOMAIN` | Domain of the first node to search users and other nodes on | `mastodon.social` |
| `REATTEMPT_MINUTES` | _Optional_, How many minutes should be waited for next node refresh attempt if the refresh fails | `60 ` |
| `REFRESH_HOURS` | _Optional_, How often (in hours) should be node info refreshed | `120` |
| `WAIT_FOR_JOB_MINUTES` | _Optional_, How many minutes should the thread sleep if there are no nodes to refresh | `60` |
| `DEFAULT_TIMEOUT_MILLISECONDS` | _Optional_, How many milliseconds should http wait for node api response on refresh | `10000` |
| `TZ` | _Optional_, Timezone | `UTC` |
| Variable | Description | Default value / Example value |
|--------------------------------|--------------------------------------------------------------------------------------------------|-------------------------------|
| `ELASTIC_URL` | Url address of ElasticSearch server | `http://elastic:9200` |
| `ELASTIC_USER` | Username for EalsticSearch server | `elastic` |
| `ELASTIC_PASSWORD` | Username for EalsticSearch server | empty |
| `SEED_NODE_DOMAIN` | Domain of the first node to search users and other nodes on | `mastodon.social` |
| `REATTEMPT_MINUTES` | _Optional_, How many minutes should be waited for next node refresh attempt if the refresh fails | `60 ` |
| `REFRESH_HOURS` | _Optional_, How often (in hours) should be node info refreshed | `120` |
| `WAIT_FOR_JOB_MINUTES` | _Optional_, How many minutes should the thread sleep if there are no nodes to refresh | `60` |
| `DEFAULT_TIMEOUT_MILLISECONDS` | _Optional_, How many milliseconds should http wait for node api response on refresh | `10000` |
| `BANNED_DOMAINS` | _Optional_, Domains not to index (even with subdomains) | _empty_ |
| `TZ` | _Optional_, Timezone | `UTC` |
## Deploy
App is designed to be run in docker container and deployed using docker-compose.
More info can be found in [FediSearch example docker-compose](https://github.com/Stopka/fedisearch-compose) project

12062
application/package-lock.json wygenerowano

Plik diff jest za duży Load Diff

Wyświetl plik

@ -19,18 +19,12 @@
"test:cov": "jest --coverage",
"test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand",
"test:e2e": "jest --config ./test/jest-e2e.json",
"migrate:dev": "prisma migrate dev",
"migrate:dev:create": "prisma migrate dev --create-only",
"migrate:deploy": "npx prisma migrate deploy",
"migrate:reset": "npx prisma migrate reset",
"migrate:resolve": "npx prisma migrate resolve",
"prisma:generate": "npx prisma generate",
"prisma:studio": "npx prisma studio",
"start:deploy": "npm run migrate:deploy && npm run start"
},
"dependencies": {
"@prisma/client": "^3.6.0",
"@elastic/elasticsearch": "^8.2.1",
"axios": "^0.21.1",
"geoip-lite": "^1.4.6",
"npmlog": "^6.0.0",
"rimraf": "^3.0.2",
"striptags": "^3.2.0",
@ -38,6 +32,7 @@
"zod": "^3.11.6"
},
"devDependencies": {
"@types/geoip-lite": "^1.4.1",
"@types/jest": "^27.0.2",
"@types/node": "^16.11.10",
"@types/npmlog": "^4.1.3",
@ -50,7 +45,6 @@
"eslint-plugin-promise": "^5.1.1",
"eslint-plugin-react": "^7.27.1",
"jest": "^27.3.0",
"prisma": "^3.6.0",
"standard": "*",
"ts-jest": "^27.0.7",
"typescript": "^4.3.5"

Wyświetl plik

@ -1,174 +0,0 @@
-- CreateEnum
CREATE TYPE "FeedType" AS ENUM ('account', 'channel');
-- CreateTable
CREATE TABLE "Tag" (
"id" UUID NOT NULL,
"name" TEXT NOT NULL,
CONSTRAINT "Tag_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "Email" (
"id" UUID NOT NULL,
"address" TEXT NOT NULL,
"feedId" UUID NOT NULL,
CONSTRAINT "Email_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "FeedToTag" (
"feedId" UUID NOT NULL,
"tagId" UUID NOT NULL,
CONSTRAINT "FeedToTag_pkey" PRIMARY KEY ("feedId","tagId")
);
-- CreateTable
CREATE TABLE "Field" (
"id" UUID NOT NULL,
"name" TEXT NOT NULL,
"value" TEXT NOT NULL,
"feedId" UUID NOT NULL,
CONSTRAINT "Field_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "Feed" (
"id" UUID NOT NULL,
"nodeId" UUID NOT NULL,
"foundAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"refreshedAt" TIMESTAMP(3) NOT NULL,
"name" TEXT NOT NULL,
"displayName" TEXT NOT NULL,
"description" TEXT NOT NULL,
"followersCount" INTEGER NOT NULL,
"followingCount" INTEGER NOT NULL,
"statusesCount" INTEGER,
"bot" BOOLEAN,
"url" TEXT NOT NULL,
"avatar" TEXT,
"locked" BOOLEAN NOT NULL,
"lastStatusAt" TIMESTAMP(3),
"createdAt" TIMESTAMP(3) NOT NULL,
"type" "FeedType" NOT NULL DEFAULT E'account',
"parentFeedName" TEXT,
"parentFeedDomain" TEXT,
"fulltext" TEXT NOT NULL DEFAULT E'',
CONSTRAINT "Feed_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "Node" (
"id" UUID NOT NULL,
"softwareName" TEXT,
"softwareVersion" TEXT,
"totalUserCount" INTEGER,
"monthActiveUserCount" INTEGER,
"halfYearActiveUserCount" INTEGER,
"statusesCount" INTEGER,
"openRegistrations" BOOLEAN,
"foundAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"refreshedAt" TIMESTAMP(3),
"refreshAttemptedAt" TIMESTAMP(3),
"domain" TEXT NOT NULL,
CONSTRAINT "Node_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE UNIQUE INDEX "Tag_name_key" ON "Tag"("name");
-- CreateIndex
CREATE INDEX "Email_address_idx" ON "Email"("address");
-- CreateIndex
CREATE INDEX "Field_name_idx" ON "Field"("name");
-- CreateIndex
CREATE INDEX "Field_value_idx" ON "Field"("value");
-- CreateIndex
CREATE INDEX "Feed_displayName_idx" ON "Feed"("displayName");
-- CreateIndex
CREATE INDEX "Feed_description_idx" ON "Feed"("description");
-- CreateIndex
CREATE INDEX "Feed_bot_idx" ON "Feed"("bot");
-- CreateIndex
CREATE INDEX "Feed_locked_idx" ON "Feed"("locked");
-- CreateIndex
CREATE INDEX "Feed_lastStatusAt_idx" ON "Feed"("lastStatusAt");
-- CreateIndex
CREATE INDEX "Feed_createdAt_idx" ON "Feed"("createdAt");
-- CreateIndex
CREATE INDEX "Feed_refreshedAt_idx" ON "Feed"("refreshedAt");
-- CreateIndex
CREATE INDEX "Feed_parentFeedName_parentFeedDomain_idx" ON "Feed"("parentFeedName", "parentFeedDomain");
-- CreateIndex
CREATE INDEX "Feed_type_idx" ON "Feed"("type");
-- CreateIndex
CREATE INDEX "Feed_fulltext_idx" ON "Feed"("fulltext");
-- CreateIndex
CREATE UNIQUE INDEX "Feed_name_nodeId_key" ON "Feed"("name", "nodeId");
-- CreateIndex
CREATE UNIQUE INDEX "Node_domain_key" ON "Node"("domain");
-- CreateIndex
CREATE INDEX "Node_softwareName_idx" ON "Node"("softwareName");
-- CreateIndex
CREATE INDEX "Node_softwareVersion_idx" ON "Node"("softwareVersion");
-- CreateIndex
CREATE INDEX "Node_totalUserCount_idx" ON "Node"("totalUserCount");
-- CreateIndex
CREATE INDEX "Node_monthActiveUserCount_idx" ON "Node"("monthActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_halfYearActiveUserCount_idx" ON "Node"("halfYearActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_statusesCount_idx" ON "Node"("statusesCount");
-- CreateIndex
CREATE INDEX "Node_openRegistrations_idx" ON "Node"("openRegistrations");
-- CreateIndex
CREATE INDEX "Node_refreshedAt_idx" ON "Node"("refreshedAt");
-- CreateIndex
CREATE INDEX "Node_refreshAttemptedAt_idx" ON "Node"("refreshAttemptedAt");
-- CreateIndex
CREATE INDEX "Node_foundAt_idx" ON "Node"("foundAt");
-- AddForeignKey
ALTER TABLE "Email" ADD CONSTRAINT "Email_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "FeedToTag" ADD CONSTRAINT "FeedToTag_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "FeedToTag" ADD CONSTRAINT "FeedToTag_tagId_fkey" FOREIGN KEY ("tagId") REFERENCES "Tag"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "Field" ADD CONSTRAINT "Field_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "Feed" ADD CONSTRAINT "Feed_nodeId_fkey" FOREIGN KEY ("nodeId") REFERENCES "Node"("id") ON DELETE RESTRICT ON UPDATE CASCADE;

Wyświetl plik

@ -1,163 +0,0 @@
/*
Warnings:
- A unique constraint covering the columns `[name,nodeId]` on the table `Feed` will be added. If there are existing duplicate values, this will fail.
- A unique constraint covering the columns `[domain]` on the table `Node` will be added. If there are existing duplicate values, this will fail.
- A unique constraint covering the columns `[name]` on the table `Tag` will be added. If there are existing duplicate values, this will fail.
*/
-- DropIndex
DROP INDEX "Email_address_idx";
-- DropIndex
DROP INDEX "Feed_bot_idx";
-- DropIndex
DROP INDEX "Feed_createdAt_idx";
-- DropIndex
DROP INDEX "Feed_description_idx";
-- DropIndex
DROP INDEX "Feed_displayName_idx";
-- DropIndex
DROP INDEX "Feed_fulltext_idx";
-- DropIndex
DROP INDEX "Feed_lastStatusAt_idx";
-- DropIndex
DROP INDEX "Feed_locked_idx";
-- DropIndex
DROP INDEX "Feed_name_nodeId_key";
-- DropIndex
DROP INDEX "Feed_parentFeedName_parentFeedDomain_idx";
-- DropIndex
DROP INDEX "Feed_refreshedAt_idx";
-- DropIndex
DROP INDEX "Feed_type_idx";
-- DropIndex
DROP INDEX "Field_name_idx";
-- DropIndex
DROP INDEX "Field_value_idx";
-- DropIndex
DROP INDEX "Node_domain_key";
-- DropIndex
DROP INDEX "Node_foundAt_idx";
-- DropIndex
DROP INDEX "Node_halfYearActiveUserCount_idx";
-- DropIndex
DROP INDEX "Node_monthActiveUserCount_idx";
-- DropIndex
DROP INDEX "Node_openRegistrations_idx";
-- DropIndex
DROP INDEX "Node_refreshAttemptedAt_idx";
-- DropIndex
DROP INDEX "Node_refreshedAt_idx";
-- DropIndex
DROP INDEX "Node_softwareName_idx";
-- DropIndex
DROP INDEX "Node_softwareVersion_idx";
-- DropIndex
DROP INDEX "Node_statusesCount_idx";
-- DropIndex
DROP INDEX "Node_totalUserCount_idx";
-- DropIndex
DROP INDEX "Tag_name_key";
-- CreateIndex
CREATE INDEX "Email_address_idx" ON "Email"("address");
-- CreateIndex
CREATE INDEX "Feed_displayName_idx" ON "Feed"("displayName");
-- CreateIndex
CREATE INDEX "Feed_description_idx" ON "Feed"("description");
-- CreateIndex
CREATE INDEX "Feed_bot_idx" ON "Feed"("bot");
-- CreateIndex
CREATE INDEX "Feed_locked_idx" ON "Feed"("locked");
-- CreateIndex
CREATE INDEX "Feed_lastStatusAt_idx" ON "Feed"("lastStatusAt");
-- CreateIndex
CREATE INDEX "Feed_createdAt_idx" ON "Feed"("createdAt");
-- CreateIndex
CREATE INDEX "Feed_refreshedAt_idx" ON "Feed"("refreshedAt");
-- CreateIndex
CREATE INDEX "Feed_parentFeedName_parentFeedDomain_idx" ON "Feed"("parentFeedName", "parentFeedDomain");
-- CreateIndex
CREATE INDEX "Feed_type_idx" ON "Feed"("type");
-- CreateIndex
CREATE INDEX "Feed_fulltext_idx" ON "Feed"("fulltext");
-- CreateIndex
CREATE UNIQUE INDEX "Feed_name_nodeId_key" ON "Feed"("name", "nodeId");
-- CreateIndex
CREATE INDEX "Field_name_idx" ON "Field"("name");
-- CreateIndex
CREATE INDEX "Field_value_idx" ON "Field"("value");
-- CreateIndex
CREATE UNIQUE INDEX "Node_domain_key" ON "Node"("domain");
-- CreateIndex
CREATE INDEX "Node_softwareName_idx" ON "Node"("softwareName");
-- CreateIndex
CREATE INDEX "Node_softwareVersion_idx" ON "Node"("softwareVersion");
-- CreateIndex
CREATE INDEX "Node_totalUserCount_idx" ON "Node"("totalUserCount");
-- CreateIndex
CREATE INDEX "Node_monthActiveUserCount_idx" ON "Node"("monthActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_halfYearActiveUserCount_idx" ON "Node"("halfYearActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_statusesCount_idx" ON "Node"("statusesCount");
-- CreateIndex
CREATE INDEX "Node_openRegistrations_idx" ON "Node"("openRegistrations");
-- CreateIndex
CREATE INDEX "Node_refreshedAt_idx" ON "Node"("refreshedAt");
-- CreateIndex
CREATE INDEX "Node_refreshAttemptedAt_idx" ON "Node"("refreshAttemptedAt");
-- CreateIndex
CREATE INDEX "Node_foundAt_idx" ON "Node"("foundAt");
-- CreateIndex
CREATE UNIQUE INDEX "Tag_name_key" ON "Tag"("name");

Wyświetl plik

@ -1,167 +0,0 @@
/*
Warnings:
- A unique constraint covering the columns `[name,nodeId]` on the table `Feed` will be added. If there are existing duplicate values, this will fail.
- A unique constraint covering the columns `[domain]` on the table `Node` will be added. If there are existing duplicate values, this will fail.
- A unique constraint covering the columns `[name]` on the table `Tag` will be added. If there are existing duplicate values, this will fail.
*/
-- DropIndex
DROP INDEX "Email_address_idx";
-- DropIndex
DROP INDEX "Feed_bot_idx";
-- DropIndex
DROP INDEX "Feed_createdAt_idx";
-- DropIndex
DROP INDEX "Feed_description_idx";
-- DropIndex
DROP INDEX "Feed_displayName_idx";
-- DropIndex
DROP INDEX "Feed_fulltext_idx";
-- DropIndex
DROP INDEX "Feed_lastStatusAt_idx";
-- DropIndex
DROP INDEX "Feed_locked_idx";
-- DropIndex
DROP INDEX "Feed_name_nodeId_key";
-- DropIndex
DROP INDEX "Feed_parentFeedName_parentFeedDomain_idx";
-- DropIndex
DROP INDEX "Feed_refreshedAt_idx";
-- DropIndex
DROP INDEX "Feed_type_idx";
-- DropIndex
DROP INDEX "Field_name_idx";
-- DropIndex
DROP INDEX "Field_value_idx";
-- DropIndex
DROP INDEX "Node_domain_key";
-- DropIndex
DROP INDEX "Node_foundAt_idx";
-- DropIndex
DROP INDEX "Node_halfYearActiveUserCount_idx";
-- DropIndex
DROP INDEX "Node_monthActiveUserCount_idx";
-- DropIndex
DROP INDEX "Node_openRegistrations_idx";
-- DropIndex
DROP INDEX "Node_refreshAttemptedAt_idx";
-- DropIndex
DROP INDEX "Node_refreshedAt_idx";
-- DropIndex
DROP INDEX "Node_softwareName_idx";
-- DropIndex
DROP INDEX "Node_softwareVersion_idx";
-- DropIndex
DROP INDEX "Node_statusesCount_idx";
-- DropIndex
DROP INDEX "Node_totalUserCount_idx";
-- DropIndex
DROP INDEX "Tag_name_key";
-- AlterTable
ALTER TABLE "Feed" ALTER COLUMN "followersCount" DROP NOT NULL,
ALTER COLUMN "followingCount" DROP NOT NULL;
-- CreateIndex
CREATE INDEX "Email_address_idx" ON "Email"("address");
-- CreateIndex
CREATE INDEX "Feed_displayName_idx" ON "Feed"("displayName");
-- CreateIndex
CREATE INDEX "Feed_description_idx" ON "Feed"("description");
-- CreateIndex
CREATE INDEX "Feed_bot_idx" ON "Feed"("bot");
-- CreateIndex
CREATE INDEX "Feed_locked_idx" ON "Feed"("locked");
-- CreateIndex
CREATE INDEX "Feed_lastStatusAt_idx" ON "Feed"("lastStatusAt");
-- CreateIndex
CREATE INDEX "Feed_createdAt_idx" ON "Feed"("createdAt");
-- CreateIndex
CREATE INDEX "Feed_refreshedAt_idx" ON "Feed"("refreshedAt");
-- CreateIndex
CREATE INDEX "Feed_parentFeedName_parentFeedDomain_idx" ON "Feed"("parentFeedName", "parentFeedDomain");
-- CreateIndex
CREATE INDEX "Feed_type_idx" ON "Feed"("type");
-- CreateIndex
CREATE INDEX "Feed_fulltext_idx" ON "Feed"("fulltext");
-- CreateIndex
CREATE UNIQUE INDEX "Feed_name_nodeId_key" ON "Feed"("name", "nodeId");
-- CreateIndex
CREATE INDEX "Field_name_idx" ON "Field"("name");
-- CreateIndex
CREATE INDEX "Field_value_idx" ON "Field"("value");
-- CreateIndex
CREATE UNIQUE INDEX "Node_domain_key" ON "Node"("domain");
-- CreateIndex
CREATE INDEX "Node_softwareName_idx" ON "Node"("softwareName");
-- CreateIndex
CREATE INDEX "Node_softwareVersion_idx" ON "Node"("softwareVersion");
-- CreateIndex
CREATE INDEX "Node_totalUserCount_idx" ON "Node"("totalUserCount");
-- CreateIndex
CREATE INDEX "Node_monthActiveUserCount_idx" ON "Node"("monthActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_halfYearActiveUserCount_idx" ON "Node"("halfYearActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_statusesCount_idx" ON "Node"("statusesCount");
-- CreateIndex
CREATE INDEX "Node_openRegistrations_idx" ON "Node"("openRegistrations");
-- CreateIndex
CREATE INDEX "Node_refreshedAt_idx" ON "Node"("refreshedAt");
-- CreateIndex
CREATE INDEX "Node_refreshAttemptedAt_idx" ON "Node"("refreshAttemptedAt");
-- CreateIndex
CREATE INDEX "Node_foundAt_idx" ON "Node"("foundAt");
-- CreateIndex
CREATE UNIQUE INDEX "Tag_name_key" ON "Tag"("name");

Wyświetl plik

@ -1,4 +0,0 @@
update "Node"
set "refreshedAt"=NULL,
"refreshAttemptedAt"=NULL
where "Node"."softwareName" like 'pleroma';

Wyświetl plik

@ -1,4 +0,0 @@
update "Node"
set "refreshedAt"=NULL,
"refreshAttemptedAt"=NULL
where "Node"."softwareName" like 'misskey';

Wyświetl plik

@ -1,3 +0,0 @@
update "Node"
set "softwareName"=lower("softwareName")
where true;

Wyświetl plik

@ -1,4 +0,0 @@
update "Node"
set "refreshedAt"=NULL,
"refreshAttemptedAt"=NULL
where "Node"."softwareName" like 'hometown';

Wyświetl plik

@ -1,4 +0,0 @@
update "Node"
set "refreshedAt"=NULL,
"refreshAttemptedAt"=NULL
where "Node"."softwareName" like 'ecko';

Wyświetl plik

@ -1,193 +0,0 @@
/*
Warnings:
- A unique constraint covering the columns `[name,nodeId]` on the table `Feed` will be added. If there are existing duplicate values, this will fail.
- A unique constraint covering the columns `[domain]` on the table `Node` will be added. If there are existing duplicate values, this will fail.
- A unique constraint covering the columns `[name]` on the table `Tag` will be added. If there are existing duplicate values, this will fail.
*/
-- DropForeignKey
ALTER TABLE "Email" DROP CONSTRAINT "Email_feedId_fkey";
-- DropForeignKey
ALTER TABLE "Feed" DROP CONSTRAINT "Feed_nodeId_fkey";
-- DropForeignKey
ALTER TABLE "FeedToTag" DROP CONSTRAINT "FeedToTag_feedId_fkey";
-- DropForeignKey
ALTER TABLE "FeedToTag" DROP CONSTRAINT "FeedToTag_tagId_fkey";
-- DropForeignKey
ALTER TABLE "Field" DROP CONSTRAINT "Field_feedId_fkey";
-- DropIndex
DROP INDEX "Email_address_idx";
-- DropIndex
DROP INDEX "Feed_bot_idx";
-- DropIndex
DROP INDEX "Feed_createdAt_idx";
-- DropIndex
DROP INDEX "Feed_description_idx";
-- DropIndex
DROP INDEX "Feed_displayName_idx";
-- DropIndex
DROP INDEX "Feed_fulltext_idx";
-- DropIndex
DROP INDEX "Feed_lastStatusAt_idx";
-- DropIndex
DROP INDEX "Feed_locked_idx";
-- DropIndex
DROP INDEX "Feed_name_nodeId_key";
-- DropIndex
DROP INDEX "Feed_parentFeedName_parentFeedDomain_idx";
-- DropIndex
DROP INDEX "Feed_refreshedAt_idx";
-- DropIndex
DROP INDEX "Feed_type_idx";
-- DropIndex
DROP INDEX "Field_name_idx";
-- DropIndex
DROP INDEX "Field_value_idx";
-- DropIndex
DROP INDEX "Node_domain_key";
-- DropIndex
DROP INDEX "Node_foundAt_idx";
-- DropIndex
DROP INDEX "Node_halfYearActiveUserCount_idx";
-- DropIndex
DROP INDEX "Node_monthActiveUserCount_idx";
-- DropIndex
DROP INDEX "Node_openRegistrations_idx";
-- DropIndex
DROP INDEX "Node_refreshAttemptedAt_idx";
-- DropIndex
DROP INDEX "Node_refreshedAt_idx";
-- DropIndex
DROP INDEX "Node_softwareName_idx";
-- DropIndex
DROP INDEX "Node_softwareVersion_idx";
-- DropIndex
DROP INDEX "Node_statusesCount_idx";
-- DropIndex
DROP INDEX "Node_totalUserCount_idx";
-- DropIndex
DROP INDEX "Tag_name_key";
-- CreateIndex
CREATE INDEX "Email_address_idx" ON "Email"("address");
-- CreateIndex
CREATE INDEX "Feed_displayName_idx" ON "Feed"("displayName");
-- CreateIndex
CREATE INDEX "Feed_description_idx" ON "Feed"("description");
-- CreateIndex
CREATE INDEX "Feed_bot_idx" ON "Feed"("bot");
-- CreateIndex
CREATE INDEX "Feed_locked_idx" ON "Feed"("locked");
-- CreateIndex
CREATE INDEX "Feed_lastStatusAt_idx" ON "Feed"("lastStatusAt");
-- CreateIndex
CREATE INDEX "Feed_createdAt_idx" ON "Feed"("createdAt");
-- CreateIndex
CREATE INDEX "Feed_refreshedAt_idx" ON "Feed"("refreshedAt");
-- CreateIndex
CREATE INDEX "Feed_parentFeedName_parentFeedDomain_idx" ON "Feed"("parentFeedName", "parentFeedDomain");
-- CreateIndex
CREATE INDEX "Feed_type_idx" ON "Feed"("type");
-- CreateIndex
CREATE INDEX "Feed_fulltext_idx" ON "Feed"("fulltext");
-- CreateIndex
CREATE UNIQUE INDEX "Feed_name_nodeId_key" ON "Feed"("name", "nodeId");
-- CreateIndex
CREATE INDEX "Field_name_idx" ON "Field"("name");
-- CreateIndex
CREATE INDEX "Field_value_idx" ON "Field"("value");
-- CreateIndex
CREATE UNIQUE INDEX "Node_domain_key" ON "Node"("domain");
-- CreateIndex
CREATE INDEX "Node_softwareName_idx" ON "Node"("softwareName");
-- CreateIndex
CREATE INDEX "Node_softwareVersion_idx" ON "Node"("softwareVersion");
-- CreateIndex
CREATE INDEX "Node_totalUserCount_idx" ON "Node"("totalUserCount");
-- CreateIndex
CREATE INDEX "Node_monthActiveUserCount_idx" ON "Node"("monthActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_halfYearActiveUserCount_idx" ON "Node"("halfYearActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_statusesCount_idx" ON "Node"("statusesCount");
-- CreateIndex
CREATE INDEX "Node_openRegistrations_idx" ON "Node"("openRegistrations");
-- CreateIndex
CREATE INDEX "Node_refreshedAt_idx" ON "Node"("refreshedAt");
-- CreateIndex
CREATE INDEX "Node_refreshAttemptedAt_idx" ON "Node"("refreshAttemptedAt");
-- CreateIndex
CREATE INDEX "Node_foundAt_idx" ON "Node"("foundAt");
-- CreateIndex
CREATE UNIQUE INDEX "Tag_name_key" ON "Tag"("name");
-- AddForeignKey
ALTER TABLE "Email" ADD CONSTRAINT "Email_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "FeedToTag" ADD CONSTRAINT "FeedToTag_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "FeedToTag" ADD CONSTRAINT "FeedToTag_tagId_fkey" FOREIGN KEY ("tagId") REFERENCES "Tag"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "Field" ADD CONSTRAINT "Field_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "Feed" ADD CONSTRAINT "Feed_nodeId_fkey" FOREIGN KEY ("nodeId") REFERENCES "Node"("id") ON DELETE CASCADE ON UPDATE CASCADE;

Wyświetl plik

@ -1,4 +0,0 @@
update "Node"
set "refreshedAt"=NULL,
"refreshAttemptedAt"=NULL
where "Node"."softwareName" like 'friendica';

Wyświetl plik

@ -1,3 +0,0 @@
# Please do not edit this file manually
# It should be added in your version-control system (i.e. Git)
provider = "postgresql"

Wyświetl plik

@ -1,115 +0,0 @@
datasource db {
url = env("POSTGRES_URL")
provider = "postgresql"
}
generator client {
provider = "prisma-client-js"
previewFeatures = ["extendedIndexes","fullTextSearch","referentialActions"]
}
model Tag {
id String @id @default(uuid()) @db.Uuid
name String @unique
feedToTag FeedToTag[]
}
model Email {
id String @id @default(uuid()) @db.Uuid
address String
feed Feed @relation(fields: [feedId], references: [id], onDelete: Cascade)
feedId String @db.Uuid
@@index([address])
}
model FeedToTag {
feed Feed @relation(fields: [feedId], references: [id], onDelete: Cascade)
feedId String @db.Uuid
tag Tag @relation(fields: [tagId], references: [id], onDelete: Cascade)
tagId String @db.Uuid
@@id([feedId, tagId])
}
model Field {
id String @id @default(uuid()) @db.Uuid
name String
value String
feed Feed @relation(fields: [feedId], references: [id], onDelete: Cascade)
feedId String @db.Uuid
@@index([name])
@@index([value])
}
enum FeedType{
account
channel
}
model Feed {
id String @id @default(uuid()) @db.Uuid
node Node @relation(fields: [nodeId], references: [id], onDelete: Cascade)
nodeId String @db.Uuid
foundAt DateTime @default(now())
refreshedAt DateTime @updatedAt
name String
displayName String
description String
feedToTags FeedToTag[]
fields Field[]
emails Email[]
followersCount Int?
followingCount Int?
statusesCount Int?
bot Boolean?
url String
avatar String?
locked Boolean
lastStatusAt DateTime?
createdAt DateTime
type FeedType @default(account)
parentFeedName String?
parentFeedDomain String?
fulltext String @default("")
@@index([displayName])
@@index([description])
@@index([bot])
@@index([locked])
@@index([lastStatusAt])
@@index([createdAt])
@@index([refreshedAt])
@@index([parentFeedName,parentFeedDomain])
@@index([type])
@@index([fulltext])
@@unique([name, nodeId])
}
model Node {
id String @id @default(uuid()) @db.Uuid
softwareName String?
softwareVersion String?
totalUserCount Int?
monthActiveUserCount Int?
halfYearActiveUserCount Int?
statusesCount Int?
openRegistrations Boolean?
foundAt DateTime @default(now())
refreshedAt DateTime?
refreshAttemptedAt DateTime?
domain String @unique
feeds Feed[]
@@index([softwareName])
@@index([softwareVersion])
@@index([totalUserCount])
@@index([monthActiveUserCount])
@@index([halfYearActiveUserCount])
@@index([statusesCount])
@@index([openRegistrations])
@@index([refreshedAt])
@@index([refreshAttemptedAt])
@@index([foundAt])
}

Wyświetl plik

@ -4,6 +4,7 @@ import { assertSuccessJsonResponse } from '../assertSuccessJsonResponse'
import { getDefaultTimeoutMilliseconds } from '../getDefaultTimeoutMilliseconds'
const schema = z.object({
name: z.string().optional(),
software: z.object({
name: z.string(),
version: z.string()

Wyświetl plik

@ -6,17 +6,17 @@ export interface FeedData {
description:string,
followersCount: number,
followingCount:number,
statusesCount:number,
bot:boolean,
statusesCount?:number,
bot?:boolean,
url: string,
avatar:string|null,
avatar?:string,
locked:boolean,
lastStatusAt:Date|null,
lastStatusAt?:Date,
createdAt:Date
fields: FieldData[],
type: 'account'|'channel'
parentFeed: {
parentFeed?: {
name:string
hostDomain:string
}|null
}
}

Wyświetl plik

@ -1,5 +1,5 @@
export interface FieldData{
name: string,
value: string,
verifiedAt: Date|null
verifiedAt: Date|undefined
}

Wyświetl plik

@ -1,9 +1,7 @@
import axios from 'axios'
import { assertSuccessJsonResponse } from '../../assertSuccessJsonResponse'
import { FeedData } from '../FeedData'
import { z } from 'zod'
import { getDefaultTimeoutMilliseconds } from '../../getDefaultTimeoutMilliseconds'
import { NodeProviderMethod } from '../NodeProviderMethod'
import { NoMoreFeedsError } from '../NoMoreFeedsError'
import { FeedProviderMethod } from '../FeedProviderMethod'
@ -91,24 +89,24 @@ export const retrieveUsersPage:FeedProviderMethod = async (domain, page) => {
url: `https://${domain}/@${item.username}`,
avatar: item.avatarUrl,
locked: item.isLocked,
lastStatusAt: item.updatedAt !== null ? new Date(item.updatedAt) : null,
lastStatusAt: item.updatedAt !== null ? new Date(item.updatedAt) : undefined,
createdAt: new Date(item.createdAt),
fields: [
...item.fields.map(field => {
return {
name: replaceEmojis(field.name, item.emojis),
value: replaceEmojis(field.value, item.emojis),
verifiedAt: null
verifiedAt: undefined
}
}),
...[
{ name: 'Location', value: item.location, verifiedAt: null },
{ name: 'Birthday', value: item.birthday, verifiedAt: null },
{ name: 'Language', value: item.lang, verifiedAt: null }
{ name: 'Location', value: item.location, verifiedAt: undefined },
{ name: 'Birthday', value: item.birthday, verifiedAt: undefined },
{ name: 'Language', value: item.lang, verifiedAt: undefined }
].filter(field => field.value !== null)
],
type: 'account',
parentFeed: null
parentFeed: undefined
}
}
)

Wyświetl plik

@ -1,7 +1,7 @@
import { z } from 'zod'
export const avatarSchema = z.nullable(z.object({
export const avatarSchema = z.optional(z.nullable(z.object({
path: z.string()
}))
})))
export type Avatar = z.infer<typeof avatarSchema>

Wyświetl plik

@ -1,8 +1,8 @@
import { Avatar } from './Avatar'
export const parseAvatarUrl = (data:Avatar, domain:string):string|null => {
export const parseAvatarUrl = (data:Avatar, domain:string):string|undefined => {
if (data === null) {
return null
return undefined
}
return `https://${domain}${data.path}`
}

Wyświetl plik

@ -57,11 +57,11 @@ export const retrieveAccounts:FeedProviderMethod = async (domain, page) => {
followersCount: item.followersCount,
followingCount: item.followingCount,
createdAt: new Date(item.createdAt),
bot: null,
lastStatusAt: null,
statusesCount: null,
bot: undefined,
lastStatusAt: undefined,
statusesCount: undefined,
type: 'account',
parentFeed: null
parentFeed: undefined
}
})
}

Wyświetl plik

@ -54,7 +54,7 @@ export const retrieveVideoChannels:FeedProviderMethod = async (domain, page) =>
.filter(item => item.host === domain)
.map((item):FeedData => {
const fields:FieldData[] = item.support
? [{ name: 'support', value: item.support, verifiedAt: null }]
? [{ name: 'support', value: item.support, verifiedAt: undefined }]
: []
return {
name: item.name,
@ -67,9 +67,9 @@ export const retrieveVideoChannels:FeedProviderMethod = async (domain, page) =>
followersCount: item.followersCount,
followingCount: item.followingCount,
createdAt: new Date(item.createdAt),
bot: null,
lastStatusAt: null,
statusesCount: null,
bot: undefined,
lastStatusAt: undefined,
statusesCount: undefined,
type: 'channel',
parentFeed: {
name: item.ownerAccount.name,

Wyświetl plik

@ -0,0 +1,19 @@
import { lookup } from 'dns/promises'
import { updateNodeIps } from '../../Storage/Nodes/updateNodeIps'
import { ElasticClient } from '../../Storage/ElasticClient'
import Node from '../../Storage/Definitions/Node'
const refreshNodeIps = async (elastic: ElasticClient, node:Node):Promise<Node> => {
console.info('Looking up node ip addresses', {
nodeDomain: node.domain
})
try {
const addresses = await lookup(node.domain, { all: true })
return updateNodeIps(elastic, node, addresses.map(resolution => resolution.address))
} catch (error) {
console.warn('Could not lookup the domain', { node, error })
return node
}
}
export default refreshNodeIps

Wyświetl plik

@ -1,28 +1,15 @@
import { Node, PrismaClient, Feed } from '@prisma/client'
import { FeedData } from '../../Fediverse/Providers/FeedData'
import { createMissingTags } from '../../Storage/Tags/createMissingTags'
import { createFeedTags } from '../../Storage/Tags/createFeedTags'
import { fetchTags } from '../../Storage/Tags/fetchTags'
import { extractTags } from '../../StringTools/extractTags'
import { extractEmails } from '../../StringTools/extractEmails'
import { createFeedFields } from '../../Storage/Fields/createFeedFields'
import { createFeedEmails } from '../../Storage/Emails/createFeedEmails'
import { createFeed } from '../../Storage/Feeds/createFeed'
import prepareFulltext from './prepareFulltext'
import Feed from '../../Storage/Definitions/Feed'
import Node from '../../Storage/Definitions/Node'
import { ElasticClient } from '../../Storage/ElasticClient'
export const addFeed = async (prisma: PrismaClient, node: Node, feedData: FeedData): Promise<Feed> => {
export const addFeed = async (elastic: ElasticClient, node: Node, feedData: FeedData): Promise<Feed> => {
const fulltext = prepareFulltext(feedData, node)
const feed = await createFeed(prisma, { ...feedData, fulltext }, node)
await createFeedFields(prisma, feed, feedData.fields)
const tagNames = extractTags(fulltext)
await createMissingTags(prisma, tagNames)
const tags = await fetchTags(prisma, tagNames)
await createFeedTags(prisma, feed, tags)
const emails = extractEmails(fulltext)
await createFeedEmails(prisma, feed, emails)
return feed
const extractedTags = extractTags(fulltext)
const extractedEmails = extractEmails(fulltext)
return await createFeed(elastic, { ...feedData, extractedTags, extractedEmails }, node)
}

Wyświetl plik

@ -1,6 +1,6 @@
import { FeedData } from '../../Fediverse/Providers/FeedData'
import striptags from 'striptags'
import { Node } from '@prisma/client'
import Node from '../../Storage/Definitions/Node'
export default function (feedData: FeedData, node: Node):string {
return striptags(

Wyświetl plik

@ -1,33 +1,17 @@
import { PrismaClient, Feed, Node } from '@prisma/client'
import { FeedData } from '../../Fediverse/Providers/FeedData'
import { createMissingTags } from '../../Storage/Tags/createMissingTags'
import { createFeedTags } from '../../Storage/Tags/createFeedTags'
import { fetchTags } from '../../Storage/Tags/fetchTags'
import { extractTags } from '../../StringTools/extractTags'
import { extractEmails } from '../../StringTools/extractEmails'
import { createFeedFields } from '../../Storage/Fields/createFeedFields'
import { createFeedEmails } from '../../Storage/Emails/createFeedEmails'
import { deleteAllFeedFields } from '../../Storage/Fields/deleteAllFeedFields'
import { deleteAllFeedTags } from '../../Storage/Tags/deleteAllFeedTags'
import { deleteAllFeedEmails } from '../../Storage/Emails/deleteAllFeedEmails'
import { updateFeed } from '../../Storage/Feeds/updateFeed'
import Feed from '../../Storage/Definitions/Feed'
import Node from '../../Storage/Definitions/Node'
import prepareFulltext from './prepareFulltext'
import { ElasticClient } from '../../Storage/ElasticClient'
export const refreshFeed = async (prisma: PrismaClient, feed:Feed, feedData: FeedData, node: Node): Promise<Feed> => {
export const refreshFeed = async (elastic: ElasticClient, feed:Feed, feedData: FeedData, node: Node): Promise<Feed> => {
const fulltext = prepareFulltext(feedData, node)
await deleteAllFeedFields(prisma, feed)
await createFeedFields(prisma, feed, feedData.fields)
const extractedTags = extractTags(fulltext)
const extractedEmails = extractEmails(fulltext)
const tagNames = extractTags(fulltext)
await createMissingTags(prisma, tagNames)
const tags = await fetchTags(prisma, tagNames)
await deleteAllFeedTags(prisma, feed)
await createFeedTags(prisma, feed, tags)
const emails = extractEmails(fulltext)
await deleteAllFeedEmails(prisma, feed)
await createFeedEmails(prisma, feed, emails)
return await updateFeed(prisma, feed, { ...feedData, fulltext })
return await updateFeed(elastic, feed, { ...feedData, extractedTags, extractedEmails })
}

Wyświetl plik

@ -1,12 +1,13 @@
import { refreshFeedsOnPage } from './refreshFeedsOnPage'
import { FeedProvider } from '../../Fediverse/Providers/FeedProvider'
import { Node, PrismaClient } from '@prisma/client'
import Node from '../../Storage/Definitions/Node'
import { ElasticClient } from '../../Storage/ElasticClient'
export const refreshFeeds = async (prisma:PrismaClient, provider:FeedProvider, node:Node):Promise<void> => {
export const refreshFeeds = async (elastic: ElasticClient, provider:FeedProvider, node:Node):Promise<void> => {
try {
for (let page = 0; true; page++) {
console.info('Retrieve feeds page', { nodeDomain: node.domain, provider: provider.getKey(), page: page })
await refreshFeedsOnPage(prisma, provider, node, page)
await refreshFeedsOnPage(elastic, provider, node, page)
}
} catch (e) {
console.info('Feed search finished: ' + e, { nodeDomain: node.domain, provider: provider.getKey() })

Wyświetl plik

@ -1,11 +1,13 @@
import { Node, PrismaClient, Feed } from '@prisma/client'
import { refreshOrAddFeed } from './refreshOrAddFeed'
import { FeedProvider } from '../../Fediverse/Providers/FeedProvider'
import Node from '../../Storage/Definitions/Node'
import Feed from '../../Storage/Definitions/Feed'
import { ElasticClient } from '../../Storage/ElasticClient'
export const refreshFeedsOnPage = async (prisma: PrismaClient, provider:FeedProvider, node:Node, page:number):Promise<Feed[]> => {
export const refreshFeedsOnPage = async (elastic: ElasticClient, provider:FeedProvider, node:Node, page:number):Promise<Feed[]> => {
const feedData = await provider.retrieveFeeds(node.domain, page)
console.info('Retrieved feeds', { count: feedData.length, domain: node.domain, provider: provider.getKey(), page: page })
return Promise.all(feedData.map(
feedDataItem => refreshOrAddFeed(prisma, node, feedDataItem)
feedDataItem => refreshOrAddFeed(elastic, node, feedDataItem)
))
}

Wyświetl plik

@ -1,15 +1,22 @@
import { FeedData } from '../../Fediverse/Providers/FeedData'
import { fetchFeedByNodeAndName } from '../../Storage/Feeds/fetchFeedByNodeAndName'
import { refreshFeed } from './refreshFeed'
import { addFeed } from './addFeed'
import { Node, PrismaClient, Feed } from '@prisma/client'
import Feed from '../../Storage/Definitions/Feed'
import Node from '../../Storage/Definitions/Node'
import { ElasticClient } from '../../Storage/ElasticClient'
import getFeed from '../../Storage/Feeds/getFeed'
export const refreshOrAddFeed = async (prisma:PrismaClient, node:Node, feedData:FeedData):Promise<Feed> => {
const feed = await fetchFeedByNodeAndName(prisma, node, feedData.name)
export const refreshOrAddFeed = async (elastic: ElasticClient, node:Node, feedData:FeedData):Promise<Feed> => {
let feed:Feed|null = null
try {
feed = await getFeed(elastic, `${feedData.name}@${node.domain}`)
} catch (e) {
}
if (feed) {
console.info('Refreshing feed', { nodeDomain: node.domain, feedName: feedData.name, feedType: feedData.type })
return await refreshFeed(prisma, feed, feedData, node)
return await refreshFeed(elastic, feed, feedData, node)
}
console.info('Adding feed', { nodeDomain: node.domain, feedName: feedData.name, feedType: feedData.type })
return await addFeed(prisma, node, feedData)
return await addFeed(elastic, node, feedData)
}

Wyświetl plik

@ -1,12 +1,13 @@
import { Node, PrismaClient } from '@prisma/client'
import { retrieveDomainNodeInfo } from '../../Fediverse/NodeInfo/retrieveDomainNodeInfo'
import { updateNode } from '../../Storage/Nodes/updateNode'
import { updateNodeInfo } from '../../Storage/Nodes/updateNodeInfo'
import Node from '../../Storage/Definitions/Node'
import { ElasticClient } from '../../Storage/ElasticClient'
export const refreshNodeInfo = async (prisma: PrismaClient, node:Node):Promise<Node> => {
export const refreshNodeInfo = async (elastic: ElasticClient, node:Node):Promise<Node> => {
console.info('Updating info of node', { nodeDomain: node.domain })
try {
const nodeInfo = await retrieveDomainNodeInfo(node.domain)
return await updateNode(prisma, node, nodeInfo)
return await updateNodeInfo(elastic, node, nodeInfo)
} catch (error) {
console.warn('Failed to update node info: ' + error)
return node

Wyświetl plik

@ -1,12 +1,13 @@
import { Node, PrismaClient } from '@prisma/client'
import { NodeProvider } from '../../Fediverse/Providers/NodeProvider'
import { findNewNodesOnPage } from './findNewNodesOnPage'
import Node from '../../Storage/Definitions/Node'
import { ElasticClient } from '../../Storage/ElasticClient'
export const findNewNodes = async (prisma: PrismaClient, provider:NodeProvider, node:Node):Promise<void> => {
export const findNewNodes = async (elastic: ElasticClient, provider:NodeProvider, node:Node):Promise<void> => {
try {
for (let page = 0; true; page++) {
console.info('Retrieve node page', { domain: node.domain, provider: provider.getKey() })
await findNewNodesOnPage(prisma, provider, node, page)
await findNewNodesOnPage(elastic, provider, node, page)
}
} catch (e) {
console.info('Node search finished: ' + e, { domain: node.domain, provider: provider.getKey() })

Wyświetl plik

@ -1,11 +1,14 @@
import { PrismaClient, Node } from '@prisma/client'
import { createMissingNodes } from '../../Storage/Nodes/createMissingNodes'
import { NodeProvider } from '../../Fediverse/Providers/NodeProvider'
import Node from '../../Storage/Definitions/Node'
import { ElasticClient } from '../../Storage/ElasticClient'
import isDomainNotBanned from '../../Storage/Nodes/isDomainNotBanned'
export const findNewNodesOnPage = async (
prisma:PrismaClient, provider: NodeProvider, node:Node, page:number
elastic: ElasticClient, provider: NodeProvider, node:Node, page:number
):Promise<number> => {
const domains = await provider.retrieveNodes(node.domain, page)
let domains = await provider.retrieveNodes(node.domain, page)
domains = domains.filter(isDomainNotBanned)
console.log('Found nodes', { count: domains.length, domain: node.domain, provider: provider.getKey(), page: page })
return await createMissingNodes(prisma, domains)
return await createMissingNodes(elastic, domains, node.domain)
}

Wyświetl plik

@ -0,0 +1,17 @@
import countNodeFeeds from '../../Storage/Feeds/countNodeFeeds'
import { setNodeStats } from '../../Storage/Nodes/setNodeStats'
import { ElasticClient } from '../../Storage/ElasticClient'
import Node from '../../Storage/Definitions/Node'
export type NodeStats ={
account: number,
channel: number,
}
export default async function updateNodeFeedStats (elasticClient: ElasticClient, node: Node) {
await setNodeStats(
elasticClient,
node,
await countNodeFeeds(elasticClient, node.domain)
)
}

Wyświetl plik

@ -1,8 +1,8 @@
import { PrismaClient } from '@prisma/client'
import { createMissingNodes } from '../../Storage/Nodes/createMissingNodes'
import { ElasticClient } from '../../Storage/ElasticClient'
export const addNodeSeed = async (prisma:PrismaClient, domain:string):Promise<boolean> => {
console.info('Trying to add seed domain node', { domain: domain })
const result = await createMissingNodes(prisma, [domain])
export const addNodeSeed = async (elastic: ElasticClient, domains:string[]):Promise<boolean> => {
console.info('Trying to add seed domain nodes', { domains: domains })
const result = await createMissingNodes(elastic, domains, undefined)
return result > 0
}

Wyświetl plik

@ -0,0 +1,11 @@
import { deleteDomainFeeds } from '../../Storage/Feeds/deleteDomainFeeds'
import { deleteDomainNodes } from '../../Storage/Nodes/deleteDomainNodes'
import { ElasticClient } from '../../Storage/ElasticClient'
export default async function deleteDomains (elastic: ElasticClient, domains:string[]):Promise<number> {
if (domains === []) {
return
}
await deleteDomainFeeds(elastic, domains)
return deleteDomainNodes(elastic, domains)
}

Wyświetl plik

@ -0,0 +1,7 @@
export default function getBannedDomains ():string[] {
const domains = process.env.BANNED_DOMAINS ?? ''
if (domains === '') {
return []
}
return domains.split(',').map(domain => domain.toLowerCase())
}

Wyświetl plik

@ -1,9 +0,0 @@
import { PrismaClient, Tag } from '@prisma/client'
import { fetchTags } from '../../Storage/Tags/fetchTags'
import { createMissingTags } from '../../Storage/Tags/createMissingTags'
export const fetchOrCreateTags = async (prisma:PrismaClient, tagNames:string[]):Promise<Tag[]> => {
console.log('Searching for tags', { count: tagNames.length })
await createMissingTags(prisma, tagNames)
return await fetchTags(prisma, tagNames)
}

Wyświetl plik

@ -1,4 +1,3 @@
import { PrismaClient } from '@prisma/client'
import { fetchNodeToProcess } from '../Storage/Nodes/fetchNodeToProcess'
import { ProviderRegistry } from '../Fediverse/Providers/ProviderRegistry'
import { setNodeRefreshed } from '../Storage/Nodes/setNodeRefreshed'
@ -9,18 +8,22 @@ import { NodeProvider } from '../Fediverse/Providers/NodeProvider'
import { FeedProvider } from '../Fediverse/Providers/FeedProvider'
import { refreshFeeds } from './Feeds/refreshFeeds'
import { deleteOldFeeds } from '../Storage/Feeds/deleteOldFeeds'
import refreshNodeIps from './Dns/refreshNodeIps'
import { ElasticClient } from '../Storage/ElasticClient'
import updateNodeFeedStats from './Nodes/updateNodeFeedStats'
export const processNextNode = async (prisma:PrismaClient, providerRegistry:ProviderRegistry):Promise<void> => {
export const processNextNode = async (elastic: ElasticClient, providerRegistry:ProviderRegistry):Promise<void> => {
console.info('#############################################')
let node = await fetchNodeToProcess(prisma)
node = await setNodeRefreshAttempted(prisma, node)
let node = await fetchNodeToProcess(elastic)
node = await setNodeRefreshAttempted(elastic, node)
node = await refreshNodeInfo(prisma, node)
node = await refreshNodeIps(elastic, node)
node = await refreshNodeInfo(elastic, node)
if (!providerRegistry.containsKey(node.softwareName)) {
console.warn('Unknown software', { domain: node.domain, software: node.softwareName })
await deleteOldFeeds(prisma, node)
await setNodeRefreshed(prisma, node)
await deleteOldFeeds(elastic, node)
await setNodeRefreshed(elastic, node)
return
}
const provider = providerRegistry.getProviderByKey(node.softwareName)
@ -28,18 +31,19 @@ export const processNextNode = async (prisma:PrismaClient, providerRegistry:Prov
await Promise.all(
provider.getNodeProviders().map((nodeProvider:NodeProvider) => {
console.info('Searching for nodes', { domain: node.domain, provider: nodeProvider.getKey() })
return findNewNodes(prisma, nodeProvider, node)
return findNewNodes(elastic, nodeProvider, node)
})
)
await Promise.all(
provider.getFeedProviders().map((feedProvider:FeedProvider) => {
console.info('Searching for feeds', { domain: node.domain, provider: feedProvider.getKey() })
return refreshFeeds(prisma, feedProvider, node)
return refreshFeeds(elastic, feedProvider, node)
})
)
await deleteOldFeeds(prisma, node)
await deleteOldFeeds(elastic, node)
await updateNodeFeedStats(elastic, node)
await setNodeRefreshed(prisma, node)
await setNodeRefreshed(elastic, node)
}

Wyświetl plik

@ -0,0 +1,29 @@
import Field from './Field'
interface Feed {
domain: string
foundAt: number,
refreshedAt?: number,
name: string,
fullName: string,
displayName: string,
description: string,
strippedDescription?: string,
followersCount?: number,
followingCount?: number,
statusesCount?: number,
lastStatusAt?: number,
createdAt?: number,
bot?: boolean,
locked: boolean,
url: string,
avatar?: string,
type: 'account' | 'channel',
parentFeedName?: string,
parentFeedDomain?: string
fields: Field[],
extractedEmails: string[],
extractedTags: string[]
}
export default Feed

Wyświetl plik

@ -0,0 +1,8 @@
interface Field {
name: string,
value: string
strippedName?: string
strippedValue?: string
}
export default Field

Wyświetl plik

@ -0,0 +1,13 @@
interface Geo {
cityName?: string,
continentName?: string,
countryIsoCode?: string,
countryName?: string,
latitude: number
longitude: number
location?: string,
regionIsoCode?: string,
regionName?: string
}
export default Geo

Wyświetl plik

@ -0,0 +1,25 @@
import Geo from './Geo'
interface Node {
name?:string,
strippedName?:string,
foundAt: number,
refreshAttemptedAt?: number
refreshedAt?: number
openRegistrations?: boolean
domain: string,
serverIps?: string[],
geoip?: Geo[],
softwareName?: string;
softwareVersion?: string
standardizedSoftwareVersion?: string
halfYearActiveUserCount?: number,
monthActiveUserCount?: number,
statusesCount?: number,
totalUserCount?: number,
discoveredByDomain?:string,
accountFeedCount?: number,
channelFeedCount?: number,
}
export default Node

Wyświetl plik

@ -0,0 +1,3 @@
const feedIndex = 'feed'
export default feedIndex

Wyświetl plik

@ -0,0 +1,3 @@
const nodeIndex = 'node'
export default nodeIndex

Wyświetl plik

@ -0,0 +1,15 @@
import { Client } from '@elastic/elasticsearch'
const elasticClient = new Client({
node: {
url: new URL(process.env.ELASTIC_URL ?? 'http://elastic:9200')
},
auth: {
username: process.env.ELASTIC_USER ?? 'elastic',
password: process.env.ELASTIC_PASSWORD
}
})
export type ElasticClient = typeof elasticClient
export default elasticClient

Wyświetl plik

@ -1,15 +0,0 @@
import { PrismaClient, Feed } from '@prisma/client'
export const createFeedEmails = async (prisma:PrismaClient, feed:Feed, emails:string[]):Promise<number> => {
const result = await prisma.email.createMany({
data: emails.map(email => {
return {
feedId: feed.id,
address: email
}
}),
skipDuplicates: true
})
console.log('Added emails to feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
return result.count
}

Wyświetl plik

@ -1,11 +0,0 @@
import { PrismaClient, Feed } from '@prisma/client'
export const deleteAllFeedEmails = async (prisma:PrismaClient, feed:Feed):Promise<number> => {
const result = await prisma.email.deleteMany({
where: {
feedId: feed.id
}
})
console.log('Removed all emails from feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
return result.count
}

Wyświetl plik

@ -1,5 +1,6 @@
import { FeedData } from '../../Fediverse/Providers/FeedData'
export default interface StorageFeedData extends FeedData{
fulltext:string
extractedTags:string[],
extractedEmails:string[],
}

Wyświetl plik

@ -0,0 +1,84 @@
import { ElasticClient } from '../ElasticClient'
import feedIndex from '../Definitions/feedIndex'
import dateProperty from '../Properties/dateProperty'
import { IngestProcessorContainer } from '@elastic/elasticsearch/lib/api/types'
const assertFeedIndex = async (elastic: ElasticClient): Promise<void> => {
console.info('Setting feed pipeline')
const processors: IngestProcessorContainer[] = []
await elastic.ingest.putPipeline({
id: 'feed',
description: 'Default feed pipeline',
processors: processors
})
console.info('Checking feed index')
const exists = await elastic.indices.exists({
index: feedIndex
})
if (exists) {
console.info('Feed index exists')
} else {
console.info('Creating feed index')
await elastic.indices.create({
index: feedIndex,
settings: {
default_pipeline: 'feed',
analysis: {
analyzer: {
standard: {
type: 'standard'
},
html: {
type: 'custom',
tokenizer: 'standard',
filter: [
'lowercase'
],
char_filter: [
'html_strip'
]
}
}
}
}
})
}
console.info('Setting feed mapping')
await elastic.indices.putMapping({
index: feedIndex,
properties: {
domain: { type: 'text', analyzer: 'standard' },
foundAt: dateProperty,
refreshedAt: dateProperty,
name: { type: 'text', analyzer: 'standard' },
fullName: { type: 'text', analyzer: 'standard' },
displayName: { type: 'text', analyzer: 'html' },
description: { type: 'text', analyzer: 'html' },
followersCount: { type: 'integer' },
followingCount: { type: 'integer' },
statusesCount: { type: 'integer' },
lastStatusAt: dateProperty,
createdAt: dateProperty,
bot: { type: 'boolean' },
locked: { type: 'boolean' },
url: { type: 'text' },
avatar: { type: 'text' },
type: { type: 'keyword' },
parentFeedName: { type: 'text', analyzer: 'standard' },
parentFeedDomain: { type: 'text', analyzer: 'standard' },
fields: {
type: 'nested',
properties: {
name: { type: 'text', analyzer: 'html' },
value: { type: 'text', analyzer: 'html' }
}
},
extractedEmails: { type: 'text', analyzer: 'standard' },
extractedTags: { type: 'text', analyzer: 'standard' }
}
})
await elastic.indices.refresh({ index: feedIndex })
}
export default assertFeedIndex

Wyświetl plik

@ -0,0 +1,39 @@
import { ElasticClient } from '../ElasticClient'
import Feed from '../Definitions/Feed'
import feedIndex from '../Definitions/feedIndex'
import { NodeStats } from '../../Jobs/Nodes/updateNodeFeedStats'
type Aggregation = {
buckets:{
key:'account'|'channel',
// eslint-disable-next-line camelcase
doc_count:number
}[]
}
export default async function countNodeFeeds (elastic: ElasticClient, domain:string):Promise<NodeStats> {
await elastic.indices.refresh({ index: feedIndex })
const response = await elastic.search<Feed>({
index: feedIndex,
query: {
term: { domain }
},
size: 0,
aggs: {
types: {
terms: {
field: 'type'
}
}
}
})
const types = response.aggregations.types as Aggregation
const result:NodeStats = {
channel: 0,
account: 0
}
types.buckets.forEach(item => {
result[item.key] += item.doc_count
})
return result
}

Wyświetl plik

@ -1,9 +1,18 @@
import { Node, PrismaClient, Feed } from '@prisma/client'
import StorageFeedData from './StorageFeedData'
import { ElasticClient } from '../ElasticClient'
import feedIndex from '../Definitions/feedIndex'
import getFeed from './getFeed'
import Feed from '../Definitions/Feed'
import Node from '../Definitions/Node'
export const createFeed = async (prisma: PrismaClient, feedData: StorageFeedData, node: Node): Promise<Feed> => {
const feed = await prisma.feed.create({
data: {
export const createFeed = async (elastic: ElasticClient, feedData: StorageFeedData, node: Node): Promise<Feed> => {
const fullName = `${feedData.name}@${node.domain}`
await elastic.create<Feed>({
index: feedIndex,
id: fullName,
document: {
fullName,
domain: node.domain,
url: feedData.url,
name: feedData.name,
bot: feedData.bot,
@ -11,16 +20,22 @@ export const createFeed = async (prisma: PrismaClient, feedData: StorageFeedData
followersCount: feedData.followersCount,
followingCount: feedData.followingCount,
statusesCount: feedData.statusesCount,
lastStatusAt: feedData.lastStatusAt,
lastStatusAt: feedData.lastStatusAt?.getTime(),
description: feedData.description,
displayName: feedData.displayName,
locked: feedData.locked,
nodeId: node.id,
createdAt: feedData.createdAt,
fulltext: feedData.fulltext,
createdAt: feedData.createdAt.getTime(),
foundAt: (new Date()).getTime(),
fields: feedData.fields.map(field => {
return { name: field.name, value: field.value }
}),
extractedEmails: feedData.extractedEmails,
extractedTags: feedData.extractedTags,
parentFeedName: feedData.parentFeed?.name,
parentFeedDomain: feedData.parentFeed?.hostDomain,
type: feedData.type
}
})
console.info('Created new feed', { feedName: feed.name, nodeDomain: node.domain })
return feed
console.info('Created new feed', { feedName: feedData.name, nodeDomain: node.domain })
return getFeed(elastic, fullName)
}

Wyświetl plik

@ -0,0 +1,28 @@
import { ElasticClient } from '../ElasticClient'
import feedIndex from '../Definitions/feedIndex'
export const deleteDomainFeeds = async (elastic: ElasticClient, domains:string[]): Promise<number> => {
await elastic.indices.refresh({ index: feedIndex })
const result = await elastic.deleteByQuery({
index: feedIndex,
query: {
bool: {
should: domains.map(domain => {
return {
regexp: {
domain: {
value: '(.*\\.)?' + domain,
case_insensitive: true
}
}
}
}),
minimum_should_match: 1
}
}
})
console.info('Deleted domain feeds', {
count: result.deleted, domains
})
return result.deleted
}

Wyświetl plik

@ -1,18 +1,22 @@
import { Node, PrismaClient } from '@prisma/client'
import { ElasticClient } from '../ElasticClient'
import Node from '../Definitions/Node'
import feedIndex from '../Definitions/feedIndex'
export const deleteOldFeeds = async (prisma: PrismaClient, node: Node): Promise<number> => {
const result = await prisma.feed.deleteMany({
where: {
nodeId: {
equals: node.id
},
refreshedAt: {
lt: node.refreshAttemptedAt
export const deleteOldFeeds = async (elastic: ElasticClient, node: Node): Promise<number> => {
await elastic.indices.refresh({ index: feedIndex })
const result = await elastic.deleteByQuery({
index: feedIndex,
query: {
bool: {
must: [
{ match: { domain: node.domain } },
{ range: { refreshedAt: { lt: node.refreshAttemptedAt } } }
]
}
}
})
console.info('Deleted old feeds', {
count: result.count, olderThen: node.refreshAttemptedAt, nodeDomain: node.domain
count: result.deleted, olderThen: node.refreshAttemptedAt, nodeDomain: node.domain
})
return result.count
return result.deleted
}

Wyświetl plik

@ -1,10 +0,0 @@
import { Node, PrismaClient, Feed } from '@prisma/client'
export const fetchFeedByNodeAndName = async (prisma:PrismaClient, node:Node, name:string):Promise<Feed> => {
return await prisma.feed.findFirst({
where: {
node: node,
name: name
}
})
}

Wyświetl plik

@ -0,0 +1,13 @@
import Feed from '../Definitions/Feed'
import { ElasticClient } from '../ElasticClient'
import feedIndex from '../Definitions/feedIndex'
const getFeed = async (elastic: ElasticClient, feedFullName:string):Promise<Feed> => {
const result = await elastic.get<Feed>({
index: feedIndex,
id: feedFullName
})
return result._source
}
export default getFeed

Wyświetl plik

@ -1,9 +1,14 @@
import { Feed, PrismaClient } from '@prisma/client'
import StorageFeedData from './StorageFeedData'
import Feed from '../Definitions/Feed'
import { ElasticClient } from '../ElasticClient'
import feedIndex from '../Definitions/feedIndex'
import getFeed from './getFeed'
export const updateFeed = async (prisma:PrismaClient, feed:Feed, feedData:StorageFeedData):Promise<Feed> => {
const updatedFeed = await prisma.feed.update({
data: {
export const updateFeed = async (elastic: ElasticClient, feed:Feed, feedData:StorageFeedData):Promise<Feed> => {
await elastic.update<Feed>({
index: feedIndex,
id: feed.fullName,
doc: {
url: feedData.url,
bot: feedData.bot,
avatar: feedData.avatar,
@ -15,14 +20,15 @@ export const updateFeed = async (prisma:PrismaClient, feed:Feed, feedData:Storag
displayName: feedData.displayName,
locked: feedData.locked,
createdAt: feedData.createdAt,
refreshedAt: new Date(),
fulltext: feedData.fulltext,
type: feedData.type
},
where: {
id: feed.id
refreshedAt: (new Date()).getTime(),
type: feedData.type,
fields: feedData.fields.map(field => {
return { name: field.name, value: field.value }
}),
extractedEmails: feedData.extractedEmails,
extractedTags: feedData.extractedTags
}
})
console.info('Updated feed', { feedName: feed.name, nodeId: feed.nodeId })
return updatedFeed
console.info('Updated feed', { feedName: feed.name, nodeDomain: feed.domain })
return getFeed(elastic, feed.fullName)
}

Wyświetl plik

@ -1,17 +0,0 @@
import { PrismaClient, Feed } from '@prisma/client'
import { FieldData } from '../../Fediverse/Providers/FieldData'
export const createFeedFields = async (prisma:PrismaClient, feed:Feed, fieldData:FieldData[]):Promise<number> => {
const result = await prisma.field.createMany({
data: fieldData.map(item => {
return {
feedId: feed.id,
name: item.name,
value: item.value
}
}),
skipDuplicates: true
})
console.log('Added fields to feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
return result.count
}

Wyświetl plik

@ -1,11 +0,0 @@
import { PrismaClient, Feed } from '@prisma/client'
export const deleteAllFeedFields = async (prisma:PrismaClient, feed:Feed):Promise<number> => {
const result = await prisma.field.deleteMany({
where: {
feedId: feed.id
}
})
console.log('Removed all fields from feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
return result.count
}

Wyświetl plik

@ -0,0 +1,95 @@
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import dateProperty from '../Properties/dateProperty'
const assertNodeIndex = async (elastic: ElasticClient):Promise<void> => {
console.info('Setting node pipeline')
await elastic.ingest.putPipeline({
id: 'node',
description: 'Default node pipeline',
processors: [
{
// @ts-ignore
geoip: {
ignore_missing: true,
field: 'serverIps',
properties: [
'location',
'continent_name',
'country_name',
'country_iso_code',
'region_iso_code',
'region_name',
'city_name'
],
target_field: 'geoip'
}
},
{
grok: {
ignore_missing: true,
field: 'softwareVersion',
patterns: ['%{VERSION:standardizedSoftwareVersion}'],
pattern_definitions: {
VERSION: '(?:[0-9]+\\.?[0-9]+\\.?[0-9]+)'
}
}
}
]
})
console.info('Checking node index')
const exists = await elastic.indices.exists({
index: nodeIndex
})
if (exists) {
console.info('Node index exists')
} else {
console.info('Creating node index')
await elastic.indices.create({
index: nodeIndex,
settings: {
default_pipeline: 'node'
}
})
}
console.info('Setting node mapping')
await elastic.indices.putMapping({
index: nodeIndex,
properties: {
name: { type: 'text' },
domain: { type: 'text' },
softwareName: { type: 'keyword' },
softwareVersion: { type: 'text' },
standardizedSoftwareVersion: { type: 'keyword' },
totalUserCount: { type: 'integer' },
monthActiveUserCount: { type: 'integer' },
halfYearActiveUserCount: { type: 'integer' },
statusesCount: { type: 'integer' },
foundAt: dateProperty,
refreshedAt: dateProperty,
refreshAttemptedAt: dateProperty,
openRegistrations: { type: 'boolean' },
serverIps: { type: 'ip' },
discoveredByDomain: { type: 'text' },
geoip: {
type: 'nested',
properties: {
location: { type: 'geo_point' },
continent_name: { type: 'keyword' },
country_name: { type: 'keyword' },
country_iso_code: { type: 'keyword' },
region_name: { type: 'keyword' },
region_iso_code: { type: 'keyword' },
city_name: { type: 'keyword' }
}
},
accountFeedCount: { type: 'integer' },
channelFeedCount: { type: 'integer' }
}
})
await elastic.indices.refresh({ index: nodeIndex })
}
export default assertNodeIndex

Wyświetl plik

@ -1,15 +1,24 @@
import { PrismaClient, Node } from '@prisma/client'
export const createMissingNodes = async (client:PrismaClient, domains:string[]):Promise<number> => {
const result = await client.node.createMany(
{
data: domains.map(domain => {
return {
domain: domain
}
}),
skipDuplicates: true
}
)
console.info('Created new nodes', { count: result.count })
return result.count
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
export const createMissingNodes = async (elastic: ElasticClient, domains:string[], discoveredByDomain:string|undefined):Promise<number> => {
const response = await elastic.bulk({
index: nodeIndex,
body: domains.flatMap(domain => [
{
create: { _id: domain }
},
{
domain: domain,
discoveredByDomain,
foundAt: (new Date()).getTime()
}
])
})
const createdCount = response.items.filter(item => item.create.status === 201).length
console.warn('Created new nodes', {
requestedCount: domains.length,
createdCount: createdCount,
errors: response.items.filter(item => item.create.status !== 201).map(item => item.create.error.reason)
})
return createdCount
}

Wyświetl plik

@ -0,0 +1,28 @@
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
export const deleteDomainNodes = async (elastic: ElasticClient, domains:string[]): Promise<number> => {
await elastic.indices.refresh({ index: nodeIndex })
const result = await elastic.deleteByQuery({
index: nodeIndex,
query: {
bool: {
should: domains.map(domain => {
return {
regexp: {
domain: {
value: '(.*\\.)?' + domain,
case_insensitive: true
}
}
}
}),
minimum_should_match: 1
}
}
})
console.info('Deleted domain nodes', {
count: result.deleted, domains
})
return result.deleted
}

Wyświetl plik

@ -1,66 +1,20 @@
import { Node, PrismaClient } from '@prisma/client'
import { NoNodeFoundError } from './NoNodeFoundError'
import findNotProcessedNodeWithAttemptLimit from './findNotProcessedNodeWithAttemptLimit'
import findNodeWithOldestRefreshWithLimits from './findNodeWithOldestRefreshWithLimits'
import Node from '../Definitions/Node'
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
export const fetchNodeToProcess = async (prisma: PrismaClient): Promise<Node> => {
const currentTimestamp = Date.now()
const attemptLimitMilliseconds = parseInt(process.env.REATTEMPT_MINUTES ?? '60') * 60 * 1000
const attemptLimitDate = new Date(currentTimestamp - attemptLimitMilliseconds)
console.log('Searching for not yet processed node not attempted before attemptLimit', { attemptLimitDate, attemptLimitMilliseconds })
const newNode = await prisma.node.findFirst({
orderBy: {
foundAt: 'asc'
},
where: {
refreshedAt: null,
OR: [
{
refreshAttemptedAt: {
lt: attemptLimitDate
}
},
{
refreshAttemptedAt: null
}
]
}
})
if (newNode) {
console.log('Found not yet processed node', { domain: newNode.domain })
return newNode
export const fetchNodeToProcess = async (elastic: ElasticClient): Promise<Node> => {
await elastic.indices.refresh({ index: nodeIndex })
let node = await findNotProcessedNodeWithAttemptLimit(elastic)
if (node !== null) {
return node
}
const refreshLimitMilliseconds = parseInt(process.env.REFRESH_HOURS ?? '168') * 60 * 60 * 1000
const refreshLimitDate = new Date(currentTimestamp - refreshLimitMilliseconds)
console.log('Searching instance not refreshed for longest time and before refreshLimit and attemptLimit', {
refreshLimitMilliseconds,
refreshLimitDate,
attemptLimitDate,
attemptLimitMilliseconds
})
const node = await prisma.node.findFirst({
orderBy: {
refreshedAt: 'asc'
},
where: {
refreshedAt: {
lt: refreshLimitDate
},
OR: [
{
refreshAttemptedAt: {
lt: attemptLimitDate
}
},
{
refreshAttemptedAt: null
}
]
}
})
if (!node) {
throw new NoNodeFoundError()
node = await findNodeWithOldestRefreshWithLimits(elastic)
if (node !== null) {
return node
}
console.log('Found oldest node', { domain: node.domain })
return node
throw new NoNodeFoundError()
}

Wyświetl plik

@ -0,0 +1,45 @@
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import Node from '../Definitions/Node'
const findNodeWithOldestRefreshWithLimits = async (elastic: ElasticClient): Promise<Node | null> => {
const currentTimestamp = Date.now()
const attemptLimitMilliseconds = parseInt(process.env.REATTEMPT_MINUTES ?? '60') * 60 * 1000
const attemptLimitDate = new Date(currentTimestamp - attemptLimitMilliseconds)
const refreshLimitMilliseconds = parseInt(process.env.REFRESH_HOURS ?? '168') * 60 * 60 * 1000
const refreshLimitDate = new Date(currentTimestamp - refreshLimitMilliseconds)
console.log('Searching instance not refreshed for longest time and before refreshLimit and attemptLimit', {
refreshLimitMilliseconds,
refreshLimitDate,
attemptLimitDate,
attemptLimitMilliseconds
})
const result = await elastic.search<Node>({
index: nodeIndex,
body: {
size: 1,
sort: [{
refreshedAt: { order: 'asc' }
}],
query: {
bool: {
must: [
{ range: { refreshedAt: { lt: refreshLimitDate.getTime() } } }
],
should: [
{ range: { refreshAttemptedAt: { lt: attemptLimitDate.getTime() } } },
{ bool: { must_not: [{ exists: { field: 'refreshAttemptedAt' } }] } }
],
minimum_should_match: 1
}
}
}
})
if (result.hits.hits.length > 0) {
const node = result.hits.hits[0]._source
console.log('Found oldest node', { node })
return node
}
return null
}
export default findNodeWithOldestRefreshWithLimits

Wyświetl plik

@ -0,0 +1,37 @@
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import Node from '../Definitions/Node'
const findNotProcessedNodeWithAttemptLimit = async (elastic: ElasticClient): Promise<Node|null> => {
const currentTimestamp = Date.now()
const attemptLimitMilliseconds = parseInt(process.env.REATTEMPT_MINUTES ?? '60') * 60 * 1000
const attemptLimitDate = new Date(currentTimestamp - attemptLimitMilliseconds)
console.log('Searching for not yet processed node not attempted before attemptLimit', { attemptLimitDate, attemptLimitMilliseconds })
const result = await elastic.search<Node>({
index: nodeIndex,
body: {
size: 1,
sort: [{
foundAt: { order: 'asc' }
}],
query: {
bool: {
must_not: [
{ exists: { field: 'refreshedAt' } }],
should: [
{ bool: { must_not: [{ exists: { field: 'refreshAttemptedAt' } }] } },
{ range: { refreshAttemptedAt: { lt: attemptLimitDate.getTime() } } }
],
minimum_should_match: 1
}
}
}
})
if (result.hits.hits.length > 0) {
const node = result.hits.hits[0]._source
console.log('Found not yet processed node', { node })
return node
}
return null
}
export default findNotProcessedNodeWithAttemptLimit

Wyświetl plik

@ -0,0 +1,13 @@
import { ElasticClient } from '../ElasticClient'
import Node from '../Definitions/Node'
import nodeIndex from '../Definitions/nodeIndex'
const getNode = async (elastic:ElasticClient, domain:string):Promise<Node> => {
const result = await elastic.get<Node>({
index: nodeIndex,
id: domain
})
return result._source
}
export default getNode

Wyświetl plik

@ -0,0 +1,7 @@
import getBannedDomains from '../../Jobs/Seed/getBannedDomains'
export default function isDomainNotBanned (domain):boolean {
return getBannedDomains().filter(
banned => domain.match(new RegExp('(.*\\.)?' + banned, 'gi')) !== null
).length === 0
}

Wyświetl plik

@ -1,14 +1,17 @@
import { Node, PrismaClient } from '@prisma/client'
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import Node from '../Definitions/Node'
import getNode from './getNode'
export const setNodeRefreshAttempted = async (prisma:PrismaClient, node:Node):Promise<Node> => {
export const setNodeRefreshAttempted = async (elastic: ElasticClient, node:Node):Promise<Node> => {
const date = new Date()
console.info('Setting node refresh attempt', { domain: node.domain, date: date })
return await prisma.node.update({
data: {
refreshAttemptedAt: date
},
where: {
id: node.id
await elastic.update<Node>({
index: nodeIndex,
id: node.domain,
doc: {
refreshAttemptedAt: date.getTime()
}
})
return getNode(elastic, node.domain)
}

Wyświetl plik

@ -1,14 +1,17 @@
import { Node, PrismaClient } from '@prisma/client'
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import Node from '../Definitions/Node'
import getNode from './getNode'
export const setNodeRefreshed = async (prisma:PrismaClient, node:Node):Promise<Node> => {
export const setNodeRefreshed = async (elastic: ElasticClient, node:Node):Promise<Node> => {
const date = new Date()
console.info('Setting node refreshed', { domain: node.domain, date: date })
return await prisma.node.update({
data: {
refreshedAt: date
},
where: {
id: node.id
await elastic.update<Node>({
index: nodeIndex,
id: node.domain,
doc: {
refreshedAt: date.getTime()
}
})
return getNode(elastic, node.domain)
}

Wyświetl plik

@ -0,0 +1,18 @@
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import Node from '../Definitions/Node'
import getNode from './getNode'
import { NodeStats } from '../../Jobs/Nodes/updateNodeFeedStats'
export const setNodeStats = async (elastic: ElasticClient, node:Node, stats: NodeStats):Promise<Node> => {
console.info('Setting node stats', { domain: node.domain, stats })
await elastic.update<Node>({
index: nodeIndex,
id: node.domain,
doc: {
accountFeedCount: stats.account,
channelFeedCount: stats.channel
}
})
return getNode(elastic, node.domain)
}

Wyświetl plik

@ -1,21 +0,0 @@
import { Node, PrismaClient } from '@prisma/client'
import { NodeInfo } from '../../Fediverse/NodeInfo/retrieveNodeInfo'
export const updateNode = async (prisma: PrismaClient, node: Node, nodeInfo:NodeInfo):Promise<Node> => {
const updated = await prisma.node.update({
where: {
id: node.id
},
data: {
softwareName: nodeInfo.software.name.toLocaleLowerCase(),
softwareVersion: nodeInfo.software.version,
totalUserCount: nodeInfo.usage?.users?.total ?? null,
monthActiveUserCount: nodeInfo.usage?.users?.activeMonth ?? null,
halfYearActiveUserCount: nodeInfo.usage?.users?.activeHalfyear ?? null,
statusesCount: nodeInfo.usage?.localPosts ?? null,
openRegistrations: nodeInfo.openRegistrations ?? null
}
})
console.info('Updated node info', { domain: updated.domain, software: updated.softwareName })
return updated
}

Wyświetl plik

@ -0,0 +1,33 @@
import { NodeInfo } from '../../Fediverse/NodeInfo/retrieveNodeInfo'
import Node from '../Definitions/Node'
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import getNode from './getNode'
const assertPositiveInt = (number:number|undefined):number|undefined => {
if (number === undefined) {
return undefined
}
return Math.max(0, Math.round(number))
}
export const updateNodeInfo = async (elastic: ElasticClient, node: Node, nodeInfo:NodeInfo):Promise<Node> => {
await elastic.update<Node>({
index: nodeIndex,
id: node.domain,
doc: {
name: nodeInfo?.name,
openRegistrations: nodeInfo?.openRegistrations,
softwareName: nodeInfo?.software?.name?.toLocaleLowerCase(),
softwareVersion: nodeInfo?.software?.version,
halfYearActiveUserCount: assertPositiveInt(nodeInfo?.usage?.users?.activeHalfyear),
monthActiveUserCount: assertPositiveInt(nodeInfo?.usage?.users.activeMonth),
statusesCount: assertPositiveInt(nodeInfo?.usage?.localPosts),
totalUserCount: assertPositiveInt(nodeInfo?.usage?.users?.total)
}
})
const resultNode = await getNode(elastic, node.domain)
console.info('Updated node info', { node })
return resultNode
}

Wyświetl plik

@ -0,0 +1,17 @@
import Node from '../Definitions/Node'
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import getNode from './getNode'
export const updateNodeIps = async (elastic:ElasticClient, node: Node, ips:string[]):Promise<Node> => {
await elastic.update<Node>({
index: nodeIndex,
id: node.domain,
doc: {
serverIps: ips
}
})
const resultNode = await getNode(elastic, node.domain)
console.info('Updated node ips', { resultNode })
return resultNode
}

Wyświetl plik

@ -1,5 +0,0 @@
import { PrismaClient } from '@prisma/client'
const prismaClient = new PrismaClient()
export default prismaClient

Wyświetl plik

@ -0,0 +1,5 @@
import { MappingProperty } from '@elastic/elasticsearch/lib/api/types'
const dateProperty:MappingProperty = { type: 'date', format: 'epoch_millis' }
export default dateProperty

Wyświetl plik

@ -1,15 +0,0 @@
import { PrismaClient, Tag, Feed } from '@prisma/client'
export const createFeedTags = async (prisma:PrismaClient, feed:Feed, tags:Tag[]):Promise<number> => {
const result = await prisma.feedToTag.createMany({
data: tags.map(tag => {
return {
feedId: feed.id,
tagId: tag.id
}
}),
skipDuplicates: true
})
console.log('Added tags to feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
return result.count
}

Wyświetl plik

@ -1,15 +0,0 @@
import { PrismaClient } from '@prisma/client'
export const createMissingTags = async (prisma:PrismaClient, tagNames:string[]):Promise<number> => {
const result = await prisma.tag.createMany({
data: tagNames.map(tagName => {
return {
name: tagName
}
}
),
skipDuplicates: true
})
console.log('Created missing tags', { count: result.count })
return result.count
}

Wyświetl plik

@ -1,11 +0,0 @@
import { PrismaClient, Feed } from '@prisma/client'
export const deleteAllFeedTags = async (prisma:PrismaClient, feed:Feed):Promise<number> => {
const result = await prisma.feedToTag.deleteMany({
where: {
feedId: feed.id
}
})
console.log('Removed all tags from feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
return result.count
}

Wyświetl plik

@ -1,11 +0,0 @@
import { PrismaClient, Tag } from '@prisma/client'
export const fetchTags = async (prisma:PrismaClient, tagNames:string[]):Promise<Tag[]> => {
return await prisma.tag.findMany({
where: {
name: {
in: tagNames
}
}
})
}

Wyświetl plik

@ -1,12 +1,16 @@
import providerRegistry from './Fediverse/Providers'
import prismaClient from './Storage/PrismaClient'
import { addNodeSeed } from './Jobs/Seed/addNodeSeed'
import { processNextNode } from './Jobs/processNextNode'
import assertNodeIndex from './Storage/Nodes/assertNodeIndex'
import assertFeedIndex from './Storage/Feeds/assertFeedIndex'
import elasticClient from './Storage/ElasticClient'
import deleteDomains from './Jobs/Seed/deleteBannedNodes'
import getBannedDomains from './Jobs/Seed/getBannedDomains'
const loop = async (): Promise<void> => {
while (true) {
try {
await processNextNode(prismaClient, providerRegistry)
await processNextNode(elasticClient, providerRegistry)
} catch (err) {
console.warn(err)
const waitForJobMilliseconds = parseInt(process.env.WAIT_FOR_JOB_MINUTES ?? '60') * 60 * 1000
@ -18,8 +22,11 @@ const loop = async (): Promise<void> => {
}
const app = async (): Promise<void> => {
const seedDomain = process.env.SEED_NODE_DOMAIN ?? 'mastodon.social'
await addNodeSeed(prismaClient, seedDomain)
await assertNodeIndex(elasticClient)
await assertFeedIndex(elasticClient)
await deleteDomains(elasticClient, getBannedDomains())
const seedDomains = (process.env.SEED_NODE_DOMAIN ?? 'mastodon.social').split(',')
await addNodeSeed(elasticClient, seedDomains)
setTimeout(loop)
}