From 0c2ade0b52bbd6e7fda29b776df37c4c8b64c38f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0t=C4=9Bp=C3=A1n=20=C5=A0korpil?= Date: Mon, 3 Jan 2022 13:26:17 +0100 Subject: [PATCH] Added more env configs --- Dockerfile | 7 ++- README.md | 14 +++--- .../Fediverse/NodeInfo/retrieveNodeInfo.ts | 3 +- .../Fediverse/NodeInfo/retrieveWellKnown.ts | 3 +- .../Mastodon/retrieveLocalPublicUsersPage.ts | 5 ++- .../Providers/Mastodon/retrievePeers.ts | 3 +- .../Providers/Peertube/retrieveAccounts.ts | 3 +- .../Providers/Peertube/retrieveFollowers.ts | 3 +- .../Peertube/retrieveVideoChannels.ts | 3 +- .../getDefaultTimeoutMilliseconds.ts | 3 ++ .../src/Storage/Nodes/fetchNodeToProcess.ts | 45 +++++++++++++++---- application/src/app.ts | 7 ++- 12 files changed, 73 insertions(+), 26 deletions(-) create mode 100644 application/src/Fediverse/getDefaultTimeoutMilliseconds.ts diff --git a/Dockerfile b/Dockerfile index dd3775b..612a32f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,11 @@ FROM node:16-bullseye AS build ENV POSTGRES_URL='postgresql://fedisearch:passwd@postgres:5432/fedisearch?schema=public' \ - SEED_NODE_DOMAIN='mastodon.social' + SEED_NODE_DOMAIN='mastodon.social' \ + REATTEMPT_MINUTES='60' \ + REFRESH_HOURS='120' \ + WAIT_FOR_JOB_MINUTES='60' \ + DEFAULT_TIMEOUT_MILLISECONDS='10000' \ + TZ='UTC' WORKDIR /srv COPY application/package*.json ./ COPY application/prisma ./prisma/ diff --git a/README.md b/README.md index d40b42a..54c026b 100644 --- a/README.md +++ b/README.md @@ -22,11 +22,15 @@ Data providers for more apps will be probably added soon (Pull requests are welc Configuration is done using environmental variables: -| Variable | Description | Value example | -|--------------------|-------------------------------------------------------------|-------------------------------------------------------------------------| -| `POSTGRES_URL` | Postgres database uri | `postgresql://fedisearch:passwd@postgres:5432/fedisearch?schema=public` | -| `SEED_NODE_DOMAIN` | 
Domain of the first node to search users and other nodes on | `mastodon.social` | - +| Variable | Description | Default value / Example value | +|--------------------------------|--------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------| +| `POSTGRES_URL` | Postgres database URI | `postgresql://fedisearch:passwd@postgres:5432/fedisearch?schema=public` | +| `SEED_NODE_DOMAIN` | Domain of the first node to search users and other nodes on | `mastodon.social` | +| `REATTEMPT_MINUTES` | _Optional_, How many minutes to wait before the next refresh attempt of a node whose refresh failed | `60` | +| `REFRESH_HOURS` | _Optional_, How often (in hours) node info should be refreshed | `120` | +| `WAIT_FOR_JOB_MINUTES` | _Optional_, How many minutes the thread should sleep if there are no nodes to refresh | `60` | +| `DEFAULT_TIMEOUT_MILLISECONDS` | _Optional_, How many milliseconds to wait for a node API HTTP response during refresh | `10000` | +| `TZ` | _Optional_, Timezone | `UTC` | ## Deploy App is designed to be run in docker container and deployed using docker-compose. 
More info can be found in [FediSearch example docker-compose](https://github.com/Stopka/fedisearch-compose) project diff --git a/application/src/Fediverse/NodeInfo/retrieveNodeInfo.ts b/application/src/Fediverse/NodeInfo/retrieveNodeInfo.ts index 36fac9b..b7dabd3 100644 --- a/application/src/Fediverse/NodeInfo/retrieveNodeInfo.ts +++ b/application/src/Fediverse/NodeInfo/retrieveNodeInfo.ts @@ -1,6 +1,7 @@ import axios from 'axios' import { z } from 'zod' import { assertSuccessJsonResponse } from '../assertSuccessJsonResponse' +import { getDefaultTimeoutMilliseconds } from '../getDefaultTimeoutMilliseconds' const schema = z.object({ software: z.object({ @@ -25,7 +26,7 @@ export type NodeInfo = z.infer export const retrieveNodeInfo = async (url:string):Promise => { console.info('Retrieving node info', { url: url }) - const nodeInfoResponse = await axios.get(url, { timeout: 10000 }) + const nodeInfoResponse = await axios.get(url, { timeout: getDefaultTimeoutMilliseconds() }) assertSuccessJsonResponse(nodeInfoResponse) return schema.parse(nodeInfoResponse.data) } diff --git a/application/src/Fediverse/NodeInfo/retrieveWellKnown.ts b/application/src/Fediverse/NodeInfo/retrieveWellKnown.ts index 7b384dd..26088ab 100644 --- a/application/src/Fediverse/NodeInfo/retrieveWellKnown.ts +++ b/application/src/Fediverse/NodeInfo/retrieveWellKnown.ts @@ -1,6 +1,7 @@ import axios from 'axios' import { assertSuccessJsonResponse } from '../assertSuccessJsonResponse' import { z } from 'zod' +import { getDefaultTimeoutMilliseconds } from '../getDefaultTimeoutMilliseconds' const wellKnownSchema = z.object({ links: z.array( @@ -16,7 +17,7 @@ export type WellKnown = z.infer export const retrieveWellKnown = async (domain:string):Promise => { console.info('Retrieving well known', { domain: domain }) const wellKnownUrl = `https://${domain}/.well-known/nodeinfo` - const wellKnownResponse = await axios.get(wellKnownUrl, { timeout: 10000 }) + const wellKnownResponse = await 
axios.get(wellKnownUrl, { timeout: getDefaultTimeoutMilliseconds() }) assertSuccessJsonResponse(wellKnownResponse) return wellKnownSchema.parse(wellKnownResponse.data) } diff --git a/application/src/Fediverse/Providers/Mastodon/retrieveLocalPublicUsersPage.ts b/application/src/Fediverse/Providers/Mastodon/retrieveLocalPublicUsersPage.ts index c48dfe8..511c215 100644 --- a/application/src/Fediverse/Providers/Mastodon/retrieveLocalPublicUsersPage.ts +++ b/application/src/Fediverse/Providers/Mastodon/retrieveLocalPublicUsersPage.ts @@ -1,7 +1,8 @@ import axios from 'axios' import { assertSuccessJsonResponse } from '../../assertSuccessJsonResponse' import { FeedData } from '../FeedData' -import { string, z } from 'zod' +import { z } from 'zod' +import { getDefaultTimeoutMilliseconds } from '../../getDefaultTimeoutMilliseconds' const limit = 500 @@ -56,7 +57,7 @@ export const retrieveLocalPublicUsersPage = async (domain: string, page: number) offset: page * limit, local: true }, - timeout: 10000 + timeout: getDefaultTimeoutMilliseconds() }) assertSuccessJsonResponse(response) const responseData = schema.parse(response.data) diff --git a/application/src/Fediverse/Providers/Mastodon/retrievePeers.ts b/application/src/Fediverse/Providers/Mastodon/retrievePeers.ts index 521bf06..b8389cc 100644 --- a/application/src/Fediverse/Providers/Mastodon/retrievePeers.ts +++ b/application/src/Fediverse/Providers/Mastodon/retrievePeers.ts @@ -1,6 +1,7 @@ import axios from 'axios' import { assertSuccessJsonResponse } from '../../assertSuccessJsonResponse' import { z } from 'zod' +import { getDefaultTimeoutMilliseconds } from '../../getDefaultTimeoutMilliseconds' const schema = z.array( z.string() @@ -12,7 +13,7 @@ export const retrievePeers = async (domain:string, page:number):Promise { + return parseInt(process.env.DEFAULT_TIMEOUT_MILLISECONDS ?? 
'10000') +} diff --git a/application/src/Storage/Nodes/fetchNodeToProcess.ts b/application/src/Storage/Nodes/fetchNodeToProcess.ts index 73bb059..e55d1e9 100644 --- a/application/src/Storage/Nodes/fetchNodeToProcess.ts +++ b/application/src/Storage/Nodes/fetchNodeToProcess.ts @@ -1,34 +1,63 @@ import { Node, PrismaClient } from '@prisma/client' export const fetchNodeToProcess = async (prisma: PrismaClient): Promise => { - console.log('Searching for not yet processed node') + const currentTimestamp = Date.now() + const attemptLimitMilliseconds = parseInt(process.env.REATTEMPT_MINUTES ?? '60') * 60 * 1000 + const attemptLimitDate = new Date(currentTimestamp - attemptLimitMilliseconds) + console.log('Searching for not yet processed node not attempted before attemptLimit', { attemptLimitDate, attemptLimitMilliseconds }) const newNode = await prisma.node.findFirst({ orderBy: { foundAt: 'asc' }, where: { - refreshedAt: null + refreshedAt: null, + OR: [ + { + refreshAttemptedAt: { + lt: attemptLimitDate + } + }, + { + refreshAttemptedAt: null + } + ] + } }) if (newNode) { console.log('Found not yet processed node', { domain: newNode.domain }) return newNode } - const date = new Date() - date.setMonth(date.getMonth() - 1) - console.log('Searching instance not refreshed for longest time and at least a month ago', { date: date }) + const refreshLimitMilliseconds = parseInt(process.env.REFRESH_HOURS ?? 
'168') * 60 * 60 * 1000 + const refreshLimitDate = new Date(currentTimestamp - refreshLimitMilliseconds) + console.log('Searching instance not refreshed for longest time and before refreshLimit and attemptLimit', { + refreshLimitMilliseconds, + refreshLimitDate, + attemptLimitDate, + attemptLimitMilliseconds + }) const node = await prisma.node.findFirst({ orderBy: { refreshedAt: 'asc' }, where: { refreshedAt: { - lt: date - } + lt: refreshLimitDate + }, + OR: [ + { + refreshAttemptedAt: { + lt: attemptLimitDate + } + }, + { + refreshAttemptedAt: null + } + ] } }) if (node) { - console.log('Found oldest node', { domain: newNode.domain }) + console.log('Found oldest node', { domain: node.domain }) } else { throw new Error('No node found') } diff --git a/application/src/app.ts b/application/src/app.ts index 9372635..c7022b1 100644 --- a/application/src/app.ts +++ b/application/src/app.ts @@ -9,10 +9,9 @@ const loop = async (): Promise => { await processNextNode(prismaClient, providerRegistry) } catch (err) { console.warn(err) - const milisecondsInMinute = 1000 * 60 - const timeout = 60 * milisecondsInMinute - console.info('Delaying next node process', { timeoutMinutes: timeout / milisecondsInMinute, now: new Date() }) - setTimeout(loop, timeout) + const waitForJobMilliseconds = parseInt(process.env.WAIT_FOR_JOB_MINUTES ?? '60') * 60 * 1000 + console.info('Delaying next node process', { timeoutMilliseconds: waitForJobMilliseconds, timeoutDate: new Date(Date.now() + waitForJobMilliseconds), now: new Date() }) + setTimeout(loop, waitForJobMilliseconds) return } }