diff --git a/application/src/Jobs/Feeds/addFeed.ts b/application/src/Jobs/Feeds/addFeed.ts index c5c1565..4de0066 100644 --- a/application/src/Jobs/Feeds/addFeed.ts +++ b/application/src/Jobs/Feeds/addFeed.ts @@ -1,6 +1,6 @@ import { FeedData } from '../../Fediverse/Providers/FeedData' -import { extractTags } from '../../StringTools/extractTags' -import { extractEmails } from '../../StringTools/extractEmails' +import { extractTags } from '../../Utils/extractTags' +import { extractEmails } from '../../Utils/extractEmails' import { createFeed } from '../../Storage/Feeds/createFeed' import prepareFulltext from './prepareFulltext' import Feed from '../../Storage/Definitions/Feed' diff --git a/application/src/Jobs/Feeds/refreshFeed.ts b/application/src/Jobs/Feeds/refreshFeed.ts index 3647a2e..129593a 100644 --- a/application/src/Jobs/Feeds/refreshFeed.ts +++ b/application/src/Jobs/Feeds/refreshFeed.ts @@ -1,6 +1,6 @@ import { FeedData } from '../../Fediverse/Providers/FeedData' -import { extractTags } from '../../StringTools/extractTags' -import { extractEmails } from '../../StringTools/extractEmails' +import { extractTags } from '../../Utils/extractTags' +import { extractEmails } from '../../Utils/extractEmails' import { updateFeed } from '../../Storage/Feeds/updateFeed' import Feed from '../../Storage/Definitions/Feed' import Node from '../../Storage/Definitions/Node' diff --git a/application/src/Jobs/Feeds/refreshFeedsOnPage.ts b/application/src/Jobs/Feeds/refreshFeedsOnPage.ts index 48e7e26..03c120b 100644 --- a/application/src/Jobs/Feeds/refreshFeedsOnPage.ts +++ b/application/src/Jobs/Feeds/refreshFeedsOnPage.ts @@ -1,4 +1,5 @@ import RobotsTxt from '../../Fediverse/RobotsTxt/RobotsTxt.js' +import batchPromises from '../../Utils/batchPromises.js' import { refreshOrAddFeed } from './refreshOrAddFeed' import { FeedProvider } from '../../Fediverse/Providers/FeedProvider' import Node from '../../Storage/Definitions/Node' @@ -21,10 +22,12 @@ export const refreshFeedsOnPage = async ( provider: provider.getKey(), page }) - return await Promise.all( + return await batchPromises( indexableFeedData.map( - async (feedDataItem) => - await refreshOrAddFeed(elastic, node, feedDataItem) - ) + (feedDataItem) => { + return async () => await refreshOrAddFeed(elastic, node, feedDataItem) + } + ), + Number(process.env.STORAGE_BATCH_SIZE ?? 5) ) } diff --git a/application/src/Jobs/processNextNode.ts b/application/src/Jobs/processNextNode.ts index 44286c9..e9cb0cf 100644 --- a/application/src/Jobs/processNextNode.ts +++ b/application/src/Jobs/processNextNode.ts @@ -2,6 +2,7 @@ import fetchRobotsTxt from '../Fediverse/RobotsTxt/fetchRobotsTxt.js' import { fetchNodeToProcess } from '../Storage/Nodes/fetchNodeToProcess' import { ProviderRegistry } from '../Fediverse/Providers/ProviderRegistry' import { setNodeRefreshed } from '../Storage/Nodes/setNodeRefreshed' +import batchPromises from '../Utils/batchPromises.js' import { refreshNodeInfo } from './NodeInfo/refreshNodeInfo' import { setNodeRefreshAttempted } from '../Storage/Nodes/setNodeRefreshAttempted' import { findNewNodes } from './Nodes/findNewNodes' @@ -37,24 +38,30 @@ export const processNextNode = async ( } const provider = providerRegistry.getProviderByKey(softwareName) - await Promise.all( - provider.getNodeProviders().map(async (nodeProvider: NodeProvider) => { - console.info('Searching for nodes', { - domain: node.domain, - provider: nodeProvider.getKey() - }) - return await findNewNodes(elastic, nodeProvider, node, robotsTxt) - }) + await batchPromises( + provider.getNodeProviders().map((nodeProvider: NodeProvider) => { + return async () => { + console.info('Searching for nodes', { + domain: node.domain, + provider: nodeProvider.getKey() + }) + return await findNewNodes(elastic, nodeProvider, node, robotsTxt) + } + }), + Number(process.env.NODE_PROVIDER_BATCH_SIZE ?? 5) ) - await Promise.all( - provider.getFeedProviders().map(async (feedProvider: FeedProvider) => { - console.info('Searching for feeds', { - domain: node.domain, - provider: feedProvider.getKey() - }) - return await refreshFeeds(elastic, feedProvider, node, robotsTxt) - }) + await batchPromises( + provider.getFeedProviders().map((feedProvider: FeedProvider) => { + return async () => { + console.info('Searching for feeds', { + domain: node.domain, + provider: feedProvider.getKey() + }) + return await refreshFeeds(elastic, feedProvider, node, robotsTxt) + } + }), + Number(process.env.FEED_PROVIDER_BATCH_SIZE ?? 5) ) await deleteOldFeeds(elastic, node) diff --git a/application/src/Utils/PromiseFactory.ts b/application/src/Utils/PromiseFactory.ts new file mode 100644 index 0000000..90050c0 --- /dev/null +++ b/application/src/Utils/PromiseFactory.ts @@ -0,0 +1,3 @@ +type PromiseFactory = () => Promise + +export default PromiseFactory diff --git a/application/src/Utils/batchPromises.ts b/application/src/Utils/batchPromises.ts new file mode 100644 index 0000000..d3c0e9f --- /dev/null +++ b/application/src/Utils/batchPromises.ts @@ -0,0 +1,18 @@ +import PromiseFactory from './PromiseFactory.js' + +export default async function batchPromises ( + promiseFactories: Array>, + batchSize: number +): Promise { + const results: TResult[] = [] + + do { + const batch = promiseFactories.splice(0, batchSize) + results.push( + ...await Promise.all( + batch.map(async promiseFactory => await promiseFactory()) + ) + ) + } while (promiseFactories.length > 0) + return results +} diff --git a/application/src/StringTools/extractEmails.ts b/application/src/Utils/extractEmails.ts similarity index 100% rename from application/src/StringTools/extractEmails.ts rename to application/src/Utils/extractEmails.ts diff --git a/application/src/StringTools/extractTags.ts b/application/src/Utils/extractTags.ts similarity index 100% rename from application/src/StringTools/extractTags.ts rename to application/src/Utils/extractTags.ts