diff --git a/crawlers/deploy/xai-metadata.timer b/crawlers/deploy/xai-metadata.timer
index f4631718..6537f0fe 100644
--- a/crawlers/deploy/xai-metadata.timer
+++ b/crawlers/deploy/xai-metadata.timer
@@ -3,7 +3,7 @@ Description=Execute Xai metadata crawler each 10m
 
 [Timer]
 OnBootSec=20s
-OnUnitActiveSec=60m
+OnUnitActiveSec=10m
 
 [Install]
 WantedBy=timers.target
diff --git a/crawlers/mooncrawl/mooncrawl/metadata_crawler/cli.py b/crawlers/mooncrawl/mooncrawl/metadata_crawler/cli.py
index b7586f5a..166bfb5c 100644
--- a/crawlers/mooncrawl/mooncrawl/metadata_crawler/cli.py
+++ b/crawlers/mooncrawl/mooncrawl/metadata_crawler/cli.py
@@ -82,7 +82,10 @@ def crawl_uri(metadata_uri: str) -> Any:
 
 
 def parse_metadata(
-    blockchain_type: AvailableBlockchainType, batch_size: int, max_recrawl: int
+    blockchain_type: AvailableBlockchainType,
+    batch_size: int,
+    max_recrawl: int,
+    threads: int,
 ):
     """
     Parse all metadata of tokens.
@@ -185,7 +188,9 @@ def parse_metadata(
         try:
             with db_session.begin():
                 for token_uri_data in requests_chunk:
-                    with ThreadPoolExecutor(max_workers=1) as executor:
+                    with ThreadPoolExecutor(
+                        max_workers=threads
+                    ) as executor:
                         future = executor.submit(
                             crawl_uri, token_uri_data.token_uri
                         )
@@ -235,7 +240,9 @@ def handle_crawl(args: argparse.Namespace) -> None:
 
     blockchain_type = AvailableBlockchainType(args.blockchain)
 
-    parse_metadata(blockchain_type, args.commit_batch_size, args.max_recrawl)
+    parse_metadata(
+        blockchain_type, args.commit_batch_size, args.max_recrawl, args.threads
+    )
 
 
 def main() -> None:
@@ -269,6 +276,13 @@ def main() -> None:
         default=300,
         help="Maximum amount of recrawling of already crawled tokens",
     )
+    metadata_crawler_parser.add_argument(
+        "--threads",
+        "-t",
+        type=int,
+        default=4,
+        help="Amount of threads for crawling",
+    )
     metadata_crawler_parser.set_defaults(func=handle_crawl)
 
     args = parser.parse_args()