Merge pull request #1116 from moonstream-to/metadata-perfomance-improve

Add metadata crawler improvements
pull/1118/head
Andrey Dolgolev 2024-07-26 00:43:01 +03:00 committed by GitHub
commit 8503ed8036
No known key found in the database for this signature
GPG key ID: B5690EEEBB952194
2 changed files with 18 additions and 4 deletions


@@ -3,7 +3,7 @@ Description=Execute Xai metadata crawler each 10m
 [Timer]
 OnBootSec=20s
-OnUnitActiveSec=60m
+OnUnitActiveSec=10m
 [Install]
 WantedBy=timers.target
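
For context, a minimal sketch of the full timer unit this hunk patches, reconstructed from the lines visible in the diff (the unit file name and the [Unit] section header are assumptions, not taken from the repository). OnBootSec delays the first run after boot, while OnUnitActiveSec re-triggers the matching .service unit at the given interval after each activation, so the change makes the crawler fire every 10 minutes instead of hourly:

# xai-metadata-crawler.timer -- hypothetical unit name, for illustration only
[Unit]
Description=Execute Xai metadata crawler each 10m

[Timer]
# First run 20 seconds after boot.
OnBootSec=20s
# Re-run 10 minutes after the last activation of the paired .service unit.
OnUnitActiveSec=10m

[Install]
WantedBy=timers.target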


@@ -82,7 +82,10 @@ def crawl_uri(metadata_uri: str) -> Any:
 def parse_metadata(
-    blockchain_type: AvailableBlockchainType, batch_size: int, max_recrawl: int
+    blockchain_type: AvailableBlockchainType,
+    batch_size: int,
+    max_recrawl: int,
+    threads: int,
 ):
     """
     Parse all metadata of tokens.
@@ -185,7 +188,9 @@ def parse_metadata(
             try:
                 with db_session.begin():
                     for token_uri_data in requests_chunk:
-                        with ThreadPoolExecutor(max_workers=1) as executor:
+                        with ThreadPoolExecutor(
+                            max_workers=threads
+                        ) as executor:
                             future = executor.submit(
                                 crawl_uri, token_uri_data.token_uri
                             )
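
This hunk swaps the hard-coded max_workers=1 for the new threads argument. Note that, as shown in the diff, the executor is still created inside the per-token loop; for comparison, a minimal, self-contained sketch of the usual pattern for fanning many I/O-bound fetches across one bounded pool looks like the following. Everything here is illustrative: crawl_uri is a stand-in HTTP fetch (the crawler's real implementation may handle other URI schemes), and crawl_uris is a hypothetical helper, not a function from the repository.

from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Dict, List

import requests


def crawl_uri(metadata_uri: str) -> Any:
    """Stand-in fetch: GET the URI and return parsed JSON."""
    response = requests.get(metadata_uri, timeout=10)
    response.raise_for_status()
    return response.json()


def crawl_uris(uris: List[str], threads: int = 4) -> Dict[str, Any]:
    """Fetch many metadata URIs concurrently with a single bounded thread pool."""
    results: Dict[str, Any] = {}
    with ThreadPoolExecutor(max_workers=threads) as executor:
        # Submit every URI up front, then collect results as each future completes.
        futures = {executor.submit(crawl_uri, uri): uri for uri in uris}
        for future in as_completed(futures):
            uri = futures[future]
            try:
                results[uri] = future.result()
            except Exception:
                # A failed fetch is recorded as None and left for a later recrawl pass.
                results[uri] = None
    return results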
@@ -235,7 +240,9 @@ def handle_crawl(args: argparse.Namespace) -> None:
     blockchain_type = AvailableBlockchainType(args.blockchain)
-    parse_metadata(blockchain_type, args.commit_batch_size, args.max_recrawl)
+    parse_metadata(
+        blockchain_type, args.commit_batch_size, args.max_recrawl, args.threads
+    )
 def main() -> None:
@@ -269,6 +276,13 @@ def main() -> None:
         default=300,
         help="Maximum amount of recrawling of already crawled tokens",
     )
+    metadata_crawler_parser.add_argument(
+        "--threads",
+        "-t",
+        type=int,
+        default=4,
+        help="Amount of threads for crawling",
+    )
     metadata_crawler_parser.set_defaults(func=handle_crawl)
     args = parser.parse_args()
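
The new --threads/-t flag reaches handle_crawl through the set_defaults(func=...) dispatch pattern visible above. Below is a minimal, runnable sketch of that wiring under assumed names (the parser description, the "crawl" sub-command name, and the echo-only handler are illustrative; only the --threads flag definition mirrors the diff):

# argparse_sketch.py -- illustrative only, not the crawler's real module layout
import argparse


def handle_crawl(args: argparse.Namespace) -> None:
    # In the real crawler this calls parse_metadata(...); here we just echo the args.
    print(f"crawling {args.blockchain} with {args.threads} threads")


def main() -> None:
    parser = argparse.ArgumentParser(description="metadata crawler (sketch)")
    subparsers = parser.add_subparsers(dest="command", required=True)

    crawl_parser = subparsers.add_parser("crawl", help="Crawl token metadata")
    crawl_parser.add_argument("--blockchain", type=str, required=True)
    crawl_parser.add_argument(
        "--threads",
        "-t",
        type=int,
        default=4,
        help="Amount of threads for crawling",
    )
    # Dispatch pattern from the diff: the chosen sub-command carries its handler.
    crawl_parser.set_defaults(func=handle_crawl)

    args = parser.parse_args()
    args.func(args)


if __name__ == "__main__":
    main()

Invoked as, for example, python argparse_sketch.py crawl --blockchain xai --threads 8, the handler receives args.threads == 8; omitting the flag falls back to the default of 4, matching the default added in this commit.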