manual download archiver improvements

pull/129/head
msramalho 2024-02-16 02:17:34 +00:00
rodzic c303b7078e
commit 4e5c041186
1 zmienionych plików z 4 dodań i 0 usunięć

Wyświetl plik

@@ -3,6 +3,8 @@ from abc import abstractmethod
 from dataclasses import dataclass
 import os
 import mimetypes, requests
+from loguru import logger
+from retrying import retry
 from ..core import Metadata, Step, ArchivingContext
@@ -41,6 +43,7 @@ class Archiver(Step):
 return mime.split("/")[0]
 return ""
+@retry(wait_random_min=500, wait_random_max=3500, stop_max_attempt_number=5)
 def download_from_url(self, url: str, to_filename: str = None) -> str:
 """
 downloads a URL to provided filename, or inferred from URL, returns local filename
@@ -50,6 +53,7 @@ class Archiver(Step):
 if len(to_filename) > 64:
 to_filename = to_filename[-64:]
 to_filename = os.path.join(ArchivingContext.get_tmp_dir(), to_filename)
+logger.debug(f"downloading {url[0:50]=} {to_filename=}")
 headers = {
 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
 }