kopia lustrzana https://github.com/bellingcat/auto-archiver
manual download archiver improvements
rodzic
c303b7078e
commit
4e5c041186
|
@ -3,6 +3,8 @@ from abc import abstractmethod
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import os
|
import os
|
||||||
import mimetypes, requests
|
import mimetypes, requests
|
||||||
|
from loguru import logger
|
||||||
|
from retrying import retry
|
||||||
|
|
||||||
from ..core import Metadata, Step, ArchivingContext
|
from ..core import Metadata, Step, ArchivingContext
|
||||||
|
|
||||||
|
@ -41,6 +43,7 @@ class Archiver(Step):
|
||||||
return mime.split("/")[0]
|
return mime.split("/")[0]
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
@retry(wait_random_min=500, wait_random_max=3500, stop_max_attempt_number=5)
|
||||||
def download_from_url(self, url: str, to_filename: str = None) -> str:
|
def download_from_url(self, url: str, to_filename: str = None) -> str:
|
||||||
"""
|
"""
|
||||||
downloads a URL to provided filename, or inferred from URL, returns local filename
|
downloads a URL to provided filename, or inferred from URL, returns local filename
|
||||||
|
@ -50,6 +53,7 @@ class Archiver(Step):
|
||||||
if len(to_filename) > 64:
|
if len(to_filename) > 64:
|
||||||
to_filename = to_filename[-64:]
|
to_filename = to_filename[-64:]
|
||||||
to_filename = os.path.join(ArchivingContext.get_tmp_dir(), to_filename)
|
to_filename = os.path.join(ArchivingContext.get_tmp_dir(), to_filename)
|
||||||
|
logger.debug(f"downloading {url[0:50]=} {to_filename=}")
|
||||||
headers = {
|
headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
|
||||||
}
|
}
|
||||||
|
|
Ładowanie…
Reference in New Issue