From 6bd6f88b46220ac3bdd82e1936699ca0aa372be8 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Mon, 9 May 2022 17:45:54 +0200 Subject: [PATCH] refactor --- archivers/base_archiver.py | 7 +++---- archivers/telegram_archiver.py | 3 ++- archivers/telethon_archiver.py | 8 ++++---- archivers/tiktok_archiver.py | 4 ++-- archivers/youtubedl_archiver.py | 4 ++-- configs/config.py | 6 +++--- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/archivers/base_archiver.py b/archivers/base_archiver.py index aa85444..a395c3f 100644 --- a/archivers/base_archiver.py +++ b/archivers/base_archiver.py @@ -30,7 +30,6 @@ class ArchiveResult: class Archiver(ABC): name = "default" - TMP_FOLDER = "tmp/" def __init__(self, storage: Storage, driver): self.storage = storage @@ -61,7 +60,7 @@ class Archiver(ABC): page += f"" page_key = self.get_key(urlparse(url).path.replace("/", "_") + ".html") - page_filename = Archiver.TMP_FOLDER + page_key + page_filename = Storage.TMP_FOLDER + page_key page_cdn = self.storage.get_cdn_url(page_key) with open(page_filename, "w") as f: @@ -86,7 +85,7 @@ class Archiver(ABC): if '.' not in path: key += '.jpg' - filename = Archiver.TMP_FOLDER + key + filename = Storage.TMP_FOLDER + key d = requests.get(media_url, headers=headers) with open(filename, 'wb') as f: @@ -127,7 +126,7 @@ class Archiver(ABC): def get_screenshot(self, url): key = self.get_key(urlparse(url).path.replace( "/", "_") + datetime.datetime.utcnow().isoformat().replace(" ", "_") + ".png") - filename = Archiver.TMP_FOLDER + key + filename = Storage.TMP_FOLDER + key try: self.driver.get(url) diff --git a/archivers/telegram_archiver.py b/archivers/telegram_archiver.py index 5c586e6..aafba85 100644 --- a/archivers/telegram_archiver.py +++ b/archivers/telegram_archiver.py @@ -6,6 +6,7 @@ import re import html from .base_archiver import Archiver, ArchiveResult +from storages import Storage class TelegramArchiver(Archiver): @@ -52,7 +53,7 @@ class TelegramArchiver(Archiver): video_id = video_url.split('/')[-1].split('?')[0] key = self.get_key(video_id) - filename = Archiver.TMP_FOLDER + key + filename = Storage.TMP_FOLDER + key cdn_url = self.storage.get_cdn_url(key) if check_if_exists and self.storage.exists(key): diff --git a/archivers/telethon_archiver.py b/archivers/telethon_archiver.py index 69bd49a..eb740a9 100644 --- a/archivers/telethon_archiver.py +++ b/archivers/telethon_archiver.py @@ -70,8 +70,8 @@ class TelethonArchiver(Archiver): message = post.message for mp in media_posts: if len(mp.message) > len(message): message = mp.message - filename = self.client.download_media(mp.media, f'{Archiver.TMP_FOLDER}{chat}_{group_id}/{mp.id}') - key = filename.split(Archiver.TMP_FOLDER)[1] + filename = self.client.download_media(mp.media, f'{Storage.TMP_FOLDER}{chat}_{group_id}/{mp.id}') + key = filename.split(Storage.TMP_FOLDER)[1] self.storage.upload(filename, key) hash = self.get_hash(filename) cdn_url = self.storage.get_cdn_url(key) @@ -83,8 +83,8 @@ class TelethonArchiver(Archiver): return ArchiveResult(status=status, cdn_url=page_cdn, title=post.message, timestamp=post.date, hash=page_hash, screenshot=screenshot) elif len(media_posts) == 1: key = self.get_key(f'{chat}_{post_id}') - filename = self.client.download_media(post.media, f'{Archiver.TMP_FOLDER}{key}') - key = filename.split(Archiver.TMP_FOLDER)[1].replace(" ", "") + filename = self.client.download_media(post.media, f'{Storage.TMP_FOLDER}{key}') + key = filename.split(Storage.TMP_FOLDER)[1].replace(" ", "") self.storage.upload(filename, key) hash = self.get_hash(filename) cdn_url = self.storage.get_cdn_url(key) diff --git a/archivers/tiktok_archiver.py b/archivers/tiktok_archiver.py index 902e808..4daa675 100644 --- a/archivers/tiktok_archiver.py +++ b/archivers/tiktok_archiver.py @@ -3,7 +3,7 @@ import tiktok_downloader from loguru import logger from .base_archiver import Archiver, ArchiveResult - +from storages import Storage class TiktokArchiver(Archiver): name = "tiktok" @@ -18,7 +18,7 @@ class TiktokArchiver(Archiver): info = tiktok_downloader.info_post(url) key = self.get_key(f'{info.id}.mp4') cdn_url = self.storage.get_cdn_url(key) - filename = Archiver.TMP_FOLDER + key + filename = Storage.TMP_FOLDER + key if check_if_exists and self.storage.exists(key): status = 'already archived' diff --git a/archivers/youtubedl_archiver.py b/archivers/youtubedl_archiver.py index 493b792..bc0456e 100644 --- a/archivers/youtubedl_archiver.py +++ b/archivers/youtubedl_archiver.py @@ -5,11 +5,11 @@ import yt_dlp from loguru import logger from .base_archiver import Archiver, ArchiveResult - +from storages import Storage class YoutubeDLArchiver(Archiver): name = "youtube_dl" - ydl_opts = {'outtmpl': f'{Archiver.TMP_FOLDER}%(id)s.%(ext)s', 'quiet': False} + ydl_opts = {'outtmpl': f'{Storage.TMP_FOLDER}%(id)s.%(ext)s', 'quiet': False} def download(self, url, check_if_exists=False): netloc = self.get_netloc(url) diff --git a/configs/config.py b/configs/config.py index 6e68310..d5ef5ad 100644 --- a/configs/config.py +++ b/configs/config.py @@ -9,7 +9,7 @@ from utils.gworksheet import GWorksheet from storages import S3Config, S3Storage from .wayback_config import WaybackConfig from .telegram_config import TelegramConfig -from archivers import Archiver +from storages import Storage class Config: @@ -45,8 +45,8 @@ class Config: assert self.sheet is not None, "'sheet' must be provided either through command line or configuration file" self.header = int(getattr(self.args, "header") or execution.get("header", 1)) - self.tmp_folder = execution.get("tmp_folder", Archiver.TMP_FOLDER) - Archiver.TMP_FOLDER = self.tmp_folder + self.tmp_folder = execution.get("tmp_folder", Storage.TMP_FOLDER) + Storage.TMP_FOLDER = self.tmp_folder self.storage = execution.get("storage", "s3")