From ea261635a2354391b1c016beb3c31b87f5c35631 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Wed, 25 May 2022 10:32:26 +0200 Subject: [PATCH] cleanup --- .gitignore | 4 ++- archivers/wayback_archiver.py | 5 +--- auto_archive.py | 2 +- test.py | 51 ----------------------------------- 4 files changed, 5 insertions(+), 57 deletions(-) delete mode 100644 test.py diff --git a/.gitignore b/.gitignore index 4eeb410..d15b3e8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ tmp/ +temp/ .env* .DS_Store expmt/ @@ -10,4 +11,5 @@ anu.html .pytest_cach anon* config.json -config-*.json \ No newline at end of file +config-*.json +logs/* \ No newline at end of file diff --git a/archivers/wayback_archiver.py b/archivers/wayback_archiver.py index 10814dc..8a0a21f 100644 --- a/archivers/wayback_archiver.py +++ b/archivers/wayback_archiver.py @@ -1,13 +1,10 @@ -import time, requests, os +import time, requests from bs4 import BeautifulSoup -# from dataclasses import dataclass from storages import Storage from .base_archiver import Archiver, ArchiveResult from configs import WaybackConfig - -# TODO: use WaybackConfig class WaybackArchiver(Archiver): name = "wayback" diff --git a/auto_archive.py b/auto_archive.py index 833f944..141bdc8 100644 --- a/auto_archive.py +++ b/auto_archive.py @@ -133,7 +133,7 @@ def main(): mkdir_if_not_exists(c.tmp_folder) process_sheet(c, c.sheet, header=c.header, columns=c.column_names) shutil.rmtree(c.tmp_folder) - c.webdriver.quit() + c.destroy_webdriver() if __name__ == '__main__': diff --git a/test.py b/test.py deleted file mode 100644 index 4061c9f..0000000 --- a/test.py +++ /dev/null @@ -1,51 +0,0 @@ -import os -import datetime -import argparse -import requests -import shutil -import gspread -from loguru import logger -from dotenv import load_dotenv -from selenium import webdriver -import traceback - -import archivers -from storages import S3Storage, S3Config -from utils import GWorksheet, mkdir_if_not_exists - -load_dotenv() - - -options = webdriver.FirefoxOptions() -options.headless = True -driver = webdriver.Firefox(options=options) -driver.set_window_size(1400, 2000) - -s3_config = S3Config( - bucket=os.getenv('DO_BUCKET'), - region=os.getenv('DO_SPACES_REGION'), - key=os.getenv('DO_SPACES_KEY'), - secret=os.getenv('DO_SPACES_SECRET'), - folder="temp" -) -s3_client = S3Storage(s3_config) -telegram_config = archivers.TelegramConfig( - api_id=os.getenv('TELEGRAM_API_ID'), - api_hash=os.getenv('TELEGRAM_API_HASH') -) - -archiver = archivers.TelethonArchiver(s3_client, driver, telegram_config) - -URLs = [ - # "https://t.me/c/1226032830/24864", - # "https://t.me/truexanewsua/32650", - "https://t.me/informatsia_obstanovka/5239", - # "https://t.me/informatsia_obstanovka/5240", - # "https://t.me/informatsia_obstanovka/5241", - # "https://t.me/informatsia_obstanovka/5242" -] - - -for url in URLs: - print(url) - print(archiver.download(url, False))