started cleaning auto_archive

pull/33/head
msramalho 2022-06-03 17:32:55 +02:00
rodzic a2fdfacb26
commit aaa1d299da
1 zmienionych plików z 1 dodań i 28 usunięć

Wyświetl plik

@@ -9,8 +9,7 @@ import traceback
from archivers import TelethonArchiver, TelegramArchiver, TiktokArchiver, YoutubeDLArchiver, TwitterArchiver, WaybackArchiver, ArchiveResult
from utils import GWorksheet, mkdir_if_not_exists, expand_url
from configs import Config
import archivers
from storages import S3Storage, S3Config
from storages.gd_storage import GDConfig, GDStorage
from utils import GWorksheet, mkdir_if_not_exists
import sys
@@ -21,9 +20,6 @@ logger.add("logs/3success.log", level="SUCCESS")
logger.add("logs/4warning.log", level="WARNING")
logger.add("logs/5error.log", level="ERROR")
load_dotenv()
def update_sheet(gw, row, result: ArchiveResult):
cell_updates = []
row_values = gw.get_row(row)
@@ -61,25 +57,6 @@ def update_sheet(gw, row, result: ArchiveResult):
def process_sheet(c: Config, sheet, header=1, columns=GWorksheet.COLUMN_NAMES):
sh = c.gsheets_client.open(sheet)
def process_sheet(sheet, storage="s3", header=1, columns=GWorksheet.COLUMN_NAMES):
gc = gspread.service_account(filename='service_account.json')
sh = gc.open(sheet)
s3_config = S3Config(
bucket=os.getenv('DO_BUCKET'),
region=os.getenv('DO_SPACES_REGION'),
key=os.getenv('DO_SPACES_KEY'),
secret=os.getenv('DO_SPACES_SECRET')
)
gd_config = GDConfig(
root_folder_id=os.getenv('GD_ROOT_FOLDER_ID'),
)
telegram_config = archivers.TelegramConfig(
api_id=os.getenv('TELEGRAM_API_ID'),
api_hash=os.getenv('TELEGRAM_API_HASH')
)
# loop through worksheets to check
for ii, wks in enumerate(sh.worksheets()):
logger.info(f'Opening worksheet {ii=}: {wks.title=} {header=}')
@@ -99,9 +76,6 @@ def process_sheet(sheet, storage="s3", header=1, columns=GWorksheet.COLUMN_NAMES
c.set_folder(f'{sheet.replace(" ", "_")}/{wks.title.replace(" ", "_")}/')
storage = c.get_storage()
gd_config.folder = f'{sheet.replace(" ", "_")}/{wks.title.replace(" ", "_")}/'
gd_client = GDStorage(gd_config)
# loop through rows in worksheet
for row in range(1 + header, gw.count_rows() + 1):
url = gw.get_cell(row, 'url')
@@ -188,7 +162,6 @@ def process_sheet(sheet, storage="s3", header=1, columns=GWorksheet.COLUMN_NAMES
@logger.catch
def main():
logger.debug(f'Passed args:{sys.argv}')
c = Config()
c.parse()