Merge pull request #43 from bellingcat/refactor-tmp-dir-logic

pull/44/head
Miguel Sozinho Ramalho 2022-06-16 19:00:19 +01:00 zatwierdzone przez GitHub
commit 74cef2f21b
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
5 zmienionych plików z 14 dodań i 19 usunięć

2
.gitignore vendored
Wyświetl plik

@ -1,4 +1,4 @@
tmp/
tmp*/
temp/
.env*
.DS_Store

Wyświetl plik

@ -1,4 +1,4 @@
import os, datetime, shutil, traceback, random
import os, datetime, shutil, traceback, random, tempfile
from loguru import logger
from slugify import slugify
@ -141,10 +141,10 @@ def main():
c = Config()
c.parse()
logger.info(f'Opening document {c.sheet} for header {c.header}')
mkdir_if_not_exists(Storage.TMP_FOLDER)
process_sheet(c)
c.destroy_webdriver()
shutil.rmtree(Storage.TMP_FOLDER)
with tempfile.TemporaryDirectory(dir="./") as tmpdir:
Storage.TMP_FOLDER = tmpdir
process_sheet(c)
c.destroy_webdriver()
if __name__ == '__main__':

Wyświetl plik

@ -1,9 +1,8 @@
import shutil
import tempfile
import auto_archive
from loguru import logger
from configs import Config
from storages import Storage
from utils import mkdir_if_not_exists
def main():
@ -17,13 +16,13 @@ def main():
wks = sh.get_worksheet(0)
values = wks.get_all_values()
mkdir_if_not_exists(Storage.TMP_FOLDER)
for i in range(11, len(values)):
c.sheet = values[i][0]
logger.info(f"Processing {c.sheet}")
auto_archive.process_sheet(c)
c.destroy_webdriver()
shutil.rmtree(Storage.TMP_FOLDER)
with tempfile.TemporaryDirectory(dir="./") as tmpdir:
Storage.TMP_FOLDER = tmpdir
for i in range(11, len(values)):
c.sheet = values[i][0]
logger.info(f"Processing {c.sheet}")
auto_archive.process_sheet(c)
c.destroy_webdriver()
if __name__ == "__main__":

Wyświetl plik

@ -50,7 +50,6 @@ class Config:
self.sheet = getattr_or(self.args, "sheet", execution.get("sheet"))
assert self.sheet is not None, "'sheet' must be provided either through command line or configuration file"
self.header = int(getattr_or(self.args, "header", execution.get("header", 1)))
Storage.TMP_FOLDER = execution.get("tmp_folder", Storage.TMP_FOLDER)
self.storage = getattr_or(self.args, "storage", execution.get("storage", "s3"))
self.save_logs = getattr(self.args, "save_logs") or execution.get("save_logs", False)
if self.save_logs:
@ -238,7 +237,6 @@ class Config:
"header": self.header,
"check_if_exists": self.check_if_exists,
"save_logs": self.save_logs,
"tmp_folder": Storage.TMP_FOLDER,
"selenium_config": asdict(self.selenium_config),
"selenium_webdriver": self.webdriver != None,
"s3_config": hasattr(self, "s3_config"),

Wyświetl plik

@ -66,8 +66,6 @@ execution:
timeout_seconds: 120
window_width: 1400
window_height: 2000
# local tmp folder to save files before uploading to storage
tmp_folder: tmp/
# puts execution logs into /logs folder, defaults to false
save_logs: true
# custom column names, only needed if different from default, can be overwritten with CMD --col-NAME="VALUE"