Merge pull request #43 from bellingcat/refactor-tmp-dir-logic

pull/44/head
Miguel Sozinho Ramalho 2022-06-16 19:00:19 +01:00 zatwierdzone przez GitHub
commit 74cef2f21b
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
5 zmienionych plików z 14 dodań i 19 usunięć

2
.gitignore vendored
Wyświetl plik

@@ -1,4 +1,4 @@
tmp/ tmp*/
temp/ temp/
.env* .env*
.DS_Store .DS_Store

Wyświetl plik

@@ -1,4 +1,4 @@
import os, datetime, shutil, traceback, random import os, datetime, shutil, traceback, random, tempfile
from loguru import logger from loguru import logger
from slugify import slugify from slugify import slugify
@@ -141,10 +141,10 @@ def main():
c = Config() c = Config()
c.parse() c.parse()
logger.info(f'Opening document {c.sheet} for header {c.header}') logger.info(f'Opening document {c.sheet} for header {c.header}')
mkdir_if_not_exists(Storage.TMP_FOLDER) with tempfile.TemporaryDirectory(dir="./") as tmpdir:
process_sheet(c) Storage.TMP_FOLDER = tmpdir
c.destroy_webdriver() process_sheet(c)
shutil.rmtree(Storage.TMP_FOLDER) c.destroy_webdriver()
if __name__ == '__main__': if __name__ == '__main__':

Wyświetl plik

@@ -1,9 +1,8 @@
import shutil import tempfile
import auto_archive import auto_archive
from loguru import logger from loguru import logger
from configs import Config from configs import Config
from storages import Storage from storages import Storage
from utils import mkdir_if_not_exists
def main(): def main():
@@ -17,13 +16,13 @@ def main():
wks = sh.get_worksheet(0) wks = sh.get_worksheet(0)
values = wks.get_all_values() values = wks.get_all_values()
mkdir_if_not_exists(Storage.TMP_FOLDER) with tempfile.TemporaryDirectory(dir="./") as tmpdir:
for i in range(11, len(values)): Storage.TMP_FOLDER = tmpdir
c.sheet = values[i][0] for i in range(11, len(values)):
logger.info(f"Processing {c.sheet}") c.sheet = values[i][0]
auto_archive.process_sheet(c) logger.info(f"Processing {c.sheet}")
c.destroy_webdriver() auto_archive.process_sheet(c)
shutil.rmtree(Storage.TMP_FOLDER) c.destroy_webdriver()
if __name__ == "__main__": if __name__ == "__main__":

Wyświetl plik

@@ -50,7 +50,6 @@ class Config:
self.sheet = getattr_or(self.args, "sheet", execution.get("sheet")) self.sheet = getattr_or(self.args, "sheet", execution.get("sheet"))
assert self.sheet is not None, "'sheet' must be provided either through command line or configuration file" assert self.sheet is not None, "'sheet' must be provided either through command line or configuration file"
self.header = int(getattr_or(self.args, "header", execution.get("header", 1))) self.header = int(getattr_or(self.args, "header", execution.get("header", 1)))
Storage.TMP_FOLDER = execution.get("tmp_folder", Storage.TMP_FOLDER)
self.storage = getattr_or(self.args, "storage", execution.get("storage", "s3")) self.storage = getattr_or(self.args, "storage", execution.get("storage", "s3"))
self.save_logs = getattr(self.args, "save_logs") or execution.get("save_logs", False) self.save_logs = getattr(self.args, "save_logs") or execution.get("save_logs", False)
if self.save_logs: if self.save_logs:
@@ -238,7 +237,6 @@ class Config:
"header": self.header, "header": self.header,
"check_if_exists": self.check_if_exists, "check_if_exists": self.check_if_exists,
"save_logs": self.save_logs, "save_logs": self.save_logs,
"tmp_folder": Storage.TMP_FOLDER,
"selenium_config": asdict(self.selenium_config), "selenium_config": asdict(self.selenium_config),
"selenium_webdriver": self.webdriver != None, "selenium_webdriver": self.webdriver != None,
"s3_config": hasattr(self, "s3_config"), "s3_config": hasattr(self, "s3_config"),

Wyświetl plik

@@ -66,8 +66,6 @@ execution:
timeout_seconds: 120 timeout_seconds: 120
window_width: 1400 window_width: 1400
window_height: 2000 window_height: 2000
# local tmp folder to save files before uploading to storage
tmp_folder: tmp/
# puts execution logs into /logs folder, defaults to false # puts execution logs into /logs folder, defaults to false
save_logs: true save_logs: true
# custom column names, only needed if different from default, can be overwritten with CMD --col-NAME="VALUE" # custom column names, only needed if different from default, can be overwritten with CMD --col-NAME="VALUE"