kopia lustrzana https://github.com/bellingcat/auto-archiver
Merge pull request #43 from bellingcat/refactor-tmp-dir-logic
commit
74cef2f21b
|
@@ -1,4 +1,4 @@
|
|||
tmp/
|
||||
tmp*/
|
||||
temp/
|
||||
.env*
|
||||
.DS_Store
|
||||
|
|
|
@@ -1,4 +1,4 @@
|
|||
import os, datetime, shutil, traceback, random
|
||||
import os, datetime, shutil, traceback, random, tempfile
|
||||
|
||||
from loguru import logger
|
||||
from slugify import slugify
|
||||
|
@@ -141,10 +141,10 @@ def main():
|
|||
c = Config()
|
||||
c.parse()
|
||||
logger.info(f'Opening document {c.sheet} for header {c.header}')
|
||||
mkdir_if_not_exists(Storage.TMP_FOLDER)
|
||||
process_sheet(c)
|
||||
c.destroy_webdriver()
|
||||
shutil.rmtree(Storage.TMP_FOLDER)
|
||||
with tempfile.TemporaryDirectory(dir="./") as tmpdir:
|
||||
Storage.TMP_FOLDER = tmpdir
|
||||
process_sheet(c)
|
||||
c.destroy_webdriver()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@@ -1,9 +1,8 @@
|
|||
import shutil
|
||||
import tempfile
|
||||
import auto_archive
|
||||
from loguru import logger
|
||||
from configs import Config
|
||||
from storages import Storage
|
||||
from utils import mkdir_if_not_exists
|
||||
|
||||
|
||||
def main():
|
||||
|
@@ -17,13 +16,13 @@ def main():
|
|||
wks = sh.get_worksheet(0)
|
||||
values = wks.get_all_values()
|
||||
|
||||
mkdir_if_not_exists(Storage.TMP_FOLDER)
|
||||
for i in range(11, len(values)):
|
||||
c.sheet = values[i][0]
|
||||
logger.info(f"Processing {c.sheet}")
|
||||
auto_archive.process_sheet(c)
|
||||
c.destroy_webdriver()
|
||||
shutil.rmtree(Storage.TMP_FOLDER)
|
||||
with tempfile.TemporaryDirectory(dir="./") as tmpdir:
|
||||
Storage.TMP_FOLDER = tmpdir
|
||||
for i in range(11, len(values)):
|
||||
c.sheet = values[i][0]
|
||||
logger.info(f"Processing {c.sheet}")
|
||||
auto_archive.process_sheet(c)
|
||||
c.destroy_webdriver()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@@ -50,7 +50,6 @@ class Config:
|
|||
self.sheet = getattr_or(self.args, "sheet", execution.get("sheet"))
|
||||
assert self.sheet is not None, "'sheet' must be provided either through command line or configuration file"
|
||||
self.header = int(getattr_or(self.args, "header", execution.get("header", 1)))
|
||||
Storage.TMP_FOLDER = execution.get("tmp_folder", Storage.TMP_FOLDER)
|
||||
self.storage = getattr_or(self.args, "storage", execution.get("storage", "s3"))
|
||||
self.save_logs = getattr(self.args, "save_logs") or execution.get("save_logs", False)
|
||||
if self.save_logs:
|
||||
|
@@ -238,7 +237,6 @@ class Config:
|
|||
"header": self.header,
|
||||
"check_if_exists": self.check_if_exists,
|
||||
"save_logs": self.save_logs,
|
||||
"tmp_folder": Storage.TMP_FOLDER,
|
||||
"selenium_config": asdict(self.selenium_config),
|
||||
"selenium_webdriver": self.webdriver != None,
|
||||
"s3_config": hasattr(self, "s3_config"),
|
||||
|
|
|
@@ -66,8 +66,6 @@ execution:
|
|||
timeout_seconds: 120
|
||||
window_width: 1400
|
||||
window_height: 2000
|
||||
# local tmp folder to save files before uploading to storage
|
||||
tmp_folder: tmp/
|
||||
# puts execution logs into /logs folder, defaults to false
|
||||
save_logs: true
|
||||
# custom column names, only needed if different from default, can be overwritten with CMD --col-NAME="VALUE"
|
||||
|
|
Ładowanie…
Reference in New Issue