kopia lustrzana https://github.com/bellingcat/auto-archiver
Merge pull request #43 from bellingcat/refactor-tmp-dir-logic
commit
74cef2f21b
|
@ -1,4 +1,4 @@
|
||||||
tmp/
|
tmp*/
|
||||||
temp/
|
temp/
|
||||||
.env*
|
.env*
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
import os, datetime, shutil, traceback, random
|
import os, datetime, shutil, traceback, random, tempfile
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
|
@ -141,10 +141,10 @@ def main():
|
||||||
c = Config()
|
c = Config()
|
||||||
c.parse()
|
c.parse()
|
||||||
logger.info(f'Opening document {c.sheet} for header {c.header}')
|
logger.info(f'Opening document {c.sheet} for header {c.header}')
|
||||||
mkdir_if_not_exists(Storage.TMP_FOLDER)
|
with tempfile.TemporaryDirectory(dir="./") as tmpdir:
|
||||||
process_sheet(c)
|
Storage.TMP_FOLDER = tmpdir
|
||||||
c.destroy_webdriver()
|
process_sheet(c)
|
||||||
shutil.rmtree(Storage.TMP_FOLDER)
|
c.destroy_webdriver()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
import shutil
|
import tempfile
|
||||||
import auto_archive
|
import auto_archive
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from configs import Config
|
from configs import Config
|
||||||
from storages import Storage
|
from storages import Storage
|
||||||
from utils import mkdir_if_not_exists
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -17,13 +16,13 @@ def main():
|
||||||
wks = sh.get_worksheet(0)
|
wks = sh.get_worksheet(0)
|
||||||
values = wks.get_all_values()
|
values = wks.get_all_values()
|
||||||
|
|
||||||
mkdir_if_not_exists(Storage.TMP_FOLDER)
|
with tempfile.TemporaryDirectory(dir="./") as tmpdir:
|
||||||
for i in range(11, len(values)):
|
Storage.TMP_FOLDER = tmpdir
|
||||||
c.sheet = values[i][0]
|
for i in range(11, len(values)):
|
||||||
logger.info(f"Processing {c.sheet}")
|
c.sheet = values[i][0]
|
||||||
auto_archive.process_sheet(c)
|
logger.info(f"Processing {c.sheet}")
|
||||||
c.destroy_webdriver()
|
auto_archive.process_sheet(c)
|
||||||
shutil.rmtree(Storage.TMP_FOLDER)
|
c.destroy_webdriver()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -50,7 +50,6 @@ class Config:
|
||||||
self.sheet = getattr_or(self.args, "sheet", execution.get("sheet"))
|
self.sheet = getattr_or(self.args, "sheet", execution.get("sheet"))
|
||||||
assert self.sheet is not None, "'sheet' must be provided either through command line or configuration file"
|
assert self.sheet is not None, "'sheet' must be provided either through command line or configuration file"
|
||||||
self.header = int(getattr_or(self.args, "header", execution.get("header", 1)))
|
self.header = int(getattr_or(self.args, "header", execution.get("header", 1)))
|
||||||
Storage.TMP_FOLDER = execution.get("tmp_folder", Storage.TMP_FOLDER)
|
|
||||||
self.storage = getattr_or(self.args, "storage", execution.get("storage", "s3"))
|
self.storage = getattr_or(self.args, "storage", execution.get("storage", "s3"))
|
||||||
self.save_logs = getattr(self.args, "save_logs") or execution.get("save_logs", False)
|
self.save_logs = getattr(self.args, "save_logs") or execution.get("save_logs", False)
|
||||||
if self.save_logs:
|
if self.save_logs:
|
||||||
|
@ -238,7 +237,6 @@ class Config:
|
||||||
"header": self.header,
|
"header": self.header,
|
||||||
"check_if_exists": self.check_if_exists,
|
"check_if_exists": self.check_if_exists,
|
||||||
"save_logs": self.save_logs,
|
"save_logs": self.save_logs,
|
||||||
"tmp_folder": Storage.TMP_FOLDER,
|
|
||||||
"selenium_config": asdict(self.selenium_config),
|
"selenium_config": asdict(self.selenium_config),
|
||||||
"selenium_webdriver": self.webdriver != None,
|
"selenium_webdriver": self.webdriver != None,
|
||||||
"s3_config": hasattr(self, "s3_config"),
|
"s3_config": hasattr(self, "s3_config"),
|
||||||
|
|
|
@ -66,8 +66,6 @@ execution:
|
||||||
timeout_seconds: 120
|
timeout_seconds: 120
|
||||||
window_width: 1400
|
window_width: 1400
|
||||||
window_height: 2000
|
window_height: 2000
|
||||||
# local tmp folder to save files before uploading to storage
|
|
||||||
tmp_folder: tmp/
|
|
||||||
# puts execution logs into /logs folder, defaults to false
|
# puts execution logs into /logs folder, defaults to false
|
||||||
save_logs: true
|
save_logs: true
|
||||||
# custom column names, only needed if different from default, can be overwritten with CMD --col-NAME="VALUE"
|
# custom column names, only needed if different from default, can be overwritten with CMD --col-NAME="VALUE"
|
||||||
|
|
Ładowanie…
Reference in New Issue