Mirror of https://github.com/bellingcat/auto-archiver
Commit b19bd9a81a (parent 82ae7c4f4a)
@@ -19,6 +19,15 @@
            "justMyCode": true,
            "args": ["--sheet","Test Hashing CIR"]
        },
        {
            "name": "Archive - Dearbhla",
            "type": "python",
            "request": "launch",
            "program": "auto_archive.py",
            "console": "integratedTerminal",
            "justMyCode": true,
            "args": ["--sheet","Archive - Dearbhla"]
        },
        {
            "name": "Kayleigh - test - DM",
            "type": "python",
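These entries read like VS Code launch configurations (presumably .vscode/launch.json): each "args" array is appended to the program's command line when that configuration is launched. A minimal sketch of what the script then sees, using the "Archive - Dearbhla" entry above:

    # VS Code runs roughly: python auto_archive.py --sheet "Archive - Dearbhla"
    import sys

    print(sys.argv[1:])  # ['--sheet', 'Archive - Dearbhla']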
@@ -127,12 +127,10 @@ class Archiver(ABC):
        return hash.hexdigest()

    def get_screenshot(self, url):
        logger.debug(f'In get_screenshot for {url}')
        key = self.get_key(urlparse(url).path.replace(
            "/", "_") + datetime.datetime.utcnow().isoformat().replace(" ", "_") + ".png")
        filename = 'tmp/' + key

        # DM - Accept cookies popup dismiss for ytdlp video
        if 'facebook.com' in url:
            try:
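For illustration, the key built above concatenates the URL path (slashes replaced with underscores) and a UTC timestamp; note that datetime.isoformat() separates date and time with 'T' and contains no spaces, so the .replace(" ", "_") on the timestamp never fires. A standalone sketch, with the example URL as an assumption:

    import datetime
    from urllib.parse import urlparse

    url = 'https://example.com/some/page'
    key = urlparse(url).path.replace('/', '_') \
        + datetime.datetime.utcnow().isoformat().replace(' ', '_') + '.png'
    print(key)  # e.g. _some_page2022-06-09T10:11:12.345678.png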
@@ -141,11 +139,8 @@ class Archiver(ABC):
            except:
                logger.error('Failed on fb accept cookies')

        logger.debug(f'get_screenshot: Requesting url')
        self.driver.get(url)
        logger.debug(f'get_screenshot: Back from request')
        time.sleep(6)
        self.driver.save_screenshot(filename)
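The screenshot flow above reduces to: navigate, wait for the page to render, save a PNG. A self-contained sketch of the same pattern, assuming Selenium with a headless Firefox driver (the driver setup here is illustrative; the repo configures its own driver elsewhere):

    import time
    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options

    options = Options()
    options.headless = True                    # no visible browser window
    driver = webdriver.Firefox(options=options)

    driver.get('https://example.com')          # request the page
    time.sleep(6)                              # crude wait for dynamic content
    driver.save_screenshot('tmp/example.png')  # write the viewport as a PNG
    driver.quit()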
@@ -15,8 +15,10 @@ from utils import GWorksheet, mkdir_if_not_exists

import sys

load_dotenv()
logger.add("trace.log", level="TRACE")
logger.add("warnings.log", level="WARNING")

load_dotenv()


def update_sheet(gw, row, result: archivers.ArchiveResult):
    cell_updates = []
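The two logger.add() calls register separate loguru sinks with different minimum levels, so one file captures everything while the other records only problems. A minimal sketch of the behaviour:

    from loguru import logger

    logger.add('trace.log', level='TRACE')       # every record, TRACE and up
    logger.add('warnings.log', level='WARNING')  # WARNING and above only

    logger.trace('appears in trace.log only')
    logger.warning('appears in both files')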
@@ -67,6 +69,9 @@ def process_sheet(sheet, header=1, columns=GWorksheet.COLUMN_NAMES):
    gc = gspread.service_account(filename='service_account.json')
    sh = gc.open(sheet)

    # DM test raise error for decorator to catch
    # raise ValueError('A very specific bad thing happened.')

    s3_config = S3Config(
        bucket=os.getenv('DO_BUCKET'),
        region=os.getenv('DO_SPACES_REGION'),
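The commented-out raise exists to exercise the @logger.catch decorator that wraps main() further down in this commit: an exception escaping a decorated function is logged with its traceback to every registered sink instead of surfacing as an unhandled crash. A minimal sketch:

    from loguru import logger

    @logger.catch
    def main():
        raise ValueError('A very specific bad thing happened.')

    main()  # loguru logs the traceback; execution continues afterwards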
@@ -88,7 +93,9 @@ def process_sheet(sheet, header=1, columns=GWorksheet.COLUMN_NAMES):

    # loop through worksheets to check
    for ii, wks in enumerate(sh.worksheets()):
        logger.info(f'Opening worksheet {ii}: "{wks.title}" header={header}')
        # logger.info(f'Opening worksheet {ii}: "{wks.title}" header={header}')
        # DM take " out of log message and clarify ii
        logger.info(f'Opening worksheet ii={ii}: {wks.title} header={header}')
        gw = GWorksheet(wks, header_row=header, columns=columns)

        if not gw.col_exists('url'):
@@ -134,6 +141,7 @@ def process_sheet(sheet, header=1, columns=GWorksheet.COLUMN_NAMES):
                result = archiver.download(url, check_if_exists=True)
            except Exception as e:
                result = False
                # DM loguru writes traceback to files so this traceback may be superfluous
                logger.error(f'Got unexpected error in row {row} with archiver {archiver} for url {url}: {e}\n{traceback.format_exc()}')

            if result:
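As the DM comment suggests, the explicit traceback.format_exc() may be redundant: inside an except block, loguru's logger.exception() records the message at ERROR level and appends the active traceback itself. A minimal sketch of that alternative:

    from loguru import logger

    try:
        1 / 0
    except Exception:
        logger.exception('Got unexpected error')  # traceback attached automatically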
@@ -166,10 +174,11 @@ def process_sheet(sheet, header=1, columns=GWorksheet.COLUMN_NAMES):
        logger.success(f'Finshed worksheet {wks.title}')
        driver.quit()


@logger.catch
def main():
    print(sys.argv[1:])
    # DM don't want to use print anymore
    # print(sys.argv[1:])
    logger.info(f'Passed args:{sys.argv}')

    parser = argparse.ArgumentParser(
        description='Automatically archive social media videos from a Google Sheets document')
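The parser presumably declares the --sheet option that the launch configurations at the top of this commit supply; the exact add_argument call is not shown in this hunk, so the sketch below is an assumption:

    import argparse

    parser = argparse.ArgumentParser(
        description='Automatically archive social media videos from a Google Sheets document')
    parser.add_argument('--sheet', help='name of the Google Sheet to process')  # assumed flag
    args = parser.parse_args(['--sheet', 'Test Hashing CIR'])
    print(args.sheet)  # Test Hashing CIR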
@@ -192,4 +201,3 @@ def main():

if __name__ == '__main__':
    main()
    logger.success("finished")