import json

import gspread

from ..core import Step


class Gsheets(Step):
    """Step that authenticates with gspread and opens a configured Google Sheet.

    The attributes read here (``service_account``, ``header``, ``sheet``,
    ``sheet_id``) are presumably populated by ``Step.__init__`` from the
    entries declared in :meth:`configs` -- confirm against ``Step``.
    """

    name = "gsheets"

    def __init__(self, config: dict) -> None:
        """Create the gspread client and validate the sheet settings.

        Args:
            config: configuration mapping forwarded to ``Step.__init__``.

        Raises:
            AssertionError: if ``header`` cannot be interpreted as an integer,
                or if neither ``sheet`` nor ``sheet_id`` is set.
        """
        # without this STEP.__init__ is not called
        super().__init__(config)
        self.gsheets_client = gspread.service_account(filename=self.service_account)

        # TODO: config should be responsible for conversions
        try:
            self.header = int(self.header)
        except (TypeError, ValueError, OverflowError):
            # Leave the raw value untouched; the check below reports it with
            # a descriptive message instead of a bare conversion error.
            pass

        # Raised explicitly (rather than via `assert`) so the validation is
        # not stripped when Python runs with -O; the exception type and the
        # messages are the same as before.
        if not isinstance(self.header, int):
            raise AssertionError(
                f"header ({self.header}) value must be an integer not {type(self.header)}"
            )
        if self.sheet is None and self.sheet_id is None:
            raise AssertionError(
                "You need to define either a 'sheet' name or a 'sheet_id' in your orchestration file when using gsheets."
            )

    @staticmethod
    def configs() -> dict:
        """Return the configuration options of this step.

        Each entry maps an option name to its ``default`` value and ``help``
        text. ``columns`` additionally provides a ``cli_set`` callable that
        parses a JSON object string and overlays it on the current mapping.
        """
        return {
            "sheet": {"default": None, "help": "name of the sheet to archive"},
            "sheet_id": {"default": None, "help": "(alternative to sheet name) the id of the sheet to archive"},
            "header": {"default": 1, "help": "index of the header row (starts at 1)"},
            "service_account": {"default": "secrets/service_account.json", "help": "service account JSON file path"},
            "columns": {
                "default": {
                    'url': 'link',
                    'status': 'archive status',
                    'folder': 'destination folder',
                    'archive': 'archive location',
                    'date': 'archive date',
                    'thumbnail': 'thumbnail',
                    'timestamp': 'upload timestamp',
                    'title': 'upload title',
                    'text': 'text content',
                    'screenshot': 'screenshot',
                    'hash': 'hash',
                    'pdq_hash': 'perceptual hashes',
                    'wacz': 'wacz',
                    'replaywebpage': 'replaywebpage',
                },
                "help": "names of columns in the google sheet (stringified JSON object)",
                # Keys given on the command line override the current ones;
                # keys that are not mentioned keep their current value.
                "cli_set": lambda cli_val, cur_val: dict(cur_val, **json.loads(cli_val)),
            },
        }

    def open_sheet(self):
        """Open the configured spreadsheet with the gspread client.

        Opens by name when ``self.sheet`` is truthy, otherwise by key using
        ``self.sheet_id``.

        Returns:
            Whatever ``gsheets_client.open`` / ``open_by_key`` returns.
        """
        if self.sheet:
            return self.gsheets_client.open(self.sheet)
        # self.sheet_id
        return self.gsheets_client.open_by_key(self.sheet_id)