2022-12-10 12:03:46 +00:00
import json , gspread
2023-01-21 19:01:02 +00:00
from . . core import Step
2022-12-10 12:03:46 +00:00
class Gsheets(Step):
    """Step giving access to a Google Sheets document through gspread.

    The document is addressed either by its name (``sheet``) or by its id
    (``sheet_id``); at least one of the two must be configured.

    NOTE(review): the ``sheet``, ``sheet_id``, ``header``, ``service_account``
    and ``columns`` attributes are presumably set from ``configs()`` by
    ``Step.__init__`` -- confirm against the base class.
    """

    name = "gsheets"

    def __init__(self, config: dict) -> None:
        """Create the gspread client and validate the step's settings.

        Args:
            config: configuration passed on to ``Step.__init__``.

        Raises:
            AssertionError: if ``header`` cannot be interpreted as an integer,
                or if neither ``sheet`` nor ``sheet_id`` is defined.
        """
        # without this STEP.__init__ is not called
        super().__init__(config)
        self.gsheets_client = gspread.service_account(filename=self.service_account)

        # TODO: config should be responsible for conversions
        try:
            self.header = int(self.header)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures are ignored here; the value is left
            # untouched so the check below can report it. Anything else
            # (e.g. KeyboardInterrupt) must propagate.
            pass

        # Explicit raises instead of `assert` so the validation still runs
        # under `python -O`; the exception type is kept as AssertionError so
        # existing callers are unaffected.
        if not isinstance(self.header, int):
            raise AssertionError(
                f"header ({self.header}) value must be an integer not {type(self.header)}"
            )
        if self.sheet is None and self.sheet_id is None:
            raise AssertionError(
                "You need to define either a 'sheet' name or a 'sheet_id' in your orchestration file when using gsheets."
            )

    @staticmethod
    def configs() -> dict:
        """Return the configuration options of this step.

        Each option maps to a dict with its ``default`` value and ``help``
        text. ``columns`` additionally has a ``cli_set`` callable that merges
        a JSON object given on the command line over the current mapping.
        """
        return {
            "sheet": {"default": None, "help": "name of the sheet to archive"},
            "sheet_id": {"default": None, "help": "(alternative to sheet name) the id of the sheet to archive"},
            "header": {"default": 1, "help": "index of the header row (starts at 1)"},
            "service_account": {"default": "secrets/service_account.json", "help": "service account JSON file path"},
            "columns": {
                "default": {
                    'url': 'link',
                    'status': 'archive status',
                    'folder': 'destination folder',
                    'archive': 'archive location',
                    'date': 'archive date',
                    'thumbnail': 'thumbnail',
                    'timestamp': 'upload timestamp',
                    'title': 'upload title',
                    'text': 'text content',
                    'screenshot': 'screenshot',
                    'hash': 'hash',
                    'pdq_hash': 'perceptual hashes',
                    'wacz': 'wacz',
                    'replaywebpage': 'replaywebpage',
                },
                "help": "names of columns in the google sheet (stringified JSON object)",
                # Keys from the CLI JSON override the current mapping; keys
                # that are not mentioned keep their current value.
                "cli_set": lambda cli_val, cur_val: dict(cur_val, **json.loads(cli_val)),
            },
        }

    def open_sheet(self):
        """Open the configured spreadsheet with the gspread client.

        Uses ``sheet`` (lookup by name) when it is truthy, otherwise falls
        back to ``sheet_id`` (lookup by key).

        Returns:
            Whatever ``gsheets_client.open`` / ``open_by_key`` returns.
        """
        if self.sheet:
            return self.gsheets_client.open(self.sheet)
        else:  # self.sheet_id
            return self.gsheets_client.open_by_key(self.sheet_id)