auto-archiver/utils/gworksheet.py

108 wiersze
3.4 KiB
Python
Czysty Zwykły widok Historia

2022-02-23 08:54:03 +00:00
from gspread import utils
class GWorksheet:
2022-03-13 11:05:09 +00:00
"""
This class makes read/write operations to the a worksheet easier.
It can read the headers from a custom row number, but the row references
should always include the offset of the header.
eg: if header=4, row 5 will be the first with data.
"""
2022-02-23 08:54:03 +00:00
COLUMN_NAMES = {
2022-02-25 15:09:35 +00:00
'url': 'link',
2022-06-07 16:41:58 +00:00
'status': 'archive status',
'folder': 'destination folder',
2022-02-23 08:54:03 +00:00
'archive': 'archive location',
'date': 'archive date',
'thumbnail': 'thumbnail',
'thumbnail_index': 'thumbnail index',
'timestamp': 'upload timestamp',
'title': 'upload title',
2022-02-25 15:09:35 +00:00
'duration': 'duration',
'screenshot': 'screenshot',
'hash': 'hash'
2022-02-23 08:54:03 +00:00
}
2022-02-25 15:09:35 +00:00
def __init__(self, worksheet, columns=COLUMN_NAMES, header_row=1):
2022-02-23 08:54:03 +00:00
self.wks = worksheet
self.columns = columns
self.values = self.wks.get_values()
if len(self.values) > 0:
self.headers = [v.lower() for v in self.values[header_row - 1]]
else:
self.headers = []
2022-02-23 08:54:03 +00:00
def _check_col_exists(self, col: str):
if col not in self.columns:
raise Exception(f'Column {col} is not in the configured column names: {self.columns.keys()}')
def _col_index(self, col: str):
2022-02-23 08:54:03 +00:00
self._check_col_exists(col)
return self.headers.index(self.columns[col])
2022-02-23 08:54:03 +00:00
def col_exists(self, col: str):
2022-02-23 08:54:03 +00:00
self._check_col_exists(col)
return self.columns[col] in self.headers
2022-02-23 08:54:03 +00:00
def count_rows(self):
return len(self.values)
2022-02-23 08:54:03 +00:00
def get_row(self, row: int):
# row is 1-based
return self.values[row - 1]
2022-02-23 08:54:03 +00:00
2022-02-25 15:09:35 +00:00
def get_values(self):
return self.values
2022-02-25 15:09:35 +00:00
def get_cell(self, row, col: str, fresh=False):
"""
returns the cell value from (row, col),
where row can be an index (1-based) OR list of values
as received from self.get_row(row)
if fresh=True, the sheet is queried again for this cell
"""
col_index = self._col_index(col)
if fresh:
2022-03-12 19:25:52 +00:00
return self.wks.cell(row, col_index + 1).value
2022-02-23 08:54:03 +00:00
if type(row) == int:
row = self.get_row(row)
if col_index >= len(row):
return ''
return row[col_index]
2022-06-07 16:41:58 +00:00
def get_cell_or_default(self, row, col: str, default: str = None, fresh=False, when_empty_use_default=True):
2022-05-26 17:18:29 +00:00
"""
return self.get_cell or default value on error (eg: column is missing)
"""
try:
2022-06-07 16:41:58 +00:00
val = self.get_cell(row, col, fresh)
if when_empty_use_default and val.strip() == "":
return default
return val
2022-05-26 17:18:29 +00:00
except:
return default
def set_cell(self, row: int, col: str, val):
2022-02-23 08:54:03 +00:00
# row is 1-based
col_index = self._col_index(col) + 1
2022-03-12 19:11:38 +00:00
self.wks.update_cell(row, col_index, val)
2022-02-23 08:54:03 +00:00
def batch_set_cell(self, cell_updates):
"""
receives a list of [(row:int, col:str, val)] and batch updates it, the parameters are the same as in the self.set_cell() method
"""
cell_updates = [
2022-02-23 08:54:03 +00:00
{
2022-02-23 15:07:58 +00:00
'range': self.to_a1(row, col),
2022-02-23 08:54:03 +00:00
'values': [[val]]
}
for row, col, val in cell_updates
2022-02-23 08:54:03 +00:00
]
self.wks.batch_update(cell_updates, value_input_option='USER_ENTERED')
2022-02-23 08:54:03 +00:00
2022-02-23 15:07:58 +00:00
def to_a1(self, row: int, col: str):
# row is 1-based
2022-03-12 19:11:38 +00:00
return utils.rowcol_to_a1(row, self._col_index(col) + 1)