auto-archiver/utils/gworksheet.py

from gspread import utils


class GWorksheet:
    """
    This class makes read/write operations to a worksheet easier.
    It can read the headers from a custom row number, but the row references
    should always include the offset of the header.
    eg: if header=4, row 5 will be the first with data.

    All cell values are fetched once in the constructor and cached in
    self.values; reads are served from that cache unless fresh=True is used.
    """
    # maps the internal column key -> lower-cased header text expected in the sheet
    COLUMN_NAMES = {
        'url': 'link',
        'status': 'archive status',
        'folder': 'destination folder',
        'archive': 'archive location',
        'date': 'archive date',
        'thumbnail': 'thumbnail',
        'thumbnail_index': 'thumbnail index',
        'timestamp': 'upload timestamp',
        'title': 'upload title',
        'duration': 'duration',
        'screenshot': 'screenshot',
        'hash': 'hash'
    }

    def __init__(self, worksheet, columns=COLUMN_NAMES, header_row=1):
        """
        worksheet: object exposing get_values(), cell(), update_cell() and
            batch_update() (a gspread worksheet in this project)
        columns: mapping of internal column key -> header text (lower case)
        header_row: 1-based row number that holds the headers
        """
        self.wks = worksheet
        self.columns = columns
        self.values = self.wks.get_values()
        # header_row is 1-based; if it does not point to an existing row
        # (empty sheet, sheet shorter than header_row, or header_row < 1)
        # fall back to no headers instead of raising IndexError or silently
        # picking a row via negative indexing.
        if 1 <= header_row <= len(self.values):
            self.headers = [v.lower() for v in self.values[header_row - 1]]
        else:
            self.headers = []

    def _check_col_exists(self, col: str):
        """
        raises Exception if col is not one of the configured column keys
        """
        if col not in self.columns:
            raise Exception(f'Column {col} is not in the configured column names: {self.columns.keys()}')

    def _col_index(self, col: str):
        """
        returns the 0-based index of col in the sheet headers,
        raises ValueError if the configured header is not present in the sheet
        """
        self._check_col_exists(col)
        return self.headers.index(self.columns[col])

    def col_exists(self, col: str):
        """
        returns True if the header configured for col is present in the sheet,
        raises Exception if col is not a configured column key
        """
        self._check_col_exists(col)
        return self.columns[col] in self.headers

    def count_rows(self):
        """
        returns the number of cached rows (header rows included)
        """
        return len(self.values)

    def get_row(self, row: int):
        """
        returns the cached list of values for the given row
        """
        # row is 1-based
        return self.values[row - 1]

    def get_values(self):
        """
        returns all the cached rows, as read in the constructor
        """
        return self.values

    def get_cell(self, row, col: str, fresh=False):
        """
        returns the cell value from (row, col),
        where row can be an index (1-based) OR list of values
        as received from self.get_row(row)
        if fresh=True, the sheet is queried again for this cell
        (row is then passed as-is to the worksheet, so it must be an index)
        """
        col_index = self._col_index(col)

        if fresh:
            # worksheet columns are 1-based, header indexes are 0-based
            return self.wks.cell(row, col_index + 1).value
        if isinstance(row, int):
            row = self.get_row(row)

        # rows can be shorter than the header row, treat missing cells as empty
        if col_index >= len(row):
            return ''
        return row[col_index]

    def get_cell_or_default(self, row, col: str, default: str = None, fresh=False, when_empty_use_default=True):
        """
        return self.get_cell or default value on error (eg: column is missing)
        if when_empty_use_default=True, a blank/whitespace-only value
        also yields the default
        """
        try:
            val = self.get_cell(row, col, fresh)
            # a non-string val (eg: None) makes .strip() raise, which also
            # ends up returning the default through the handler below
            if when_empty_use_default and val.strip() == "":
                return default
            return val
        except Exception:
            # best-effort lookup: any regular error means "use the default",
            # but KeyboardInterrupt/SystemExit are no longer swallowed
            return default

    def set_cell(self, row: int, col: str, val):
        """
        writes val to (row, col) in the sheet, the local cache is not updated
        """
        # row is 1-based
        col_index = self._col_index(col) + 1
        self.wks.update_cell(row, col_index, val)

    def batch_set_cell(self, cell_updates):
        """
        receives a list of [(row:int, col:str, val)] and batch updates it, the parameters are the same as in the self.set_cell() method
        """
        cell_updates = [
            {
                'range': self.to_a1(row, col),
                'values': [[val]]
            }
            for row, col, val in cell_updates
        ]
        if not cell_updates:
            # nothing to write, skip the request to the sheet
            return
        self.wks.batch_update(cell_updates, value_input_option='USER_ENTERED')

    def to_a1(self, row: int, col: str):
        """
        returns the A1 notation (eg: 'B3') for (row, col)
        """
        # row is 1-based
        return utils.rowcol_to_a1(row, self._col_index(col) + 1)
extracted worksheet operations 2022-02-23 08:54:03 +00:00			`from gspread import utils`


			`class GWorksheet:`
improves documentation 2022-03-13 11:05:09 +00:00			`"""`
			`This class makes read/write operations to the a worksheet easier.`
			`It can read the headers from a custom row number, but the row references`
			`should always include the offset of the header.`
			`eg: if header=4, row 5 will be the first with data.`
			`"""`
extracted worksheet operations 2022-02-23 08:54:03 +00:00			`COLUMN_NAMES = {`
Add header argument; set up webdriver 2022-02-25 15:09:35 +00:00			`'url': 'link',`
refactor 2022-06-07 16:41:58 +00:00			`'status': 'archive status',`
			`'folder': 'destination folder',`
extracted worksheet operations 2022-02-23 08:54:03 +00:00			`'archive': 'archive location',`
			`'date': 'archive date',`
			`'thumbnail': 'thumbnail',`
			`'thumbnail_index': 'thumbnail index',`
			`'timestamp': 'upload timestamp',`
			`'title': 'upload title',`
Add header argument; set up webdriver 2022-02-25 15:09:35 +00:00			`'duration': 'duration',`
			`'screenshot': 'screenshot',`
			`'hash': 'hash'`
extracted worksheet operations 2022-02-23 08:54:03 +00:00			`}`

Add header argument; set up webdriver 2022-02-25 15:09:35 +00:00			`def __init__(self, worksheet, columns=COLUMN_NAMES, header_row=1):`
extracted worksheet operations 2022-02-23 08:54:03 +00:00			`self.wks = worksheet`
			`self.columns = columns`
fix index out of range for empty sheets 2022-05-10 20:24:21 +00:00			`self.values = self.wks.get_values()`
			`if len(self.values) > 0:`
			`self.headers = [v.lower() for v in self.values[header_row - 1]]`
			`else:`
			`self.headers = []`
extracted worksheet operations 2022-02-23 08:54:03 +00:00
			`def _check_col_exists(self, col: str):`
			`if col not in self.columns:`
			`raise Exception(f'Column {col} is not in the configured column names: {self.columns.keys()}')`

making code more resilient to exceptions 2022-02-23 12:57:11 +00:00			`def _col_index(self, col: str):`
extracted worksheet operations 2022-02-23 08:54:03 +00:00			`self._check_col_exists(col)`
making code more resilient to exceptions 2022-02-23 12:57:11 +00:00			`return self.headers.index(self.columns[col])`
extracted worksheet operations 2022-02-23 08:54:03 +00:00
making code more resilient to exceptions 2022-02-23 12:57:11 +00:00			`def col_exists(self, col: str):`
extracted worksheet operations 2022-02-23 08:54:03 +00:00			`self._check_col_exists(col)`
making code more resilient to exceptions 2022-02-23 12:57:11 +00:00			`return self.columns[col] in self.headers`
extracted worksheet operations 2022-02-23 08:54:03 +00:00
			`def count_rows(self):`
simplifies access to google sheets, single get_values 2022-03-09 11:17:51 +00:00			`return len(self.values)`
extracted worksheet operations 2022-02-23 08:54:03 +00:00
			`def get_row(self, row: int):`
			`# row is 1-based`
simplifies access to google sheets, single get_values 2022-03-09 11:17:51 +00:00			`return self.values[row - 1]`
extracted worksheet operations 2022-02-23 08:54:03 +00:00
Add header argument; set up webdriver 2022-02-25 15:09:35 +00:00			`def get_values(self):`
simplifies access to google sheets, single get_values 2022-03-09 11:17:51 +00:00			`return self.values`
Add header argument; set up webdriver 2022-02-25 15:09:35 +00:00
implements fresh status retrieval if needed 2022-03-10 18:00:02 +00:00			`def get_cell(self, row, col: str, fresh=False):`
making code more resilient to exceptions 2022-02-23 12:57:11 +00:00			`"""`
			`returns the cell value from (row, col),`
			`where row can be an index (1-based) OR list of values`
			`as received from self.get_row(row)`
implements fresh status retrieval if needed 2022-03-10 18:00:02 +00:00			`if fresh=True, the sheet is queried again for this cell`
making code more resilient to exceptions 2022-02-23 12:57:11 +00:00			`"""`
implements fresh status retrieval if needed 2022-03-10 18:00:02 +00:00			`col_index = self._col_index(col)`

			`if fresh:`
fix offset 2022-03-12 19:25:52 +00:00			`return self.wks.cell(row, col_index + 1).value`
extracted worksheet operations 2022-02-23 08:54:03 +00:00			`if type(row) == int:`
			`row = self.get_row(row)`

			`if col_index >= len(row):`
			`return ''`
			`return row[col_index]`

refactor 2022-06-07 16:41:58 +00:00			`def get_cell_or_default(self, row, col: str, default: str = None, fresh=False, when_empty_use_default=True):`
refactoring filenumber into subfolder 2022-05-26 17:18:29 +00:00			`"""`
			`return self.get_cell or default value on error (eg: column is missing)`
			`"""`
			`try:`
refactor 2022-06-07 16:41:58 +00:00			`val = self.get_cell(row, col, fresh)`
			`if when_empty_use_default and val.strip() == "":`
			`return default`
			`return val`
refactoring filenumber into subfolder 2022-05-26 17:18:29 +00:00			`except:`
			`return default`

making code more resilient to exceptions 2022-02-23 12:57:11 +00:00			`def set_cell(self, row: int, col: str, val):`
extracted worksheet operations 2022-02-23 08:54:03 +00:00			`# row is 1-based`
making code more resilient to exceptions 2022-02-23 12:57:11 +00:00			`col_index = self._col_index(col) + 1`
offby1 2022-03-12 19:11:38 +00:00			`self.wks.update_cell(row, col_index, val)`
extracted worksheet operations 2022-02-23 08:54:03 +00:00
making code more resilient to exceptions 2022-02-23 12:57:11 +00:00			`def batch_set_cell(self, cell_updates):`
			`"""`
			`receives a list of [(row:int, col:str, val)] and batch updates it, the parameters are the same as in the self.set_cell() method`
			`"""`
			`cell_updates = [`
extracted worksheet operations 2022-02-23 08:54:03 +00:00			`{`
cleanup and docs 2022-02-23 15:07:58 +00:00			`'range': self.to_a1(row, col),`
extracted worksheet operations 2022-02-23 08:54:03 +00:00			`'values': [[val]]`
			`}`
making code more resilient to exceptions 2022-02-23 12:57:11 +00:00			`for row, col, val in cell_updates`
extracted worksheet operations 2022-02-23 08:54:03 +00:00			`]`
making code more resilient to exceptions 2022-02-23 12:57:11 +00:00			`self.wks.batch_update(cell_updates, value_input_option='USER_ENTERED')`
extracted worksheet operations 2022-02-23 08:54:03 +00:00
cleanup and docs 2022-02-23 15:07:58 +00:00			`def to_a1(self, row: int, col: str):`
			`# row is 1-based`
offby1 2022-03-12 19:11:38 +00:00			`return utils.rowcol_to_a1(row, self._col_index(col) + 1)`