making code more resilient to exceptions

2022-02-23 13:57:11 +01:00 · 2022-02-23 13:57:11 +01:00 · 9550cd509e
commit 9550cd509e
--- a/archivers/telegram_archiver.py
+++ b/archivers/telegram_archiver.py
@ -54,8 +54,8 @@ class TelegramArchiver(Archiver):
        # extract duration from HTML
        duration = s.find_all('time')[0].contents[0]
        if ':' in duration:
-            duration = float(duration.split(
-                ':')[0]) * 60 + float(duration.split(':')[1])
+            minutes, seconds = duration.split(':')[:2]  # first two ':'-separated fields, as before
+            duration = float(minutes) * 60 + float(seconds)  # one statement: a leading "+ ..." line would be a discarded no-op
        else:
            duration = float(duration)

--- a/auto_archive.py
+++ b/auto_archive.py
@ -14,17 +14,18 @@ load_dotenv()


 def update_sheet(gw, row, result: archivers.ArchiveResult):
-    update = []
+    cell_updates = []
+    row_values = gw.get_row(row)

    def batch_if_valid(col, val, final_value=None):
        final_value = final_value or val
-        if val and gw.col_exists(col) and gw.cell(row, col) == '':
-            update.append((row, col, final_value))
+        if val and gw.col_exists(col) and gw.get_cell(row_values, col) == '':
+            cell_updates.append((row, col, final_value))

-    update.append((row, 'status', result.status))
+    cell_updates.append((row, 'status', result.status))

    batch_if_valid('archive', result.cdn_url)
-    batch_if_valid('archive', True, datetime.datetime.now().isoformat())
+    batch_if_valid('date', True, datetime.datetime.now().isoformat())
    batch_if_valid('thumbnail', result.thumbnail, f'=IMAGE("{result.thumbnail}")')
    batch_if_valid('thumbnail_index', result.thumbnail_index)
    batch_if_valid('title', result.title)
@ -34,7 +35,18 @@ def update_sheet(gw, row, result: archivers.ArchiveResult):
        result.timestamp = datetime.datetime.fromtimestamp(result.timestamp).isoformat()
    batch_if_valid('timestamp', result.timestamp)

-    gw.update_batch(update)
+    gw.batch_set_cell(cell_updates)
+
+
+def expand_url(url):
+    """Return the final URL behind a 'https://t.co/' short link; any other url, or one that fails to resolve, is returned unchanged."""
+    if 'https://t.co/' in url:
+        try:
+            # follow the redirect(s); the timeout keeps one dead link from stalling the whole run
+            url = requests.get(url, timeout=30).url
+        except Exception as e:  # best-effort, but unlike a bare except this lets KeyboardInterrupt/SystemExit through
+            logger.error(f'Failed to expand url {url}: {e}')
+    return url


 def process_sheet(sheet):
@ -74,38 +86,34 @@ def process_sheet(sheet):
        ]

        # loop through rows in worksheet
-        for i in range(2, gw.count_rows() + 1):
-            row = gw.get_row(i)
-            url = gw.cell(row, 'url')
-            status = gw.cell(row, 'status')
+        for row in range(2, gw.count_rows() + 1):
+            row_values = gw.get_row(row)  # read the row once; get_cell(<int>, ...) would re-read it on every call
+            url, status = gw.get_cell(row_values, 'url'), gw.get_cell(row_values, 'status')
            if url != '' and status in ['', None]:
-                gw.update(i, 'status', 'Archive in progress')
+                gw.set_cell(row, 'status', 'Archive in progress')

-                # expand short URL links
-                if 'https://t.co/' in url:
-                    r = requests.get(url)
-                    url = r.url
+                url = expand_url(url)

                for archiver in active_archivers:
-                    logger.debug(f'Trying {archiver} on row {i}')
+                    logger.debug(f'Trying {archiver} on row {row}')
+
                    # TODO: add support for multiple videos/images
-                    result = archiver.download(url, check_if_exists=True)
+                    try:
+                        result = archiver.download(url, check_if_exists=True)
+                    except Exception as e:
+                        result = False
+                        logger.error(f'Got unexpected error in row {row} with archiver {archiver} for url {url}: {e}')

                    if result:
-                        logger.success(f'{archiver} succeeded on row {i}')
-                        break
+                        if result.status in ['success', 'already archived']:
+                            logger.success(f'{archiver} succeeded on row {row}')
+                            break
+                        logger.warning(f'{archiver} did not succeed on row {row}, final status: {result.status}')

                if result:
-                    update_sheet(gw, i, result)
+                    update_sheet(gw, row, result)
                else:
-                    gw.update(i, 'status', 'failed: no archiver')
-
-        #             # except:
-        #             # if any unexpected errors occured, log these into the Google Sheet
-        #             # t, value, traceback = sys.exc_info()
-
-        #             # update_sheet(wks, i, str(
-        #             #     value), {}, columns, v)
+                    gw.set_cell(row, 'status', 'failed: no archiver')


 def main():
--- a/gworksheet.py
+++ b/gworksheet.py
@ -19,20 +19,18 @@ class GWorksheet:
        self.headers = [v.lower() for v in self.wks.row_values(1)]
        self.columns = columns

-    def worksheet(self): return self.wks
-
    def _check_col_exists(self, col: str):
        if col not in self.columns:
            raise Exception(f'Column {col} is not in the configured column names: {self.columns.keys()}')

+    def _col_index(self, col: str):
+        self._check_col_exists(col)
+        return self.headers.index(self.columns[col])
+
    def col_exists(self, col: str):
        self._check_col_exists(col)
        return self.columns[col] in self.headers

-    def col_index(self, col: str):
-        self._check_col_exists(col)
-        return self.headers.index(self.columns[col])
-
    def count_rows(self):
        return len(self.wks.get_values())

@ -40,30 +38,37 @@ class GWorksheet:
        # row is 1-based
        return self.wks.row_values(row)

-    def cell(self, row, col: str):
-        # row can be index (1-based) or list of values
+    def get_cell(self, row, col: str):
+        """
+        returns the cell value from (row, col), 
+        where row can be an index (1-based) OR list of values
+        as received from self.get_row(row)
+        """
        if type(row) == int:
            row = self.get_row(row)

-        col_index = self.col_index(col)
+        col_index = self._col_index(col)
        if col_index >= len(row):
            return ''
        return row[col_index]

-    def update(self, row: int, col: str, val):
+    def set_cell(self, row: int, col: str, val):
        # row is 1-based
-        col_index = self.col_index(col) + 1
+        col_index = self._col_index(col) + 1
        self.wks.update_cell(row, col_index, val)

-    def update_batch(self, updates):
-        updates = [
+    def batch_set_cell(self, cell_updates):
+        """
+        receives a list of [(row:int, col:str, val)] and batch updates it, the parameters are the same as in the self.set_cell() method
+        """
+        cell_updates = [
            {
-                'range': self.to_a1(row, self.col_index(col) + 1),
+                'range': self.to_a1(row, self._col_index(col) + 1),
                'values': [[val]]
            }
-            for row, col, val in updates
+            for row, col, val in cell_updates
        ]
-        self.wks.batch_update(updates, value_input_option='USER_ENTERED')
+        self.wks.batch_update(cell_updates, value_input_option='USER_ENTERED')

    def to_a1(self, row: int, col: int):
        # row, col are 1-based