Playwright content fetcher - Fixes for status codes and screenshot info (#2168)

2024-02-08 15:15:04 +01:00 · 2024-02-08 15:15:04 +01:00 · 3287283065
commit 3287283065
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@ -396,6 +396,8 @@ class base_html_playwright(Fetcher):
                # https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962
                logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.")
                logger.critical(response)
+                context.close()
+                browser.close()
                raise PageUnloadable(url=url, status_code=None, message=str(e))

            if self.status_code != 200 and not ignore_status_codes:
@ -436,16 +438,17 @@ class base_html_playwright(Fetcher):
            # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
            # acceptable screenshot quality here
            try:
-                # The actual screenshot
-                self.screenshot = self.page.screenshot(type='jpeg', full_page=True,
-                                                       quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
+                # The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
+                self.screenshot = self.page.screenshot(type='jpeg',
+                                                       full_page=True,
+                                                       quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)),
+                                                       )
            except Exception as e:
+                # It's likely the screenshot was too long/big and something crashed
+                raise ScreenshotUnavailable(url=url, status_code=self.status_code)
+            finally:
                context.close()
                browser.close()
-                raise ScreenshotUnavailable(url=url, status_code=response.status_code)
-
-            context.close()
-            browser.close()


 class base_html_webdriver(Fetcher):
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@ -411,7 +411,7 @@ class update_worker(threading.Thread):
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
                    except content_fetcher.ScreenshotUnavailable as e:
-                        err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
+                        err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False