Refactor image saving with forked process to reduce memory usage, improvements to xpath scraper handling (#3099)

pull/3105/head
dgtlmoon 2025-04-11 09:04:51 +02:00 committed by GitHub
parent cdfb3f206c
commit 4269079c54
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 776 additions and 834 deletions

View file

@@ -4,7 +4,7 @@ import re
from random import randint
from loguru import logger
from changedetectionio.content_fetchers.helpers import capture_full_page
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
from changedetectionio.content_fetchers.base import manage_user_agent
from changedetectionio.safe_jinja import render as jinja_render
@@ -293,12 +293,16 @@ class browsersteps_live_ui(steppable_browser_interface):
def get_current_state(self):
"""Return the screenshot and interactive elements mapping, generally always called after action_()"""
import importlib.resources
import json
# because for now we only run browser steps in playwright mode (not puppeteer mode)
from changedetectionio.content_fetchers.playwright import capture_full_page
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
now = time.time()
self.page.wait_for_timeout(1 * 1000)
screenshot = capture_full_page(self.page)
screenshot = capture_full_page(page=self.page)
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
@@ -306,13 +310,21 @@ class browsersteps_live_ui(steppable_browser_interface):
self.page.evaluate("var include_filters=''")
# Go find the interactive elements
# @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements)
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
self.page.request_gc()
scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
"visualselector_xpath_selectors": scan_elements,
"max_height": MAX_TOTAL_HEIGHT
}))
self.page.request_gc()
# So the JS will find the smallest one first
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
logger.debug(f"Time to scrape xpath element data in browser {time.time()-now:.2f}s")
logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
# playwright._impl._api_types.Error: Browser closed.
# @todo show some countdown timer?
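The scraper is now invoked as a parameterised function instead of by string templating. A minimal sketch of the pattern, assuming Playwright's sync API (the URL, selector list and stand-in JS here are illustrative only): the scraper file evaluates as `async (options) => {...}`, so the dict passed as the second argument to evaluate() arrives as `options`, and the JSON string it returns is decoded on the Python side.

import json
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    page = p.chromium.launch().new_page()
    page.goto("https://example.com")  # illustrative URL
    # stand-in for xpath_element_scraper.js, which returns JSON.stringify(...)
    scraper_js = "async (options) => JSON.stringify({seen: options.max_height})"
    xpath_data = json.loads(page.evaluate(scraper_js, {
        "visualselector_xpath_selectors": "a,p,div",
        "max_height": 16000,
    }))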

View file

@@ -7,11 +7,29 @@ import os
# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
SCREENSHOT_MAX_HEIGHT_DEFAULT = 16000
SCREENSHOT_DEFAULT_QUALITY = 40
# Maximum total height for the final image (When in stitch mode).
# We limit this to 16000px due to the huge amount of RAM that was being used
# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc)
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
# The size at which we switch to the stitching method. Below this (and below
# MAX_TOTAL_HEIGHT, which can be set by the user) we use the default
# screenshot method.
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
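As a back-of-envelope check of the RAM figure in the comment above (a sketch assuming 3 bytes per RGB pixel and a typical 1400px viewport width):

height_px, width_px, bytes_per_pixel = 16000, 1400, 3
raw_bitmap = height_px * width_px * bytes_per_pixel    # 67,200,000 bytes
print(f"{raw_bitmap / 1024 ** 2:.1f} MB")              # ~64.1 MB, before PIL's own buffers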
# available_fetchers() will scan this implementation looking for anything starting with html_
# this information is used in the form selections
from changedetectionio.content_fetchers.requests import fetcher as html_requests
import importlib.resources
XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
def available_fetchers():
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
import inspect

View file

@@ -63,11 +63,6 @@ class Fetcher():
# Time ONTOP of the system defined env minimum time
render_extract_delay = 0
def __init__(self):
import importlib.resources
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
@abstractmethod
def get_error(self):
return self.error
@@ -143,6 +138,7 @@ class Fetcher():
logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
self.screenshot_step("before-" + str(step_n))
self.save_step_html("before-" + str(step_n))
try:
optional_value = step['optional_value']
selector = step['selector']

View file

@@ -1,138 +0,0 @@
# Pages with a vertical height longer than this will use the 'stitch together' method.
# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
from loguru import logger
def capture_full_page(page):
import io
import os
import time
from PIL import Image, ImageDraw, ImageFont
# Maximum total height for the final image (When in stitch mode).
# We limit this to 16000px due to the huge amount of RAM that was being used
# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc)
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", 16000))
# The size at which we will switch to stitching method, when below this (and
# MAX_TOTAL_HEIGHT which can be set by a user) we will use the default
# screenshot method.
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
WARNING_TEXT_HEIGHT = 20 # Height of the warning text overlay
# Save the original viewport size
original_viewport = page.viewport_size
start = time.time()
stitched_image = None
try:
viewport_width = original_viewport["width"]
viewport_height = original_viewport["height"]
page_height = page.evaluate("document.documentElement.scrollHeight")
# Optimization to avoid unnecessary stitching if we can avoid it
# Use the default screenshot method for smaller pages to take advantage
# of GPU and native playwright screenshot optimizations
if (
page_height < SCREENSHOT_SIZE_STITCH_THRESHOLD
and page_height < MAX_TOTAL_HEIGHT
):
logger.debug("Using default screenshot method")
screenshot = page.screenshot(
type="jpeg",
quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
full_page=True,
)
logger.debug(f"Screenshot captured in {time.time() - start:.2f}s")
return screenshot
logger.debug(
"Using stitching method for large screenshot because page height exceeds threshold"
)
# Limit the total capture height
capture_height = min(page_height, MAX_TOTAL_HEIGHT)
# Calculate number of chunks needed using ORIGINAL viewport height
num_chunks = (capture_height + viewport_height - 1) // viewport_height
# Create the final image upfront to avoid holding all chunks in memory
stitched_image = Image.new("RGB", (viewport_width, capture_height))
# Track cumulative paste position
y_offset = 0
for _ in range(num_chunks):
# Scroll to position (no viewport resizing)
page.evaluate(f"window.scrollTo(0, {y_offset})")
# Capture only the visible area using clip
with io.BytesIO(
page.screenshot(
type="jpeg",
clip={
"x": 0,
"y": 0,
"width": viewport_width,
"height": min(viewport_height, capture_height - y_offset),
},
quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
)
) as buf:
with Image.open(buf) as img:
img.load()
stitched_image.paste(img, (0, y_offset))
y_offset += img.height
logger.debug(f"Screenshot stitched together in {time.time() - start:.2f}s")
# Overlay warning text if the screenshot was trimmed
if capture_height < page_height:
draw = ImageDraw.Draw(stitched_image)
warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"
# Load font (default system font if Arial is unavailable)
try:
font = ImageFont.truetype(
"arial.ttf", WARNING_TEXT_HEIGHT
) # Arial (Windows/Mac)
except IOError:
font = ImageFont.load_default() # Default font if Arial not found
# Get text bounding box (correct method for newer Pillow versions)
text_bbox = draw.textbbox((0, 0), warning_text, font=font)
text_width = text_bbox[2] - text_bbox[0] # Calculate text width
text_height = text_bbox[3] - text_bbox[1] # Calculate text height
# Define background rectangle (top of the image)
draw.rectangle(
[(0, 0), (viewport_width, WARNING_TEXT_HEIGHT)], fill="white"
)
# Center text horizontally within the warning area
text_x = (viewport_width - text_width) // 2
text_y = (WARNING_TEXT_HEIGHT - text_height) // 2
# Draw the warning text in red
draw.text((text_x, text_y), warning_text, fill="red", font=font)
# Save final image
with io.BytesIO() as output:
stitched_image.save(
output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30))
)
screenshot = output.getvalue()
finally:
# Restore the original viewport size
page.set_viewport_size(original_viewport)
if stitched_image is not None:
stitched_image.close()
return screenshot
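The chunk count in the deleted helper above is a ceiling division over the viewport height; a quick worked example (768px viewport is illustrative):

capture_height, viewport_height = 16000, 768           # illustrative viewport height
num_chunks = (capture_height + viewport_height - 1) // viewport_height
print(num_chunks)                                      # 21 chunks (20 full + 1 partial)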

View file

@@ -4,10 +4,102 @@ from urllib.parse import urlparse
from loguru import logger
from changedetectionio.content_fetchers.helpers import capture_full_page
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
SCREENSHOT_SIZE_STITCH_THRESHOLD, MAX_TOTAL_HEIGHT, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
def capture_full_page(page):
import os
import time
from multiprocessing import Process, Pipe
start = time.time()
page_height = page.evaluate("document.documentElement.scrollHeight")
logger.debug(f"Playwright viewport size {page.viewport_size}")
############################################################
#### SCREENSHOT FITS INTO ONE SNAPSHOT (SMALLER PAGES) #####
############################################################
# Optimization to avoid unnecessary stitching if we can avoid it
# Use the default screenshot method for smaller pages to take advantage
# of GPU and native playwright screenshot optimizations
# - No PIL needed here, no danger of memory leaks, no sub process required
if (page_height < SCREENSHOT_SIZE_STITCH_THRESHOLD and page_height < MAX_TOTAL_HEIGHT ):
logger.debug("Using default screenshot method")
page.request_gc()
screenshot = page.screenshot(
type="jpeg",
quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)),
full_page=True,
)
page.request_gc()
logger.debug(f"Screenshot captured in {time.time() - start:.2f}s")
return screenshot
###################################################################################
#### CASE FOR LARGE SCREENSHOTS THAT NEED TO BE TRIMMED DUE TO MEMORY ISSUES #####
###################################################################################
# - PIL can easily allocate memory and not release it cleanly
# - Fetching screenshot from playwright seems OK
# Image.new is leaky even with .close()
# So let's prepare all the data chunks and farm it out to a subprocess for clean memory handling
logger.debug(
"Using stitching method for large screenshot because page height exceeds threshold"
)
# Limit the total capture height
capture_height = min(page_height, MAX_TOTAL_HEIGHT)
# Calculate number of chunks needed using ORIGINAL viewport height
num_chunks = (capture_height + page.viewport_size['height'] - 1) // page.viewport_size['height']
screenshot_chunks = []
# Track cumulative paste position
y_offset = 0
for _ in range(num_chunks):
page.request_gc()
page.evaluate(f"window.scrollTo(0, {y_offset})")
page.request_gc()
h = min(page.viewport_size['height'], capture_height - y_offset)
screenshot_chunks.append(page.screenshot(
type="jpeg",
clip={
"x": 0,
"y": 0,
"width": page.viewport_size['width'],
"height": h,
},
quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)),
))
y_offset += h # maybe better to inspect the image here?
page.request_gc()
# PIL can leak memory in various situations, assign the work to a subprocess for totally clean handling
parent_conn, child_conn = Pipe()
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, capture_height))
p.start()
result = parent_conn.recv_bytes()
p.join()
screenshot_chunks = None
logger.debug(f"Screenshot - Page height: {page_height} Capture height: {capture_height} - Stitched together in {time.time() - start:.2f}s")
return result
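The Process/Pipe pattern above is the heart of this commit: all PIL work happens in a short-lived child process, so whatever memory PIL holds is returned to the OS when the child exits. A self-contained sketch of that fork-and-pipe round trip (worker body simplified; the real worker is stitch_images_worker):

from multiprocessing import Process, Pipe

def worker(conn, chunks):
    # stand-in for stitch_images_worker: the heavy PIL work happens here,
    # then the finished JPEG bytes are streamed back over the pipe
    conn.send_bytes(b"".join(chunks))
    conn.close()

if __name__ == "__main__":
    parent_conn, child_conn = Pipe()
    p = Process(target=worker, args=(child_conn, [b"chunk1", b"chunk2"]))
    p.start()
    result = parent_conn.recv_bytes()   # blocks until the worker sends
    p.join()                            # child exits; its memory goes back to the OS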
class fetcher(Fetcher):
fetcher_description = "Playwright {}/Javascript".format(
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
@@ -60,7 +152,8 @@ class fetcher(Fetcher):
def screenshot_step(self, step_n=''):
super().screenshot_step(step_n=step_n)
screenshot = capture_full_page(self.page)
screenshot = capture_full_page(page=self.page)
if self.browser_steps_screenshot_path is not None:
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
@@ -89,7 +182,6 @@ class fetcher(Fetcher):
from playwright.sync_api import sync_playwright
import playwright._impl._errors
from changedetectionio.content_fetchers import visualselector_xpath_selectors
import time
self.delete_browser_steps_screenshots()
response = None
@@ -185,13 +277,22 @@ class fetcher(Fetcher):
self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
else:
self.page.evaluate("var include_filters=''")
self.page.request_gc()
self.xpath_data = self.page.evaluate(
"async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")
# request_gc before and after evaluate to free up memory
# @todo browsersteps etc
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
self.xpath_data = self.page.evaluate(XPATH_ELEMENT_JS, {
"visualselector_xpath_selectors": visualselector_xpath_selectors,
"max_height": MAX_TOTAL_HEIGHT
})
self.page.request_gc()
self.instock_data = self.page.evaluate(INSTOCK_DATA_JS)
self.page.request_gc()
self.content = self.page.content()
logger.debug(f"Time to scrape xpath element data in browser {time.time() - now:.2f}s")
logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s")
# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
@@ -202,11 +303,18 @@ class fetcher(Fetcher):
# acceptable screenshot quality here
try:
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
self.screenshot = capture_full_page(self.page)
self.screenshot = capture_full_page(page=self.page)
except Exception as e:
# It's likely the screenshot was too long/big and something crashed
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
finally:
# Request garbage collection one more time before closing
try:
self.page.request_gc()
except:
pass
# Clean up resources properly
context.close()
browser.close()
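The finally block above is a defensive-teardown pattern: request_gc() (available in recent Playwright releases) can itself raise if the page is already gone, so it must never mask the real error. A sketch, using the same names as the fetcher above with the self. prefix dropped for brevity:

try:
    screenshot = capture_full_page(page=page)
finally:
    try:
        page.request_gc()   # free what the page can before teardown
    except Exception:
        pass                # never let a GC failure mask the real error
    context.close()
    browser.close()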

View file

@@ -6,8 +6,93 @@ from urllib.parse import urlparse
from loguru import logger
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
SCREENSHOT_SIZE_STITCH_THRESHOLD, MAX_TOTAL_HEIGHT, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
async def capture_full_page(page):
import os
import time
from multiprocessing import Process, Pipe
start = time.time()
page_height = await page.evaluate("document.documentElement.scrollHeight")
logger.debug(f"Puppeteer viewport size {page.viewport}")
############################################################
#### SCREENSHOT FITS INTO ONE SNAPSHOT (SMALLER PAGES) #####
############################################################
# Optimization to avoid unnecessary stitching if we can avoid it
# Use the default screenshot method for smaller pages to take advantage
# of GPU and native playwright screenshot optimizations
# - No PIL needed here, no danger of memory leaks, no sub process required
if (page_height < SCREENSHOT_SIZE_STITCH_THRESHOLD and page_height < MAX_TOTAL_HEIGHT ):
logger.debug("Using default screenshot method")
await page.evaluate(f"window.scrollTo(0, 0)")
screenshot = await page.screenshot(
type_="jpeg",
quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)),
fullPage=True,
)
logger.debug(f"Screenshot captured in {time.time() - start:.2f}s")
return screenshot
###################################################################################
#### CASE FOR LARGE SCREENSHOTS THAT NEED TO BE TRIMMED DUE TO MEMORY ISSUES #####
###################################################################################
# - PIL can easily allocate memory and not release it cleanly
# - Fetching screenshot from playwright seems OK
# Image.new is leaky even with .close()
# So let's prepare all the data chunks and farm it out to a subprocess for clean memory handling
logger.debug(
"Using stitching method for large screenshot because page height exceeds threshold"
)
# Limit the total capture height
capture_height = min(page_height, MAX_TOTAL_HEIGHT)
# Calculate number of chunks needed using ORIGINAL viewport height
num_chunks = (capture_height + page.viewport['height'] - 1) // page.viewport['height']
screenshot_chunks = []
# Track cumulative paste position
y_offset = 0
for _ in range(num_chunks):
await page.evaluate(f"window.scrollTo(0, {y_offset})")
h = min(page.viewport['height'], capture_height - y_offset)
screenshot_chunks.append(await page.screenshot(
type_="jpeg",
quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)),
))
y_offset += h # maybe better to inspect the image here?
# PIL can leak memory in various situations, assign the work to a subprocess for totally clean handling
parent_conn, child_conn = Pipe()
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, capture_height))
p.start()
result = parent_conn.recv_bytes()
p.join()
screenshot_chunks = None
logger.debug(f"Screenshot - Page height: {page_height} Capture height: {capture_height} - Stitched together in {time.time() - start:.2f}s")
return result
class fetcher(Fetcher):
fetcher_description = "Puppeteer/direct {}/Javascript".format(
@@ -79,7 +164,6 @@ class fetcher(Fetcher):
empty_pages_are_a_change
):
from changedetectionio.content_fetchers import visualselector_xpath_selectors
self.delete_browser_steps_screenshots()
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
@@ -181,11 +265,10 @@ class fetcher(Fetcher):
raise PageUnloadable(url=url, status_code=None, message=str(e))
if self.status_code != 200 and not ignore_status_codes:
screenshot = await self.page.screenshot(type_='jpeg',
fullPage=True,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
screenshot = await capture_full_page(page=self.page)
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
content = await self.page.content
if not empty_pages_are_a_change and len(content.strip()) == 0:
@@ -203,46 +286,31 @@ class fetcher(Fetcher):
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
# Setup the xPath/VisualSelector scraper
if current_include_filters is not None:
if current_include_filters:
js = json.dumps(current_include_filters)
await self.page.evaluate(f"var include_filters={js}")
else:
await self.page.evaluate(f"var include_filters=''")
self.xpath_data = await self.page.evaluate(
"async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
self.instock_data = await self.page.evaluate("async () => {" + self.instock_data_js + "}")
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
"visualselector_xpath_selectors": visualselector_xpath_selectors,
"max_height": MAX_TOTAL_HEIGHT
})
if not self.xpath_data:
raise Exception(f"Content Fetcher > xPath scraper failed. Please report this URL so we can fix it :)")
self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
self.content = await self.page.content
# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
# JPEG is better here because the screenshots can be very very large
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
try:
self.screenshot = await self.page.screenshot(type_='jpeg',
fullPage=True,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
except Exception as e:
logger.error("Error fetching screenshot")
# // May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw'
# // @ todo after text extract, we can place some overlay text with red background to say 'croppped'
logger.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot')
try:
self.screenshot = await self.page.screenshot(type_='jpeg',
fullPage=False,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
except Exception as e:
logger.error('ERROR: Failed to get viewport-only reduced screenshot :(')
pass
finally:
# It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
logger.success(f"Fetching '{url}' complete, closing page")
await self.page.close()
logger.success(f"Fetching '{url}' complete, closing browser")
await browser.close()
self.screenshot = await capture_full_page(page=self.page)
# It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
logger.success(f"Fetching '{url}' complete, closing page")
await self.page.close()
logger.success(f"Fetching '{url}' complete, closing browser")
await browser.close()
logger.success(f"Fetching '{url}' complete, exiting puppeteer fetch.")
async def main(self, **kwargs):

View file

@@ -1,190 +0,0 @@
module.exports = async ({page, context}) => {
var {
url,
execute_js,
user_agent,
extra_wait_ms,
req_headers,
include_filters,
xpath_element_js,
screenshot_quality,
proxy_username,
proxy_password,
disk_cache_dir,
no_cache_list,
block_url_list,
} = context;
await page.setBypassCSP(true)
await page.setExtraHTTPHeaders(req_headers);
if (user_agent) {
await page.setUserAgent(user_agent);
}
// https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded
await page.setDefaultNavigationTimeout(0);
if (proxy_username) {
// Setting Proxy-Authentication header is deprecated, and doing so can trigger header change errors from Puppeteer
// https://github.com/puppeteer/puppeteer/issues/676 ?
// https://help.brightdata.com/hc/en-us/articles/12632549957649-Proxy-Manager-How-to-Guides#h_01HAKWR4Q0AFS8RZTNYWRDFJC2
// https://cri.dev/posts/2020-03-30-How-to-solve-Puppeteer-Chrome-Error-ERR_INVALID_ARGUMENT/
await page.authenticate({
username: proxy_username,
password: proxy_password
});
}
await page.setViewport({
width: 1024,
height: 768,
deviceScaleFactor: 1,
});
await page.setRequestInterception(true);
if (disk_cache_dir) {
console.log(">>>>>>>>>>>>>>> LOCAL DISK CACHE ENABLED <<<<<<<<<<<<<<<<<<<<<");
}
const fs = require('fs');
const crypto = require('crypto');
function file_is_expired(file_path) {
if (!fs.existsSync(file_path)) {
return true;
}
var stats = fs.statSync(file_path);
const now_date = new Date();
const expire_seconds = 300;
if ((now_date / 1000) - (stats.mtime.getTime() / 1000) > expire_seconds) {
console.log("CACHE EXPIRED: " + file_path);
return true;
}
return false;
}
page.on('request', async (request) => {
// General blocking of requests that waste traffic
if (block_url_list.some(substring => request.url().toLowerCase().includes(substring))) return request.abort();
if (disk_cache_dir) {
const url = request.url();
const key = crypto.createHash('md5').update(url).digest("hex");
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
// https://stackoverflow.com/questions/4482686/check-synchronously-if-file-directory-exists-in-node-js
if (fs.existsSync(dir_path + key)) {
console.log("* CACHE HIT , using - " + dir_path + key + " - " + url);
const cached_data = fs.readFileSync(dir_path + key);
// @todo headers can come from dir_path+key+".meta" json file
request.respond({
status: 200,
//contentType: 'text/html', //@todo
body: cached_data
});
return;
}
}
request.continue();
});
if (disk_cache_dir) {
page.on('response', async (response) => {
const url = response.url();
// Basic filtering for sane responses
if (response.request().method() != 'GET' || response.request().resourceType() == 'xhr' || response.request().resourceType() == 'document' || response.status() != 200) {
console.log("Skipping (not useful) - Status:" + response.status() + " Method:" + response.request().method() + " ResourceType:" + response.request().resourceType() + " " + url);
return;
}
if (no_cache_list.some(substring => url.toLowerCase().includes(substring))) {
console.log("Skipping (no_cache_list) - " + url);
return;
}
if (url.toLowerCase().includes('data:')) {
console.log("Skipping (embedded-data) - " + url);
return;
}
response.buffer().then(buffer => {
if (buffer.length > 100) {
console.log("Cache - Saving " + response.request().method() + " - " + url + " - " + response.request().resourceType());
const key = crypto.createHash('md5').update(url).digest("hex");
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
if (!fs.existsSync(dir_path)) {
fs.mkdirSync(dir_path, {recursive: true})
}
if (fs.existsSync(dir_path + key)) {
if (file_is_expired(dir_path + key)) {
fs.writeFileSync(dir_path + key, buffer);
}
} else {
fs.writeFileSync(dir_path + key, buffer);
}
}
});
});
}
const r = await page.goto(url, {
waitUntil: 'load'
});
await page.waitForTimeout(1000);
await page.waitForTimeout(extra_wait_ms);
if (execute_js) {
await page.evaluate(execute_js);
await page.waitForTimeout(200);
}
var xpath_data;
var instock_data;
try {
// Not sure the best way here, in the future this should be a new package added to npm then run in evaluatedCode
// (Once the old playwright is removed)
xpath_data = await page.evaluate((include_filters) => {%xpath_scrape_code%}, include_filters);
instock_data = await page.evaluate(() => {%instock_scrape_code%});
} catch (e) {
console.log(e);
}
// Protocol error (Page.captureScreenshot): Cannot take screenshot with 0 width can come from a proxy auth failure
// Wrap it here (for now)
var b64s = false;
try {
b64s = await page.screenshot({encoding: "base64", fullPage: true, quality: screenshot_quality, type: 'jpeg'});
} catch (e) {
console.log(e);
}
// May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw'
if (!b64s) {
// @todo after text extract, we can place some overlay text with red background to say 'croppped'
console.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot');
try {
b64s = await page.screenshot({encoding: "base64", quality: screenshot_quality, type: 'jpeg'});
} catch (e) {
console.log(e);
}
}
var html = await page.content();
return {
data: {
'content': html,
'headers': r.headers(),
'instock_data': instock_data,
'screenshot': b64s,
'status_code': r.status(),
'xpath_data': xpath_data
},
type: 'application/json',
};
};
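The deleted module sharded its disk cache by the first three hex digits of an md5 of the URL, so no single directory grows huge. The same scheme rendered in Python (a sketch; values illustrative, disk_cache_dir is the module's own variable):

import hashlib

url = "https://example.com/app.js"   # illustrative
disk_cache_dir = "/tmp/cache/"       # illustrative
key = hashlib.md5(url.encode()).hexdigest()
path = f"{disk_cache_dir}{key[0]}/{key[1]}/{key[2]}/{key}"  # e.g. /tmp/cache/a/b/c/abc123...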

View file

@@ -1,229 +1,220 @@
// Restock Detector
// (c) Leigh Morresi dgtlmoon@gmail.com
//
// Assumes the product is in stock to begin with, unless the following appears above the fold ;
// - outOfStockTexts appears above the fold (out of stock)
// - negateOutOfStockRegex (really is in stock)
async () => {

    function isItemInStock() {
        // @todo Pass these in so the same list can be used in non-JS fetchers
        const outOfStockTexts = [
            ' أخبرني عندما يتوفر',
            '0 in stock',
            'actuellement indisponible',
            'agotado',
            'article épuisé',
            'artikel zurzeit vergriffen',
            'as soon as stock is available',
            'ausverkauft', // sold out
            'available for back order',
            'awaiting stock',
            'back in stock soon',
            'back-order or out of stock',
            'backordered',
            'benachrichtigt mich', // notify me
            'brak na stanie',
            'brak w magazynie',
            'coming soon',
            'currently have any tickets for this',
            'currently unavailable',
            'dieser artikel ist bald wieder verfügbar',
            'dostępne wkrótce',
            'en rupture',
            'en rupture de stock',
            'épuisé',
            'esgotado',
            'indisponible',
            'indisponível',
            'isn\'t in stock right now',
            'isnt in stock right now',
            'item is no longer available',
            'let me know when it\'s available',
            'mail me when available',
            'message if back in stock',
            'mevcut değil',
            'nachricht bei',
            'nicht auf lager',
            'nicht lagernd',
            'nicht lieferbar',
            'nicht verfügbar',
            'nicht vorrätig',
            'nicht zur verfügung',
            'nie znaleziono produktów',
            'niet beschikbaar',
            'niet leverbaar',
            'niet op voorraad',
            'no disponible',
            'non disponibile',
            'non disponible',
            'no longer in stock',
            'no tickets available',
            'not available',
            'not currently available',
            'not in stock',
            'notify me when available',
            'notify me',
            'notify when available',
            'não disponível',
            'não estamos a aceitar encomendas',
            'out of stock',
            'out-of-stock',
            'plus disponible',
            'prodotto esaurito',
            'produkt niedostępny',
            'rupture',
            'sold out',
            'sold-out',
            'stok habis',
            'stok kosong',
            'stok varian ini habis',
            'stokta yok',
            'temporarily out of stock',
            'temporarily unavailable',
            'there were no search results for',
            'this item is currently unavailable',
            'tickets unavailable',
            'tidak dijual',
            'tidak tersedia',
            'tijdelijk uitverkocht',
            'tiket tidak tersedia',
            'tükendi',
            'unavailable nearby',
            'unavailable tickets',
            'vergriffen',
            'vorbestellen',
            'vorbestellung ist bald möglich',
            'we don\'t currently have any',
            'we couldn\'t find any products that match',
            'we do not currently have an estimate of when this product will be back in stock.',
            'we don\'t know when or if this item will be back in stock.',
            'we were not able to find a match',
            'when this arrives in stock',
            'zur zeit nicht an lager',
            '品切れ',
            '已售',
            '已售完',
            '품절'
        ];

        const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);

        function getElementBaseText(element) {
            // .textContent can include text from children which may give the wrong results
            // scan only immediate TEXT_NODEs, which will be a child of the element
            var text = "";
            for (var i = 0; i < element.childNodes.length; ++i)
                if (element.childNodes[i].nodeType === Node.TEXT_NODE)
                    text += element.childNodes[i].textContent;
            return text.toLowerCase().trim();
        }

        const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig');

        // The out-of-stock or in-stock-text is generally always above-the-fold
        // and often below-the-fold is a list of related products that may or may not contain trigger text
        // so it's good to filter to just the 'above the fold' elements
        // and it should be at least 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist

        function collectVisibleElements(parent, visibleElements) {
            if (!parent) return; // Base case: if parent is null or undefined, return

            // Add the parent itself to the visible elements array if it's of the specified types
            visibleElements.push(parent);

            // Iterate over the parent's children
            const children = parent.children;
            for (let i = 0; i < children.length; i++) {
                const child = children[i];
                if (
                    child.nodeType === Node.ELEMENT_NODE &&
                    window.getComputedStyle(child).display !== 'none' &&
                    window.getComputedStyle(child).visibility !== 'hidden' &&
                    child.offsetWidth >= 0 &&
                    child.offsetHeight >= 0 &&
                    window.getComputedStyle(child).contentVisibility !== 'hidden'
                ) {
                    // If the child is an element and is visible, recursively collect visible elements
                    collectVisibleElements(child, visibleElements);
                }
            }
        }

        const elementsToScan = [];
        collectVisibleElements(document.body, elementsToScan);

        var elementText = "";

        // REGEXS THAT REALLY MEAN IT'S IN STOCK
        for (let i = elementsToScan.length - 1; i >= 0; i--) {
            const element = elementsToScan[i];

            // outside the 'fold' or some weird text in the heading area
            // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
            if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) {
                continue
            }

            elementText = "";
            try {
                if (element.tagName.toLowerCase() === "input") {
                    elementText = element.value.toLowerCase().trim();
                } else {
                    elementText = getElementBaseText(element);
                }
            } catch (e) {
                console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
            }

            if (elementText.length) {
                // try which ones could mean its in stock
                if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
                    console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
                    return 'Possibly in stock';
                }
            }
        }

        // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
        for (let i = elementsToScan.length - 1; i >= 0; i--) {
            const element = elementsToScan[i];
            // outside the 'fold' or some weird text in the heading area
            // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
            // Note: there's also an automated test that places the 'out of stock' text fairly low down
            if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
                continue
            }
            elementText = "";
            if (element.tagName.toLowerCase() === "input") {
                elementText = element.value.toLowerCase().trim();
            } else {
                elementText = getElementBaseText(element);
            }

            if (elementText.length) {
                // and these mean its out of stock
                for (const outOfStockText of outOfStockTexts) {
                    if (elementText.includes(outOfStockText)) {
                        console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
                        return outOfStockText; // item is out of stock
                    }
                }
            }
        }

        console.log(`Returning 'Possibly in stock' - can't find any useful matching text`)
        return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
    }

    // returns the element text that makes it think it's out of stock
    return isItemInStock().trim()
}

View file

@@ -1,285 +1,285 @@
// Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com)
// All rights reserved.
async (options) => {

    // @file Scrape the page looking for elements of concern (%ELEMENTS%)
    // http://matatk.agrip.org.uk/tests/position-and-width/
    // https://stackoverflow.com/questions/26813480/when-is-element-getboundingclientrect-guaranteed-to-be-updated-accurate
    //
    // Some pages like https://www.londonstockexchange.com/stock/NCCL/ncondezi-energy-limited/analysis
    // will automatically force a scroll somewhere, so include the position offset
    // Lets hope the position doesnt change while we iterate the bbox's, but this is better than nothing

    let visualselector_xpath_selectors = options.visualselector_xpath_selectors
    let max_height = options.max_height

    var scroll_y = 0;
    try {
        scroll_y = +document.documentElement.scrollTop || document.body.scrollTop
    } catch (e) {
        console.log(e);
    }

    // Include the getXpath script directly, easier than fetching
    function getxpath(e) {
        var n = e;
        if (n && n.id) return '//*[@id="' + n.id + '"]';
        for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) {
            for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling;
            for (d = n.nextSibling; d;) {
                if (d.nodeName === n.nodeName) {
                    r = !0;
                    break
                }
                d = d.nextSibling
            }
            o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode
        }
        return o.length ? "/" + o.reverse().join("/") : ""
    }

    const findUpTag = (el) => {
        let r = el
        chained_css = [];
        depth = 0;

        // Strategy 1: If it's an input, with name, and there's only one, prefer that
        if (el.name !== undefined && el.name.length) {
            var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]";
            var proposed_element = window.document.querySelectorAll(proposed);
            if (proposed_element.length) {
                if (proposed_element.length === 1) {
                    return proposed;
                } else {
                    // Some sites change ID but name= stays the same, we can hit it if we know the index
                    // Find all the elements that match and work out the input[n]
                    var n = Array.from(proposed_element).indexOf(el);
                    // Return a Playwright selector for nthinput[name=zipcode]
                    return proposed + " >> nth=" + n;
                }
            }
        }

        // Strategy 2: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
        while (r.parentNode) {
            if (depth === 5) {
                break;
            }
            if ('' !== r.id) {
                chained_css.unshift("#" + CSS.escape(r.id));
                final_selector = chained_css.join(' > ');
                // Be sure theres only one, some sites have multiples of the same ID tag :-(
                if (window.document.querySelectorAll(final_selector).length === 1) {
                    return final_selector;
                }
                return null;
            } else {
                chained_css.unshift(r.tagName.toLowerCase());
            }
            r = r.parentNode;
            depth += 1;
        }
        return null;
    }

    // @todo - if it's SVG or IMG, go into image diff mode

    var size_pos = [];
    // after page fetch, inject this JS
    // build a map of all elements and their positions (maybe that only include text?)
    var bbox;
    console.log(`Scanning for "${visualselector_xpath_selectors}"`);

    function collectVisibleElements(parent, visibleElements) {
        if (!parent) return; // Base case: if parent is null or undefined, return

        // Add the parent itself to the visible elements array if it's of the specified types
        const tagName = parent.tagName.toLowerCase();
        if (visualselector_xpath_selectors.split(',').includes(tagName)) {
            visibleElements.push(parent);
        }

        // Iterate over the parent's children
        const children = parent.children;
        for (let i = 0; i < children.length; i++) {
            const child = children[i];
            const computedStyle = window.getComputedStyle(child);
            if (
                child.nodeType === Node.ELEMENT_NODE &&
                computedStyle.display !== 'none' &&
                computedStyle.visibility !== 'hidden' &&
                child.offsetWidth >= 0 &&
                child.offsetHeight >= 0 &&
                computedStyle.contentVisibility !== 'hidden'
            ) {
                // If the child is an element and is visible, recursively collect visible elements
                collectVisibleElements(child, visibleElements);
            }
        }
    }

    // Create an array to hold the visible elements
    const visibleElementsArray = [];

    // Call collectVisibleElements with the starting parent element
    collectVisibleElements(document.body, visibleElementsArray);

    visibleElementsArray.forEach(function (element) {
        bbox = element.getBoundingClientRect();

        // Skip really small ones, and where width or height ==0
        if (bbox['width'] * bbox['height'] < 10) {
            return
        }

        // Don't include elements that are offset from canvas
        if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) {
            return
        }

        // @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
        // it should not traverse when we know we can anchor off just an ID one level up etc..
        // maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
        // 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
        xpath_result = false;
        try {
            var d = findUpTag(element);
            if (d) {
                xpath_result = d;
            }
        } catch (e) {
            console.log(e);
        }

        // You could swap it and default to getXpath and then try the smarter one
        // default back to the less intelligent one
        if (!xpath_result) {
            try {
                // I've seen on FB and eBay that this doesnt work
                // ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
                xpath_result = getxpath(element);
            } catch (e) {
                console.log(e);
                return
            }
        }

        let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now

        let text = element.textContent.trim().slice(0, 30).trim();
        while (/\n{2,}|\t{2,}/.test(text)) {
            text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
        }

        // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
        const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6))) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text);
        const computedStyle = window.getComputedStyle(element);

        if (Math.floor(bbox['top']) + scroll_y > max_height) {
            return
        }

        size_pos.push({
            xpath: xpath_result,
            width: Math.round(bbox['width']),
            height: Math.round(bbox['height']),
            left: Math.floor(bbox['left']),
            top: Math.floor(bbox['top']) + scroll_y,
            // tagName used by Browser Steps
            tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
            // tagtype used by Browser Steps
            tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
            isClickable: computedStyle.cursor === "pointer",
            // Used by the keras trainer
            fontSize: computedStyle.getPropertyValue('font-size'),
            fontWeight: computedStyle.getPropertyValue('font-weight'),
            hasDigitCurrency: hasDigitCurrency,
            label: label,
        });
    });

    // Inject the current one set in the include_filters, which may be a CSS rule
    // used for displaying the current one in VisualSelector, where its not one we generated.
    if (include_filters.length) {
        let results;
        // Foreach filter, go and find it on the page and add it to the results so we can visualise it again
        for (const f of include_filters) {
            bbox = false;
            q = false;

            if (!f.length) {
                console.log("xpath_element_scraper: Empty filter, skipping");
                continue;
            }

            try {
                // is it xpath?
                if (f.startsWith('/') || f.startsWith('xpath')) {
                    var qry_f = f.replace(/xpath(:|\d:)/, '')
                    console.log("[xpath] Scanning for included filter " + qry_f)
                    let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
                    results = [];
                    for (let i = 0; i < xpathResult.snapshotLength; i++) {
                        results.push(xpathResult.snapshotItem(i));
                    }
                } else {
                    console.log("[css] Scanning for included filter " + f);
                    results = document.querySelectorAll(f);
                }
            } catch (e) {
                // Maybe catch DOMException and alert?
                console.log("xpath_element_scraper: Exception selecting element from filter " + f);
                console.log(e);
            }

            if (results != null && results.length) {

                // Iterate over the results
                results.forEach(node => {
                    // Try to resolve //something/text() back to its /something so we can at least get the bounding box
                    try {
                        if (typeof node.nodeName == 'string' && node.nodeName === '#text') {
                            node = node.parentElement
                        }
                    } catch (e) {
                        console.log(e)
                        console.log("xpath_element_scraper: #text resolver")
                    }

                    // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
                    if (typeof node.getBoundingClientRect == 'function') {
                        bbox = node.getBoundingClientRect();
                        console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
                    } else {
                        try {
                            // Try and see we can find its ownerElement
                            bbox = node.ownerElement.getBoundingClientRect();
                            console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
                        } catch (e) {
                            console.log(e)
                            console.log("xpath_element_scraper: error looking up q.ownerElement")
                        }
                    }

                    if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
                        size_pos.push({
                            xpath: f,
                            width: parseInt(bbox['width']),
                            height: parseInt(bbox['height']),
                            left: parseInt(bbox['left']),
                            top: parseInt(bbox['top']) + scroll_y,
                            highlight_as_custom_filter: true
                        });
                    }
                });
            }
        }
    }

    // Sort the elements so we find the smallest one first, in other words, we find the smallest one matching in that area
    // so that we dont select the wrapping element by mistake and be unable to select what we want
    size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1)

    // browser_width required for proper scaling in the frontend
    // Return as a string to save playwright for juggling thousands of objects
    return JSON.stringify({'size_pos': size_pos, 'browser_width': window.innerWidth});
}
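The JS above sorts ascending by area so the UI can pick the tightest matching box first; the Python side in browser steps then re-sorts descending (see browser_steps earlier in this commit) so larger overlays render behind smaller ones. A tiny illustration:

size_pos = [{"width": 100, "height": 10}, {"width": 10, "height": 10}]
size_pos.sort(key=lambda k: k["width"] * k["height"])    # smallest-area first, as in the JS
largest_first = sorted(size_pos, key=lambda k: k["width"] * k["height"], reverse=True)  # Python side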

View file

@@ -0,0 +1,73 @@
# Pages with a vertical height longer than this will use the 'stitch together' method.
# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
from loguru import logger
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
import os
import io
from PIL import Image, ImageDraw, ImageFont
try:
# Load images from byte chunks
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
total_height = sum(im.height for im in images)
max_width = max(im.width for im in images)
# Create stitched image
stitched = Image.new('RGB', (max_width, total_height))
y_offset = 0
for im in images:
stitched.paste(im, (0, y_offset))
y_offset += im.height
# Draw caption on top (overlaid, not extending canvas)
draw = ImageDraw.Draw(stitched)
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
padding = 10
font_size = 35
font_color = (255, 0, 0)
background_color = (255, 255, 255)
# Try to load a proper font
try:
font = ImageFont.truetype("arial.ttf", font_size)
except IOError:
font = ImageFont.load_default()
bbox = draw.textbbox((0, 0), caption_text, font=font)
text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1]
# Draw white rectangle background behind text
rect_top = 0
rect_bottom = text_height + 2 * padding
draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
# Draw text centered horizontally, 10px padding from top of the rectangle
text_x = (max_width - text_width) // 2
text_y = padding
draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
# Encode and send image
output = io.BytesIO()
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)))
pipe_conn.send_bytes(output.getvalue())
stitched.close()
except Exception as e:
pipe_conn.send(f"error:{e}")
finally:
pipe_conn.close()
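A minimal exercise of stitch_images_worker with two generated chunks (a sketch assuming Pillow; the fetchers hand it real viewport-sized JPEGs the same way):

import io
from multiprocessing import Pipe, Process
from PIL import Image
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker

def make_chunk(color):
    buf = io.BytesIO()
    Image.new("RGB", (200, 100), color).save(buf, format="JPEG")
    return buf.getvalue()

if __name__ == "__main__":
    chunks = [make_chunk("red"), make_chunk("blue")]
    parent_conn, child_conn = Pipe()
    p = Process(target=stitch_images_worker, args=(child_conn, chunks, 400, 200))
    p.start()
    jpeg_bytes = parent_conn.recv_bytes()   # 200x200 stitched JPEG, warning caption drawn on top
    p.join()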

View file

@@ -394,7 +394,7 @@ def changedetection_app(config=None, datastore_o=None):
response.headers['Content-Type'] = 'application/json'
response.headers['Content-Encoding'] = 'deflate'
else:
logger.error(f'Request elements.deflate at "{watch_directory}" but was notfound.')
logger.error(f'Request elements.deflate at "{watch_directory}" but was not found.')
abort(404)
if response:

View file

@@ -553,7 +553,10 @@ class model(watch_base):
self.ensure_data_dir_exists()
with open(target_path, 'wb') as f:
f.write(zlib.compress(json.dumps(data).encode()))
if not isinstance(data, str):
f.write(zlib.compress(json.dumps(data).encode()))
else:
f.write(zlib.compress(data.encode()))
f.close()
# Save as PNG, PNG is larger but better for doing visual diff in the future
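The write above pairs with the elements.deflate route earlier in this commit; reading it back is symmetric (a sketch, path illustrative). Note the str branch in the save already holds JSON from the xpath scraper, so json.loads works either way:

import json, zlib

with open("elements.deflate", "rb") as f:   # filename from the route above; path illustrative
    raw = zlib.decompress(f.read()).decode()
data = json.loads(raw)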

View file

@@ -592,6 +592,7 @@ class update_worker(threading.Thread):
self.current_uuid = None # Done
self.q.task_done()
update_handler = None
logger.debug(f"Watch {uuid} done in {time.time()-fetch_start_time:.2f}s")
# Give the CPU time to interrupt