Review notes (text before the first "diff --git" header is ignored by git apply / patch):

  * content_fetcher.py: response.all_headers() is now read only after the
    `response is None` guard, so a missing response raises EmptyReply instead
    of AttributeError.
  * content_fetcher.py: the Non200ErrorCodeReceived exit closes the context and
    browser first, like every other error exit in this function.
  * content_fetcher.py: ScreenshotUnavailable is given response.status (the
    attribute used everywhere else in this patch) instead of
    response.status_code.
  * test_fetch_data.py: dropped the commented-out live_server_setup() call and
    the new file now ends with a newline.

  NOTE(review): this patch had been collapsed onto three lines; line breaks,
  indentation and the position of blank context lines were rebuilt from the
  hunk line counts and should be confirmed against the working tree. The
  "index" hashes predate the review changes above.

diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py
index 14384edd..c3b4971c 100644
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -472,12 +472,20 @@ class base_html_playwright(Fetcher):
             browsersteps_interface = steppable_browser_interface()
             browsersteps_interface.page = self.page
 
-            try:
-                response = browsersteps_interface.action_goto_url(value=url)
+            response = browsersteps_interface.action_goto_url(value=url)
 
+            if response is None:
+                context.close()
+                browser.close()
+                print("Content Fetcher > Response object was none")
+                raise EmptyReply(url=url, status_code=None)
+
+            # Read the headers only after the None check above has passed
+            self.headers = response.all_headers()
+
+            try:
                 if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
                     browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
-
             except playwright._impl._api_types.TimeoutError as e:
                 context.close()
                 browser.close()
@@ -489,31 +497,26 @@ class base_html_playwright(Fetcher):
                 browser.close()
                 raise PageUnloadable(url=url, status_code=None, message=str(e))
 
-            if response is None:
-                context.close()
-                browser.close()
-                print("Content Fetcher > Response object was none")
-                raise EmptyReply(url=url, status_code=None)
-
-            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
-            self.page.wait_for_timeout(extra_wait * 1000)
-
-            # Run Browser Steps here
-            self.iterate_browser_steps()
-
             extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
             self.page.wait_for_timeout(extra_wait * 1000)
 
             self.content = self.page.content()
             self.status_code = response.status
+
+            if self.status_code != 200 and not ignore_status_codes:
+                context.close()
+                browser.close()
+                raise Non200ErrorCodeReceived(url=url, status_code=self.status_code)
+
             if len(self.page.content().strip()) == 0:
                 context.close()
                 browser.close()
                 print("Content Fetcher > Content was empty")
                 raise EmptyReply(url=url, status_code=response.status)
 
-            self.status_code = response.status
-            self.headers = response.all_headers()
+            # Run Browser Steps here
+            self.iterate_browser_steps()
+            self.page.wait_for_timeout(extra_wait * 1000)
 
             # So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
             if current_include_filters is not None:
@@ -539,7 +542,7 @@ class base_html_playwright(Fetcher):
             except Exception as e:
                 context.close()
                 browser.close()
-                raise ScreenshotUnavailable(url=url, status_code=None)
+                raise ScreenshotUnavailable(url=url, status_code=response.status)
 
             context.close()
             browser.close()
diff --git a/changedetectionio/tests/visualselector/test_fetch_data.py b/changedetectionio/tests/visualselector/test_fetch_data.py
index 08180bb0..de8112dd 100644
--- a/changedetectionio/tests/visualselector/test_fetch_data.py
+++ b/changedetectionio/tests/visualselector/test_fetch_data.py
@@ -1,18 +1,19 @@
 #!/usr/bin/python3
 
 import time
+import os
 from flask import url_for
 from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
 
+def test_setup(client, live_server):
+    live_server_setup(live_server)
+
 # Add a site in paused mode, add an invalid filter, we should still have visual selector data ready
 def test_visual_selector_content_ready(client, live_server):
     import os
     import json
 
     assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
-    time.sleep(1)
-    live_server_setup(live_server)
-
     # Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
     test_url = "https://changedetection.io/ci-test/test-runjs.html"
 
@@ -60,4 +61,66 @@ def test_visual_selector_content_ready(client, live_server):
         follow_redirects=True
     )
     assert b'notification_screenshot' in res.data
+    client.get(
+        url_for("form_delete", uuid="all"),
+        follow_redirects=True
+    )
 
+def test_basic_browserstep(client, live_server):
+
+    assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
+
+    # Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
+    test_url = "https://changedetection.io/ci-test/test-runjs.html"
+
+    res = client.post(
+        url_for("form_quick_watch_add"),
+        data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
+        follow_redirects=True
+    )
+    assert b"Watch added in Paused state, saving will unpause" in res.data
+
+    res = client.post(
+        url_for("edit_page", uuid="first", unpause_on_save=1),
+        data={
+            "url": test_url,
+            "tags": "",
+            "headers": "",
+            'fetch_backend': "html_webdriver",
+            'browser_steps-0-operation': 'Goto site',
+            'browser_steps-1-operation': 'Click element',
+            'browser_steps-1-selector': 'button[name=test-button]',
+            'browser_steps-1-value': ''
+        },
+        follow_redirects=True
+    )
+    assert b"unpaused" in res.data
+    wait_for_all_checks(client)
+    uuid = extract_UUID_from_client(client)
+
+
+    # now test for 404 errors
+    res = client.post(
+        url_for("edit_page", uuid=uuid, unpause_on_save=1),
+        data={
+            "url": "https://changedetection.io/404",
+            "tags": "",
+            "headers": "",
+            'fetch_backend': "html_webdriver",
+            'browser_steps-0-operation': 'Goto site',
+            'browser_steps-1-operation': 'Click element',
+            'browser_steps-1-selector': 'button[name=test-button]',
+            'browser_steps-1-value': ''
+        },
+        follow_redirects=True
+    )
+    assert b"unpaused" in res.data
+    wait_for_all_checks(client)
+
+    res = client.get(url_for("index"))
+    assert b'Error - 404' in res.data
+
+    client.get(
+        url_for("form_delete", uuid="all"),
+        follow_redirects=True
+    )