diff --git a/Dockerfile b/Dockerfile index 9aac044b..34e9703f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -68,7 +68,7 @@ COPY changedetection.py /app/changedetection.py # Github Action test purpose(test-only.yml). # On production, it is effectively LOGGER_LEVEL=''. ARG LOGGER_LEVEL='' -ENV LOGGER_LEVEL "$LOGGER_LEVEL" +ENV LOGGER_LEVEL="$LOGGER_LEVEL" WORKDIR /app CMD ["python", "./changedetection.py", "-d", "/datastore"] diff --git a/changedetectionio/blueprint/ui/edit.py b/changedetectionio/blueprint/ui/edit.py index 0f6ac006..b3509b13 100644 --- a/changedetectionio/blueprint/ui/edit.py +++ b/changedetectionio/blueprint/ui/edit.py @@ -227,9 +227,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe if request.method == 'POST' and not form.validate(): flash("An error occurred, please see below.", "error") - visualselector_data_is_ready = datastore.visualselector_data_is_ready(uuid) - - # JQ is difficult to install on windows and must be manually added (outside requirements.txt) jq_support = True try: @@ -239,11 +236,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe watch = datastore.data['watching'].get(uuid) + # if system or watch is configured to need a chrome type browser system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' - - watch_uses_webdriver = False + watch_needs_selenium_or_playwright = False if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'): - watch_uses_webdriver = True + watch_needs_selenium_or_playwright = True + from zoneinfo import available_timezones @@ -265,11 +263,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe 'lev_info': levenshtein_ratio_recent_history(watch), 'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False), 'settings_application': 
datastore.data['settings']['application'], + 'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'), + 'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'), + 'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid), 'timezone_default_config': datastore.data['settings']['application'].get('timezone'), 'using_global_webdriver_wait': not default['webdriver_delay'], 'uuid': uuid, 'watch': watch, - 'watch_uses_webdriver': watch_uses_webdriver, + 'watch_needs_selenium_or_playwright': watch_needs_selenium_or_playwright, } included_content = None diff --git a/changedetectionio/content_fetchers/webdriver_selenium.py b/changedetectionio/content_fetchers/webdriver_selenium.py index 71c5df03..1aafe7cf 100644 --- a/changedetectionio/content_fetchers/webdriver_selenium.py +++ b/changedetectionio/content_fetchers/webdriver_selenium.py @@ -65,7 +65,17 @@ class fetcher(Fetcher): # request_body, request_method unused for now, until some magic in the future happens. options = ChromeOptions() - options.add_argument("--headless") + + # Load Chrome options from env + CHROME_OPTIONS = [ + line.strip() + for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines() + if line.strip() + ] + + for opt in CHROME_OPTIONS: + options.add_argument(opt) + if self.proxy: options.proxy = self.proxy @@ -80,7 +90,9 @@ class fetcher(Fetcher): self.quit() raise - self.driver.set_window_size(1280, 1024) + if not "--window-size" in os.getenv("CHROME_OPTIONS", ""): + self.driver.set_window_size(1280, 1024) + self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) if self.webdriver_js_execute_code is not None: @@ -88,6 +100,7 @@ class fetcher(Fetcher): # Selenium doesn't automatically wait for actions as good as Playwright, so wait again self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) + # @todo - how to check this? is it possible? 
self.status_code = 200 # @todo somehow we should try to get this working for WebDriver diff --git a/changedetectionio/templates/_helpers.html b/changedetectionio/templates/_helpers.html index 669755ee..b7a8cc41 100644 --- a/changedetectionio/templates/_helpers.html +++ b/changedetectionio/templates/_helpers.html @@ -98,15 +98,13 @@ {% macro playwright_warning() %} -

Error - Playwright support for Chrome based fetching is not enabled. Alternatively try our very affordable subscription based service which has all this setup for you.

+

Error - This watch needs Chrome (with playwright/sockpuppetbrowser), but Chrome-based fetching is not enabled. Alternatively, try our very affordable subscription-based service, which has all this set up for you.

You may need to enable the Playwright environment variable and uncomment the sockpuppetbrowser service in the docker-compose.yml file.


-

(Also Selenium/WebDriver can not extract full page screenshots reliably so Playwright is recommended here)

- {% endmacro %} -{% macro only_webdriver_type_watches_warning() %} -

Sorry, this functionality only works with Playwright/Chrome enabled watches.
You need to Set the fetch method to Playwright/Chrome mode and resave and have the Playwright connection enabled.


+{% macro only_playwright_type_watches_warning() %} +

Sorry, this functionality only works with Playwright/Chrome enabled watches.
You need to set the fetch method to Playwright/Chrome mode, resave, and have SockpuppetBrowser/Playwright or Selenium enabled.


{% endmacro %} {% macro render_time_schedule_form(form, available_timezones, timezone_default_config) %} diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index 925af81d..08666cc2 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -1,6 +1,6 @@ {% extends 'base.html' %} {% block content %} -{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_webdriver_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %} +{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %} {% from '_common_fields.html' import render_common_settings_form %} @@ -204,7 +204,9 @@ Math: {{ 1 + 1 }}") }}
- {% if playwright_enabled and watch_uses_webdriver %} + {% if watch_needs_selenium_or_playwright %} + {# Only works with playwright #} + {% if system_has_playwright_configured %}
@@ -223,7 +225,6 @@ Math: {{ 1 + 1 }}") }}
-
@@ -245,15 +246,16 @@ Math: {{ 1 + 1 }}") }}
{% else %} - - {% if not watch_uses_webdriver %} - {{ only_webdriver_type_watches_warning() }} - {% endif %} - {% if not playwright_enabled %} - {{ playwright_warning() }} - {% endif %} - + {# it's configured to use selenium or chrome but system says its not configured #} + {{ playwright_warning() }} + {% if system_has_webdriver_configured %} + Selenium/Webdriver cant be used here because it wont fetch screenshots reliably. + {% endif %} {% endif %} + {% else %} + {# "This functionality needs chrome.." #} + {{ only_playwright_type_watches_warning() }} + {% endif %}
@@ -379,7 +381,9 @@ Math: {{ 1 + 1 }}") }}
- {% if playwright_enabled and watch_uses_webdriver %} + {% if watch_needs_selenium_or_playwright %} + {% if system_has_playwright_configured %} + {% if visual_selector_data_ready %} The Visual Selector tool lets you select the text elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the Filters & Triggers tab. Use Shift+Click to select multiple items. @@ -396,14 +400,20 @@ Math: {{ 1 + 1 }}") }}
Currently: Loading...
+ {% else %} + Error, The Visual selector data is not ready, it needs to complete atleast one fetch, please queue the item and reload. + {% endif %} {% else %} - {% if not watch_uses_webdriver %} - {{ only_webdriver_type_watches_warning() }} - {% endif %} - {% if not playwright_enabled %} - {{ playwright_warning() }} - {% endif %} + {# The watch needed chrome but system says that playwright is not ready #} + {{ playwright_warning() }} {% endif %} + {% if system_has_webdriver_configured %} + Selenium/Webdriver cant be used here because it wont fetch screenshots reliably. + {% endif %} + {% else %} + {# "This functionality needs chrome.." #} + {{ only_playwright_type_watches_warning() }} + {% endif %}
diff --git a/docker-compose.yml b/docker-compose.yml index 370709e7..9899ee19 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,15 +9,20 @@ services: # - ./proxies.json:/datastore/proxies.json # environment: - # Default listening port, can also be changed with the -p option + # Default listening port, can also be changed with the -p option (not to be confused with ports: below) # - PORT=5000 # # Log levels are in descending order. (TRACE is the most detailed one) # Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL # - LOGGER_LEVEL=TRACE # - # Alternative WebDriver/selenium URL, do not use "'s or 's! - # - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub + # + # Uncomment below and the "sockpuppetbrowser" to use a real Chrome browser (It uses the "playwright" protocol) + # - PLAYWRIGHT_DRIVER_URL=ws://browser-sockpuppet-chrome:3000 + # + # + # Alternative WebDriver/selenium URL, do not use "'s or 's! (old, deprecated, does not support screenshots very well) + # - WEBDRIVER_URL=http://browser-selenium-chrome:4444/wd/hub # # WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_noProxy, # webdriver_proxyAutoconfigUrl, webdriver_autodetect, @@ -25,9 +30,6 @@ services: # # https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy # - # Alternative target "Chrome" Playwright URL, do not use "'s or 's! - # "Playwright" is a driver/librarythat allows changedetection to talk to a Chrome or similar browser. - # - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 # # Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password # @@ -86,8 +88,8 @@ services: # Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages. 
# RECOMMENDED FOR FETCHING PAGES WITH CHROME, be sure to enable the "PLAYWRIGHT_DRIVER_URL" env variable in the main changedetection container -# sockpuppetbrowser: -# hostname: sockpuppetbrowser +# browser-sockpuppet-chrome: +# hostname: browser-sockpuppet-chrome # image: dgtlmoon/sockpuppetbrowser:latest # cap_add: # - SYS_ADMIN @@ -102,14 +104,18 @@ services: # Used for fetching pages via Playwright+Chrome where you need Javascript support. # Note: Works well but is deprecated, does not fetch full page screenshots (doesnt work with Visual Selector) # Does not report status codes (200, 404, 403) and other issues -# browser-chrome: -# hostname: browser-chrome +# browser-selenium-chrome: +# hostname: browser-selenium-chrome # image: selenium/standalone-chrome:4 # environment: # - VNC_NO_PASSWORD=1 # - SCREEN_WIDTH=1920 # - SCREEN_HEIGHT=1080 # - SCREEN_DEPTH=24 +# CHROME_OPTIONS: | +# --window-size=1280,1024 +# --headless +# --disable-gpu # volumes: # # Workaround to avoid the browser crashing inside a docker container # # See https://github.com/SeleniumHQ/docker-selenium#quick-start