kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Custom headers fix in Browser Steps and Playwright/Puppeteer fetchers ( #2197 )
rodzic
52c895b2e8
commit
14e632bc19
|
@ -72,7 +72,11 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
# Playwright via Sockpuppetbrowser fetch
|
# Playwright via Sockpuppetbrowser fetch
|
||||||
# tests/visualselector/test_fetch_data.py will do browser steps
|
# tests/visualselector/test_fetch_data.py will do browser steps
|
||||||
docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'
|
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
|
||||||
|
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
|
||||||
|
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
|
||||||
|
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
|
||||||
|
|
||||||
|
|
||||||
- name: Playwright and SocketPuppetBrowser - Headers and requests
|
- name: Playwright and SocketPuppetBrowser - Headers and requests
|
||||||
run: |
|
run: |
|
||||||
|
@ -87,8 +91,11 @@ jobs:
|
||||||
# STRAIGHT TO CDP
|
# STRAIGHT TO CDP
|
||||||
- name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container
|
- name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container
|
||||||
run: |
|
run: |
|
||||||
# Playwright via Sockpuppetbrowser fetch
|
# Playwright via Sockpuppetbrowser fetch
|
||||||
docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" -e "FAST_PUPPETEER_CHROME_FETCHER=True" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'
|
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
|
||||||
|
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
|
||||||
|
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
|
||||||
|
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
|
||||||
|
|
||||||
- name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks
|
- name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks
|
||||||
run: |
|
run: |
|
||||||
|
|
|
@ -6,6 +6,8 @@ import re
|
||||||
from random import randint
|
from random import randint
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
from changedetectionio.content_fetchers.base import manage_user_agent
|
||||||
|
|
||||||
# Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end
|
# Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end
|
||||||
# 0- off, 1- on
|
# 0- off, 1- on
|
||||||
browser_step_ui_config = {'Choose one': '0 0',
|
browser_step_ui_config = {'Choose one': '0 0',
|
||||||
|
@ -178,6 +180,7 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||||
stale = False
|
stale = False
|
||||||
# bump and kill this if idle after X sec
|
# bump and kill this if idle after X sec
|
||||||
age_start = 0
|
age_start = 0
|
||||||
|
headers = {}
|
||||||
|
|
||||||
# use a special driver, maybe locally etc
|
# use a special driver, maybe locally etc
|
||||||
command_executor = os.getenv(
|
command_executor = os.getenv(
|
||||||
|
@ -192,7 +195,8 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||||
|
|
||||||
browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
|
browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
|
||||||
|
|
||||||
def __init__(self, playwright_browser, proxy=None):
|
def __init__(self, playwright_browser, proxy=None, headers=None):
|
||||||
|
self.headers = headers or {}
|
||||||
self.age_start = time.time()
|
self.age_start = time.time()
|
||||||
self.playwright_browser = playwright_browser
|
self.playwright_browser = playwright_browser
|
||||||
if self.context is None:
|
if self.context is None:
|
||||||
|
@ -206,16 +210,17 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||||
|
|
||||||
# @todo handle multiple contexts, bind a unique id from the browser on each req?
|
# @todo handle multiple contexts, bind a unique id from the browser on each req?
|
||||||
self.context = self.playwright_browser.new_context(
|
self.context = self.playwright_browser.new_context(
|
||||||
# @todo
|
accept_downloads=False, # Should never be needed
|
||||||
# user_agent=request_headers['User-Agent'] if request_headers.get('User-Agent') else 'Mozilla/5.0',
|
bypass_csp=True, # This is needed to enable JavaScript execution on GitHub and others
|
||||||
# proxy=self.proxy,
|
extra_http_headers=self.headers,
|
||||||
# This is needed to enable JavaScript execution on GitHub and others
|
ignore_https_errors=True,
|
||||||
bypass_csp=True,
|
proxy=proxy,
|
||||||
# Should never be needed
|
service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
|
||||||
accept_downloads=False,
|
# Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
|
||||||
proxy=proxy
|
user_agent=manage_user_agent(headers=self.headers),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
self.page = self.context.new_page()
|
self.page = self.context.new_page()
|
||||||
|
|
||||||
# self.page.set_default_navigation_timeout(keep_open)
|
# self.page.set_default_navigation_timeout(keep_open)
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
from playwright.sync_api import PlaywrightContextManager
|
from playwright.sync_api import PlaywrightContextManager
|
||||||
import asyncio
|
|
||||||
|
|
||||||
# So playwright wants to run as a context manager, but we do something horrible and hacky
|
# So playwright wants to run as a context manager, but we do something horrible and hacky
|
||||||
# we are holding the session open for as long as possible, then shutting it down, and opening a new one
|
# we are holding the session open for as long as possible, then shutting it down, and opening a new one
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import sys
|
import sys
|
||||||
from distutils.util import strtobool
|
from distutils.util import strtobool
|
||||||
|
from loguru import logger
|
||||||
from changedetectionio.content_fetchers.exceptions import BrowserStepsStepException
|
from changedetectionio.content_fetchers.exceptions import BrowserStepsStepException
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
@ -29,10 +29,15 @@ def available_fetchers():
|
||||||
# rather than site-specific.
|
# rather than site-specific.
|
||||||
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
|
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
|
||||||
if use_playwright_as_chrome_fetcher:
|
if use_playwright_as_chrome_fetcher:
|
||||||
if not strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')):
|
# @note - For now, browser steps always uses playwright
|
||||||
|
if not strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')) or False:
|
||||||
|
logger.debug('Using Playwright library as fetcher')
|
||||||
from .playwright import fetcher as html_webdriver
|
from .playwright import fetcher as html_webdriver
|
||||||
else:
|
else:
|
||||||
|
logger.debug('Using direct Python Puppeteer library as fetcher')
|
||||||
from .puppeteer import fetcher as html_webdriver
|
from .puppeteer import fetcher as html_webdriver
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
logger.debug("Falling back to selenium as fetcher")
|
||||||
from .webdriver_selenium import fetcher as html_webdriver
|
from .webdriver_selenium import fetcher as html_webdriver
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,40 @@ from loguru import logger
|
||||||
from changedetectionio.content_fetchers import BrowserStepsStepException
|
from changedetectionio.content_fetchers import BrowserStepsStepException
|
||||||
|
|
||||||
|
|
||||||
|
def manage_user_agent(headers, current_ua=''):
    """
    Pick the User-Agent string a browser-based fetcher should present.

    A User-Agent supplied in the custom request headers always wins (the header
    name is matched case-insensitively). Otherwise, when the browser's own
    User-Agent is known, it is returned with the 'HeadlessChrome' marker
    rewritten to 'Chrome'.

    NOTE!!!!!! The service that does the actual Chrome fetching should handle any anti-robot techniques
    THERE ARE MANY WAYS THAT IT CAN BE DETECTED AS A ROBOT!!
    This does not take care of
    - Scraping of 'navigator' (platform, productSub, vendor, oscpu etc etc) browser object (navigator.appVersion) etc
    - TCP/IP fingerprint JA3 etc
    - Graphic rendering fingerprinting
    - Your IP being obviously in a pool of bad actors
    - Too many requests
    - Scraping of Sec-CH-UA browser replies (thanks google!!)
    - Scraping of ServiceWorker, new window calls etc

    See https://filipvitas.medium.com/how-to-set-user-agent-header-with-puppeteer-js-and-not-fail-28c7a02165da
    Puppeteer requests https://github.com/dgtlmoon/pyppeteerstealth

    :param headers: Mapping of custom request header names to values; ``None`` or an
                    empty mapping is treated as "no custom headers"
    :param current_ua: User-Agent string currently reported by the browser, if known
    :return: The User-Agent to use, or ``None`` when neither the custom headers nor
             ``current_ua`` provide one
    """
    # Tolerate headers=None so callers with no custom headers do not crash on .items()
    ua_in_custom_headers = next(
        (v for k, v in (headers or {}).items() if k.lower() == "user-agent"),
        None,
    )
    if ua_in_custom_headers:
        return ua_in_custom_headers

    # No usable custom User-Agent: if the browser reports an obviously headless one, disguise it
    if current_ua:
        return current_ua.replace('HeadlessChrome', 'Chrome')

    return None
|
||||||
|
|
||||||
|
|
||||||
class Fetcher():
|
class Fetcher():
|
||||||
browser_connection_is_custom = None
|
browser_connection_is_custom = None
|
||||||
browser_connection_url = None
|
browser_connection_url = None
|
||||||
|
|
|
@ -3,7 +3,8 @@ import os
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from changedetectionio.content_fetchers.base import Fetcher
|
|
||||||
|
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||||
|
|
||||||
class fetcher(Fetcher):
|
class fetcher(Fetcher):
|
||||||
|
@ -102,19 +103,16 @@ class fetcher(Fetcher):
|
||||||
# Set user agent to prevent Cloudflare from blocking the browser
|
# Set user agent to prevent Cloudflare from blocking the browser
|
||||||
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
|
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
|
||||||
context = browser.new_context(
|
context = browser.new_context(
|
||||||
user_agent={k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
|
accept_downloads=False, # Should never be needed
|
||||||
|
bypass_csp=True, # This is needed to enable JavaScript execution on GitHub and others
|
||||||
|
extra_http_headers=request_headers,
|
||||||
|
ignore_https_errors=True,
|
||||||
proxy=self.proxy,
|
proxy=self.proxy,
|
||||||
# This is needed to enable JavaScript execution on GitHub and others
|
service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'), # Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
|
||||||
bypass_csp=True,
|
user_agent=manage_user_agent(headers=request_headers),
|
||||||
# Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
|
|
||||||
service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
|
|
||||||
# Should never be needed
|
|
||||||
accept_downloads=False
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self.page = context.new_page()
|
self.page = context.new_page()
|
||||||
if len(request_headers):
|
|
||||||
context.set_extra_http_headers(request_headers)
|
|
||||||
|
|
||||||
# Listen for all console events and handle errors
|
# Listen for all console events and handle errors
|
||||||
self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
|
self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
|
||||||
|
|
|
@ -5,7 +5,8 @@ import websockets.exceptions
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from changedetectionio.content_fetchers.base import Fetcher
|
|
||||||
|
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError
|
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError
|
||||||
|
|
||||||
|
|
||||||
|
@ -100,10 +101,11 @@ class fetcher(Fetcher):
|
||||||
else:
|
else:
|
||||||
self.page = await browser.newPage()
|
self.page = await browser.newPage()
|
||||||
|
|
||||||
|
await self.page.setUserAgent(manage_user_agent(headers=request_headers, current_ua=await self.page.evaluate('navigator.userAgent')))
|
||||||
|
|
||||||
await self.page.setBypassCSP(True)
|
await self.page.setBypassCSP(True)
|
||||||
if request_headers:
|
if request_headers:
|
||||||
await self.page.setExtraHTTPHeaders(request_headers)
|
await self.page.setExtraHTTPHeaders(request_headers)
|
||||||
# @todo check user-agent worked
|
|
||||||
|
|
||||||
# SOCKS5 with authentication is not supported (yet)
|
# SOCKS5 with authentication is not supported (yet)
|
||||||
# https://github.com/microsoft/playwright/issues/10567
|
# https://github.com/microsoft/playwright/issues/10567
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
import os
|
||||||
|
from flask import url_for
|
||||||
|
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||||
|
|
||||||
|
|
||||||
|
def test_execute_custom_js(client, live_server):
    """
    Add a watch that uses the html_webdriver backend with custom JS and custom
    headers, then verify through the preview page that the JS ran and the
    headers were echoed back by the test endpoint.
    """
    live_server_setup(live_server)
    assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"

    # Point the watch at this test server under the 'cdio' hostname
    # (presumably so the remote browser container can reach it - verify against the CI setup)
    test_url = (
        url_for('test_interactive_html_endpoint', _external=True)
        .replace('localhost.localdomain', 'cdio')
        .replace('localhost', 'cdio')
    )

    # Step 1: quick-add the watch; it is created in paused state
    quick_add_form = {"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'}
    res = client.post(url_for("form_quick_watch_add"), data=quick_add_form, follow_redirects=True)
    assert b"Watch added in Paused state, saving will unpause" in res.data

    # Step 2: configure the fetcher, the JS to execute and the custom headers, unpausing on save
    edit_form = {
        "url": test_url,
        "tags": "",
        'fetch_backend': "html_webdriver",
        'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();',
        'headers': "testheader: yes\buser-agent: MyCustomAgent",
    }
    res = client.post(
        url_for("edit_page", uuid="first", unpause_on_save=1),
        data=edit_form,
        follow_redirects=True,
    )
    assert b"unpaused" in res.data
    wait_for_all_checks(client)

    # Step 3: at least one snapshot must have been stored for the watch
    uuid = extract_UUID_from_client(client)
    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
    assert watch.history_n >= 1, "Watch history had atleast 1 (everything fetched OK)"

    # Still looking at the edit-page response here
    assert b"This text should be removed" not in res.data

    # Step 4: the preview must show the page as it was after the button click
    res = client.get(url_for("preview_page", uuid=uuid), follow_redirects=True)
    assert b"This text should be removed" not in res.data
    assert b"I smell JavaScript because the button was pressed" in res.data

    # The endpoint echoes received headers in lower case, so the custom ones must appear
    assert b"testheader: yes" in res.data
    assert b"user-agent: mycustomagent" in res.data

    # Clean up all watches
    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
|
@ -29,7 +29,8 @@ def test_fetch_pdf(client, live_server):
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
|
||||||
assert b'PDF-1.5' not in res.data
|
# PDF header should not be there (it was converted to text)
|
||||||
|
assert b'PDF' not in res.data[:10]
|
||||||
assert b'hello world' in res.data
|
assert b'hello world' in res.data
|
||||||
|
|
||||||
# So we know if the file changes in other ways
|
# So we know if the file changes in other ways
|
||||||
|
|
|
@ -242,5 +242,28 @@ def live_server_setup(live_server):
|
||||||
resp.headers['Content-Type'] = 'application/pdf'
|
resp.headers['Content-Type'] = 'application/pdf'
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
|
@live_server.app.route('/test-interactive-html-endpoint')
def test_interactive_html_endpoint():
    """
    Serve a small interactive HTML page that also echoes the request headers.

    The page has a button whose click handler removes the '#remove' paragraph and
    fills '#some-content' with base64-decoded text. Every request header received
    is printed, lower-cased, inside a <pre> block.
    """
    # One "name: value<br>" entry per received request header
    echoed_headers = "".join(f"{k}: {v}<br>" for k, v in request.headers.items())

    page = f"""
<html>
<body>
Primitive JS check for <pre>changedetectionio/tests/visualselector/test_fetch_data.py</pre>
<p id="remove">This text should be removed</p>
<form onsubmit="event.preventDefault();">
<!-- obfuscated text so that we dont accidentally get a false positive due to conversion of the source :) --->
<button name="test-button" onclick="getElementById('remove').remove();getElementById('some-content').innerHTML = atob('SSBzbWVsbCBKYXZhU2NyaXB0IGJlY2F1c2UgdGhlIGJ1dHRvbiB3YXMgcHJlc3NlZCE=')">Click here</button>
<div id=some-content></div>
<pre>
{echoed_headers.lower()}
</pre>
</body>
</html>"""

    resp = make_response(page, 200)
    resp.headers['Content-Type'] = 'text/html'
    return resp
|
||||||
|
|
||||||
live_server.start()
|
live_server.start()
|
||||||
|
|
||||||
|
|
|
@ -7,15 +7,19 @@ from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_cli
|
||||||
def test_setup(client, live_server):
|
def test_setup(client, live_server):
|
||||||
live_server_setup(live_server)
|
live_server_setup(live_server)
|
||||||
|
|
||||||
|
|
||||||
# Add a site in paused mode, add an invalid filter, we should still have visual selector data ready
|
# Add a site in paused mode, add an invalid filter, we should still have visual selector data ready
|
||||||
def test_visual_selector_content_ready(client, live_server):
|
def test_visual_selector_content_ready(client, live_server):
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
|
|
||||||
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
|
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
|
||||||
|
|
||||||
# Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
|
# Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
|
||||||
test_url = "https://changedetection.io/ci-test/test-runjs.html"
|
test_url = url_for('test_interactive_html_endpoint', _external=True)
|
||||||
|
test_url = test_url.replace('localhost.localdomain', 'cdio')
|
||||||
|
test_url = test_url.replace('localhost', 'cdio')
|
||||||
|
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("form_quick_watch_add"),
|
url_for("form_quick_watch_add"),
|
||||||
|
@ -23,28 +27,31 @@ def test_visual_selector_content_ready(client, live_server):
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b"Watch added in Paused state, saving will unpause" in res.data
|
assert b"Watch added in Paused state, saving will unpause" in res.data
|
||||||
|
uuid = extract_UUID_from_client(client)
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first", unpause_on_save=1),
|
url_for("edit_page", uuid=uuid, unpause_on_save=1),
|
||||||
data={
|
data={
|
||||||
"url": test_url,
|
"url": test_url,
|
||||||
"tags": "",
|
"tags": "",
|
||||||
"headers": "",
|
# For now, cookies doesnt work in headers because it must be a full cookiejar object
|
||||||
'fetch_backend': "html_webdriver",
|
'headers': "testheader: yes\buser-agent: MyCustomAgent",
|
||||||
'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();'
|
'fetch_backend': "html_webdriver",
|
||||||
},
|
},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b"unpaused" in res.data
|
assert b"unpaused" in res.data
|
||||||
wait_for_all_checks(client)
|
wait_for_all_checks(client)
|
||||||
uuid = extract_UUID_from_client(client)
|
|
||||||
|
|
||||||
# Check the JS execute code before extract worked
|
|
||||||
|
assert live_server.app.config['DATASTORE'].data['watching'][uuid].history_n >= 1, "Watch history had atleast 1 (everything fetched OK)"
|
||||||
|
|
||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("preview_page", uuid="first"),
|
url_for("preview_page", uuid=uuid),
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b'I smell JavaScript' in res.data
|
assert b"testheader: yes" in res.data
|
||||||
|
assert b"user-agent: mycustomagent" in res.data
|
||||||
|
|
||||||
|
|
||||||
assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
|
assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
|
||||||
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"
|
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"
|
||||||
|
@ -74,30 +81,33 @@ def test_visual_selector_content_ready(client, live_server):
|
||||||
|
|
||||||
def test_basic_browserstep(client, live_server):
|
def test_basic_browserstep(client, live_server):
|
||||||
|
|
||||||
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
|
|
||||||
#live_server_setup(live_server)
|
#live_server_setup(live_server)
|
||||||
|
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
|
||||||
|
|
||||||
# Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
|
test_url = url_for('test_interactive_html_endpoint', _external=True)
|
||||||
test_url = "https://changedetection.io/ci-test/test-runjs.html"
|
test_url = test_url.replace('localhost.localdomain', 'cdio')
|
||||||
|
test_url = test_url.replace('localhost', 'cdio')
|
||||||
|
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("form_quick_watch_add"),
|
url_for("form_quick_watch_add"),
|
||||||
data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
|
data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
|
||||||
assert b"Watch added in Paused state, saving will unpause" in res.data
|
assert b"Watch added in Paused state, saving will unpause" in res.data
|
||||||
|
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first", unpause_on_save=1),
|
url_for("edit_page", uuid="first", unpause_on_save=1),
|
||||||
data={
|
data={
|
||||||
"url": test_url,
|
"url": test_url,
|
||||||
"tags": "",
|
"tags": "",
|
||||||
"headers": "",
|
'fetch_backend': "html_webdriver",
|
||||||
'fetch_backend': "html_webdriver",
|
'browser_steps-0-operation': 'Goto site',
|
||||||
'browser_steps-0-operation': 'Goto site',
|
'browser_steps-1-operation': 'Click element',
|
||||||
'browser_steps-1-operation': 'Click element',
|
'browser_steps-1-selector': 'button[name=test-button]',
|
||||||
'browser_steps-1-selector': 'button[name=test-button]',
|
'browser_steps-1-optional_value': '',
|
||||||
'browser_steps-1-optional_value': ''
|
# For now, cookies doesnt work in headers because it must be a full cookiejar object
|
||||||
|
'headers': "testheader: yes\buser-agent: MyCustomAgent",
|
||||||
},
|
},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
@ -105,6 +115,9 @@ def test_basic_browserstep(client, live_server):
|
||||||
wait_for_all_checks(client)
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
uuid = extract_UUID_from_client(client)
|
uuid = extract_UUID_from_client(client)
|
||||||
|
assert live_server.app.config['DATASTORE'].data['watching'][uuid].history_n >= 1, "Watch history had atleast 1 (everything fetched OK)"
|
||||||
|
|
||||||
|
assert b"This text should be removed" not in res.data
|
||||||
|
|
||||||
# Check HTML conversion detected and workd
|
# Check HTML conversion detected and workd
|
||||||
res = client.get(
|
res = client.get(
|
||||||
|
@ -114,13 +127,19 @@ def test_basic_browserstep(client, live_server):
|
||||||
assert b"This text should be removed" not in res.data
|
assert b"This text should be removed" not in res.data
|
||||||
assert b"I smell JavaScript because the button was pressed" in res.data
|
assert b"I smell JavaScript because the button was pressed" in res.data
|
||||||
|
|
||||||
|
assert b"testheader: yes" in res.data
|
||||||
|
assert b"user-agent: mycustomagent" in res.data
|
||||||
|
|
||||||
|
four_o_four_url = url_for('test_endpoint', status_code=404, _external=True)
|
||||||
|
four_o_four_url = four_o_four_url.replace('localhost.localdomain', 'cdio')
|
||||||
|
four_o_four_url = four_o_four_url.replace('localhost', 'cdio')
|
||||||
|
|
||||||
# now test for 404 errors
|
# now test for 404 errors
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid=uuid, unpause_on_save=1),
|
url_for("edit_page", uuid=uuid, unpause_on_save=1),
|
||||||
data={
|
data={
|
||||||
"url": "https://changedetection.io/404",
|
"url": four_o_four_url,
|
||||||
"tags": "",
|
"tags": "",
|
||||||
"headers": "",
|
|
||||||
'fetch_backend': "html_webdriver",
|
'fetch_backend': "html_webdriver",
|
||||||
'browser_steps-0-operation': 'Goto site',
|
'browser_steps-0-operation': 'Goto site',
|
||||||
'browser_steps-1-operation': 'Click element',
|
'browser_steps-1-operation': 'Click element',
|
||||||
|
|
Ładowanie…
Reference in New Issue