kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Fetching - Custom browser on experimental/puppeteer fetcher - Don't switch to custom puppeteer mode if external browser URL is active (#2068)
rodzic
3d1e1025d2
commit
273bd45ad7
|
@ -91,19 +91,20 @@ class ReplyWithContentButNoText(Exception):
|
||||||
|
|
||||||
|
|
||||||
class Fetcher():
|
class Fetcher():
|
||||||
|
browser_connection_is_custom = None
|
||||||
|
browser_connection_url = None
|
||||||
browser_steps = None
|
browser_steps = None
|
||||||
browser_steps_screenshot_path = None
|
browser_steps_screenshot_path = None
|
||||||
content = None
|
content = None
|
||||||
error = None
|
error = None
|
||||||
fetcher_description = "No description"
|
fetcher_description = "No description"
|
||||||
browser_connection_url = None
|
|
||||||
headers = {}
|
headers = {}
|
||||||
|
instock_data = None
|
||||||
|
instock_data_js = ""
|
||||||
status_code = None
|
status_code = None
|
||||||
webdriver_js_execute_code = None
|
webdriver_js_execute_code = None
|
||||||
xpath_data = None
|
xpath_data = None
|
||||||
xpath_element_js = ""
|
xpath_element_js = ""
|
||||||
instock_data = None
|
|
||||||
instock_data_js = ""
|
|
||||||
|
|
||||||
# Will be needed in the future by the VisualSelector, always get this where possible.
|
# Will be needed in the future by the VisualSelector, always get this where possible.
|
||||||
screenshot = False
|
screenshot = False
|
||||||
|
@ -252,16 +253,19 @@ class base_html_playwright(Fetcher):
|
||||||
|
|
||||||
proxy = None
|
proxy = None
|
||||||
|
|
||||||
def __init__(self, proxy_override=None, browser_connection_url=None):
|
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
|
self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
|
||||||
|
|
||||||
# .strip('"') is going to save someone a lot of time when they accidentally wrap the env value in quotes
|
if custom_browser_connection_url:
|
||||||
if not browser_connection_url:
|
self.browser_connection_is_custom = True
|
||||||
self.browser_connection_url = os.getenv("PLAYWRIGHT_DRIVER_URL", 'ws://playwright-chrome:3000').strip('"')
|
self.browser_connection_url = custom_browser_connection_url
|
||||||
else:
|
else:
|
||||||
self.browser_connection_url = browser_connection_url
|
# Fallback to fetching from system
|
||||||
|
# .strip('"') is going to save someone a lot of time when they accidentally wrap the env value in quotes
|
||||||
|
self.browser_connection_url = os.getenv("PLAYWRIGHT_DRIVER_URL", 'ws://playwright-chrome:3000').strip('"')
|
||||||
|
|
||||||
|
|
||||||
# If any proxy settings are enabled, then we should setup the proxy object
|
# If any proxy settings are enabled, then we should setup the proxy object
|
||||||
proxy_args = {}
|
proxy_args = {}
|
||||||
|
@ -421,8 +425,10 @@ class base_html_playwright(Fetcher):
|
||||||
current_include_filters=None,
|
current_include_filters=None,
|
||||||
is_binary=False):
|
is_binary=False):
|
||||||
|
|
||||||
|
|
||||||
# For now, USE_EXPERIMENTAL_PUPPETEER_FETCH is not supported by watches with BrowserSteps (for now!)
|
# For now, USE_EXPERIMENTAL_PUPPETEER_FETCH is not supported by watches with BrowserSteps (for now!)
|
||||||
if not self.browser_steps and os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH'):
|
# browser_connection_is_custom doesn't work with the puppeteer-style fetch (use native playwright in this case too)
|
||||||
|
if not self.browser_connection_is_custom and not self.browser_steps and os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH'):
|
||||||
if strtobool(os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH')):
|
if strtobool(os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH')):
|
||||||
# Temporary backup solution until we rewrite the playwright code
|
# Temporary backup solution until we rewrite the playwright code
|
||||||
return self.run_fetch_browserless_puppeteer(
|
return self.run_fetch_browserless_puppeteer(
|
||||||
|
@ -569,15 +575,16 @@ class base_html_webdriver(Fetcher):
|
||||||
'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
|
'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
|
||||||
proxy = None
|
proxy = None
|
||||||
|
|
||||||
def __init__(self, proxy_override=None, browser_connection_url=None):
|
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
|
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
|
||||||
|
|
||||||
# .strip('"') is going to save someone a lot of time when they accidentally wrap the env value in quotes
|
# .strip('"') is going to save someone a lot of time when they accidentally wrap the env value in quotes
|
||||||
if not browser_connection_url:
|
if not custom_browser_connection_url:
|
||||||
self.browser_connection_url = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')
|
self.browser_connection_url = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')
|
||||||
else:
|
else:
|
||||||
self.browser_connection_url = browser_connection_url
|
self.browser_connection_is_custom = True
|
||||||
|
self.browser_connection_url = custom_browser_connection_url
|
||||||
|
|
||||||
# If any proxy settings are enabled, then we should setup the proxy object
|
# If any proxy settings are enabled, then we should setup the proxy object
|
||||||
proxy_args = {}
|
proxy_args = {}
|
||||||
|
@ -674,7 +681,7 @@ class base_html_webdriver(Fetcher):
|
||||||
class html_requests(Fetcher):
|
class html_requests(Fetcher):
|
||||||
fetcher_description = "Basic fast Plaintext/HTTP Client"
|
fetcher_description = "Basic fast Plaintext/HTTP Client"
|
||||||
|
|
||||||
def __init__(self, proxy_override=None, browser_connection_url=None):
|
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.proxy_override = proxy_override
|
self.proxy_override = proxy_override
|
||||||
# browser_connection_url is None because it's always 'launched locally'
|
# browser_connection_url is None because it's always 'launched locally'
|
||||||
|
|
|
@ -43,14 +43,14 @@ class difference_detection_processor():
|
||||||
|
|
||||||
# In the case that the preferred fetcher was a browser config with custom connection URL..
|
# In the case that the preferred fetcher was a browser config with custom connection URL..
|
||||||
# @todo - on save watch, if it's extra_browser_ then it should be obvious it will use playwright (like if it's requests now..)
|
# @todo - on save watch, if it's extra_browser_ then it should be obvious it will use playwright (like if it's requests now..)
|
||||||
browser_connection_url = None
|
custom_browser_connection_url = None
|
||||||
if prefer_fetch_backend.startswith('extra_browser_'):
|
if prefer_fetch_backend.startswith('extra_browser_'):
|
||||||
(t, key) = prefer_fetch_backend.split('extra_browser_')
|
(t, key) = prefer_fetch_backend.split('extra_browser_')
|
||||||
connection = list(
|
connection = list(
|
||||||
filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
|
filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
|
||||||
if connection:
|
if connection:
|
||||||
prefer_fetch_backend = 'base_html_playwright'
|
prefer_fetch_backend = 'base_html_playwright'
|
||||||
browser_connection_url = connection[0].get('browser_connection_url')
|
custom_browser_connection_url = connection[0].get('browser_connection_url')
|
||||||
|
|
||||||
# PDF should be html_requests because playwright will serve it up (so far) in an embedded page
|
# PDF should be html_requests because playwright will serve it up (so far) in an embedded page
|
||||||
# @todo https://github.com/dgtlmoon/changedetection.io/issues/2019
|
# @todo https://github.com/dgtlmoon/changedetection.io/issues/2019
|
||||||
|
@ -74,7 +74,7 @@ class difference_detection_processor():
|
||||||
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
||||||
# When browser_connection_url is None, the method should default to working out what the best defaults are (os env vars etc)
|
# When browser_connection_url is None, the method should default to working out what the best defaults are (os env vars etc)
|
||||||
self.fetcher = fetcher_obj(proxy_override=proxy_url,
|
self.fetcher = fetcher_obj(proxy_override=proxy_url,
|
||||||
browser_connection_url=browser_connection_url
|
custom_browser_connection_url=custom_browser_connection_url
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.watch.has_browser_steps:
|
if self.watch.has_browser_steps:
|
||||||
|
|
Ładowanie…
Reference in New Issue