kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Fetching pages - Custom browser endpoints should not have default proxy info added
rodzic
4163030805
commit
ccb42bcb12
|
@ -36,6 +36,13 @@ class BrowserConnectError(Exception):
|
||||||
logger.error(f"Browser connection error {msg}")
|
logger.error(f"Browser connection error {msg}")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
class BrowserFetchTimedOut(Exception):
|
||||||
|
msg = ''
|
||||||
|
def __init__(self, msg):
|
||||||
|
self.msg = msg
|
||||||
|
logger.error(f"Browser processing took too long - {msg}")
|
||||||
|
return
|
||||||
|
|
||||||
class BrowserStepsStepException(Exception):
|
class BrowserStepsStepException(Exception):
|
||||||
def __init__(self, step_n, original_e):
|
def __init__(self, step_n, original_e):
|
||||||
self.step_n = step_n
|
self.step_n = step_n
|
||||||
|
|
|
@ -6,7 +6,7 @@ from urllib.parse import urlparse
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from changedetectionio.content_fetchers.base import Fetcher
|
from changedetectionio.content_fetchers.base import Fetcher
|
||||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable, BrowserConnectError
|
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError
|
||||||
|
|
||||||
|
|
||||||
class fetcher(Fetcher):
|
class fetcher(Fetcher):
|
||||||
|
@ -221,14 +221,21 @@ class fetcher(Fetcher):
|
||||||
def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
|
def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
|
||||||
current_include_filters=None, is_binary=False):
|
current_include_filters=None, is_binary=False):
|
||||||
|
|
||||||
|
#@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
|
||||||
|
max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)
|
||||||
|
|
||||||
# This will work in 3.10 but not >= 3.11 because 3.11 wants tasks only
|
# This will work in 3.10 but not >= 3.11 because 3.11 wants tasks only
|
||||||
asyncio.run(self.main(
|
try:
|
||||||
url=url,
|
asyncio.run(asyncio.wait_for(self.main(
|
||||||
timeout=timeout,
|
url=url,
|
||||||
request_headers=request_headers,
|
timeout=timeout,
|
||||||
request_body=request_body,
|
request_headers=request_headers,
|
||||||
request_method=request_method,
|
request_body=request_body,
|
||||||
ignore_status_codes=ignore_status_codes,
|
request_method=request_method,
|
||||||
current_include_filters=current_include_filters,
|
ignore_status_codes=ignore_status_codes,
|
||||||
is_binary=is_binary
|
current_include_filters=current_include_filters,
|
||||||
))
|
is_binary=is_binary
|
||||||
|
), timeout=max_time))
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
||||||
|
|
||||||
|
|
|
@ -75,8 +75,12 @@ class difference_detection_processor():
|
||||||
|
|
||||||
proxy_url = None
|
proxy_url = None
|
||||||
if preferred_proxy_id:
|
if preferred_proxy_id:
|
||||||
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
|
# Custom browser endpoints should not have a proxy added
|
||||||
logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
|
if not preferred_proxy_id.startswith('ui-'):
|
||||||
|
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
|
||||||
|
logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
|
||||||
|
else:
|
||||||
|
logger.debug(f"Skipping adding proxy data when custom Browser endpoint is specified.")
|
||||||
|
|
||||||
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
||||||
# When browser_connection_url is None, the method should default to working out the best defaults (os env vars, etc.)
|
# When browser_connection_url is None, the method should default to working out the best defaults (os env vars, etc.)
|
||||||
|
|
|
@ -369,6 +369,12 @@ class update_worker(threading.Thread):
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
process_changedetection_results = False
|
process_changedetection_results = False
|
||||||
|
except content_fetchers.exceptions.BrowserFetchTimedOut as e:
|
||||||
|
self.datastore.update_watch(uuid=uuid,
|
||||||
|
update_obj={'last_error': e.msg
|
||||||
|
}
|
||||||
|
)
|
||||||
|
process_changedetection_results = False
|
||||||
except content_fetchers.exceptions.BrowserStepsStepException as e:
|
except content_fetchers.exceptions.BrowserStepsStepException as e:
|
||||||
|
|
||||||
if not self.datastore.data['watching'].get(uuid):
|
if not self.datastore.data['watching'].get(uuid):
|
||||||
|
|
Ładowanie…
Reference in New Issue