kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Tweaks to playwright fetch code - better timeout handling
rodzic
a07ca4b136
commit
358a365303
|
@ -287,7 +287,8 @@ class base_html_playwright(Fetcher):
|
||||||
|
|
||||||
# Seemed to cause a connection Exception even though I can see it connect
|
# Seemed to cause a connection Exception even though I can see it connect
|
||||||
# self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
|
# self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
|
||||||
browser = browser_type.connect_over_cdp(self.command_executor, timeout=timeout * 1000)
|
# 60,000 ms connection timeout only
|
||||||
|
browser = browser_type.connect_over_cdp(self.command_executor, timeout=60000)
|
||||||
|
|
||||||
# Set user agent to prevent Cloudflare from blocking the browser
|
# Set user agent to prevent Cloudflare from blocking the browser
|
||||||
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
|
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
|
||||||
|
@ -302,19 +303,24 @@ class base_html_playwright(Fetcher):
|
||||||
|
|
||||||
page = context.new_page()
|
page = context.new_page()
|
||||||
try:
|
try:
|
||||||
|
page.set_default_navigation_timeout(90000)
|
||||||
|
page.set_default_timeout(90000)
|
||||||
|
|
||||||
# Bug - never set viewport size BEFORE page.goto
|
# Bug - never set viewport size BEFORE page.goto
|
||||||
response = page.goto(url, timeout=timeout * 1000, wait_until='commit')
|
|
||||||
# Wait_until = commit
|
# Waits for the next navigation. Using Python context manager
|
||||||
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.
|
# prevents a race condition between clicking and waiting for a navigation.
|
||||||
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
|
with page.expect_navigation():
|
||||||
# This seemed to solve nearly all 'TimeoutErrors'
|
response = page.goto(url, wait_until='load')
|
||||||
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
|
||||||
page.wait_for_timeout(extra_wait * 1000)
|
|
||||||
except playwright._impl._api_types.TimeoutError as e:
|
except playwright._impl._api_types.TimeoutError as e:
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
raise EmptyReply(url=url, status_code=None)
|
# This can be ok, we will try to grab what we could retrieve
|
||||||
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
print ("other exception when page.goto")
|
||||||
|
print (str(e))
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
raise PageUnloadable(url=url, status_code=None)
|
raise PageUnloadable(url=url, status_code=None)
|
||||||
|
@ -322,18 +328,23 @@ class base_html_playwright(Fetcher):
|
||||||
if response is None:
|
if response is None:
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
raise EmptyReply(url=url, status_code=None)
|
print ("response object was none")
|
||||||
|
print (str(e))
|
||||||
if len(page.content().strip()) == 0:
|
|
||||||
context.close()
|
|
||||||
browser.close()
|
|
||||||
raise EmptyReply(url=url, status_code=None)
|
raise EmptyReply(url=url, status_code=None)
|
||||||
|
|
||||||
# Bug 2(?) Set the viewport size AFTER loading the page
|
# Bug 2(?) Set the viewport size AFTER loading the page
|
||||||
page.set_viewport_size({"width": 1280, "height": 1024})
|
page.set_viewport_size({"width": 1280, "height": 1024})
|
||||||
|
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
||||||
self.status_code = response.status
|
time.sleep(extra_wait)
|
||||||
self.content = page.content()
|
self.content = page.content()
|
||||||
|
self.status_code = response.status
|
||||||
|
|
||||||
|
if len(self.content.strip()) == 0:
|
||||||
|
context.close()
|
||||||
|
browser.close()
|
||||||
|
print ("Content was empty")
|
||||||
|
print (str(e))
|
||||||
|
raise EmptyReply(url=url, status_code=None)
|
||||||
self.headers = response.all_headers()
|
self.headers = response.all_headers()
|
||||||
|
|
||||||
if current_css_filter is not None:
|
if current_css_filter is not None:
|
||||||
|
|
Ładowanie…
Reference in New Issue