kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Page fetching - Fixed possible incorrect browser user-agent header in playwright/puppeteer/browserless fetchers (#1811)
rodzic
51e2e8a226
commit
57de4ffe4f
|
@@ -343,8 +343,8 @@ class base_html_playwright(Fetcher):
|
||||||
'req_headers': request_headers,
|
'req_headers': request_headers,
|
||||||
'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)),
|
'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)),
|
||||||
'url': url,
|
'url': url,
|
||||||
'user_agent': request_headers.get('User-Agent', 'Mozilla/5.0'),
|
'user_agent': {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
|
||||||
'proxy_username': self.proxy.get('username','') if self.proxy else False,
|
'proxy_username': self.proxy.get('username', '') if self.proxy else False,
|
||||||
'proxy_password': self.proxy.get('password', '') if self.proxy else False,
|
'proxy_password': self.proxy.get('password', '') if self.proxy else False,
|
||||||
'no_cache_list': [
|
'no_cache_list': [
|
||||||
'twitter',
|
'twitter',
|
||||||
|
@@ -443,7 +443,7 @@ class base_html_playwright(Fetcher):
|
||||||
# Set user agent to prevent Cloudflare from blocking the browser
|
# Set user agent to prevent Cloudflare from blocking the browser
|
||||||
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
|
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
|
||||||
context = browser.new_context(
|
context = browser.new_context(
|
||||||
user_agent=request_headers.get('User-Agent', 'Mozilla/5.0'),
|
user_agent={k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
|
||||||
proxy=self.proxy,
|
proxy=self.proxy,
|
||||||
# This is needed to enable JavaScript execution on GitHub and others
|
# This is needed to enable JavaScript execution on GitHub and others
|
||||||
bypass_csp=True,
|
bypass_csp=True,
|
||||||
|
@@ -684,7 +684,7 @@ class html_requests(Fetcher):
|
||||||
is_binary=False):
|
is_binary=False):
|
||||||
|
|
||||||
# Make requests use a more modern looking user-agent
|
# Make requests use a more modern looking user-agent
|
||||||
if not 'User-Agent' in request_headers:
|
if not {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None):
|
||||||
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
|
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
|
||||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
|
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
|
||||||
|
|
||||||
|
|
|
@@ -18,7 +18,9 @@ module.exports = async ({page, context}) => {
|
||||||
|
|
||||||
await page.setBypassCSP(true)
|
await page.setBypassCSP(true)
|
||||||
await page.setExtraHTTPHeaders(req_headers);
|
await page.setExtraHTTPHeaders(req_headers);
|
||||||
await page.setUserAgent(user_agent);
|
if (user_agent) {
|
||||||
|
await page.setUserAgent(user_agent);
|
||||||
|
}
|
||||||
// https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded
|
// https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded
|
||||||
|
|
||||||
await page.setDefaultNavigationTimeout(0);
|
await page.setDefaultNavigationTimeout(0);
|
||||||
|
|
Ładowanie…
Reference in New Issue