kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Don't use the default Requests `user-agent` and `accept` headers in Playwright+Selenium requests; they break sites such as united.com. (#1004)
rodzic
17d37fb626
commit
669fd3ae0b
|
@ -575,6 +575,11 @@ class html_requests(Fetcher):
|
||||||
ignore_status_codes=False,
|
ignore_status_codes=False,
|
||||||
current_css_filter=None):
|
current_css_filter=None):
|
||||||
|
|
||||||
|
# Make requests use a more modern-looking user-agent
|
||||||
|
if not 'User-Agent' in request_headers:
|
||||||
|
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
|
||||||
|
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
|
||||||
|
|
||||||
proxies = {}
|
proxies = {}
|
||||||
|
|
||||||
# Allows overriding the proxy on a per-request basis
|
# Allows overriding the proxy on a per-request basis
|
||||||
|
|
|
@ -13,10 +13,6 @@ class model(dict):
|
||||||
'watching': {},
|
'watching': {},
|
||||||
'settings': {
|
'settings': {
|
||||||
'headers': {
|
'headers': {
|
||||||
'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
|
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
|
||||||
'Accept-Encoding': 'gzip, deflate', # No support for brotli in python requests yet.
|
|
||||||
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
|
|
||||||
},
|
},
|
||||||
'requests': {
|
'requests': {
|
||||||
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
|
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
|
||||||
|
|
|
@ -575,3 +575,11 @@ class ChangeDetectionStore:
|
||||||
continue
|
continue
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
# We incorrectly used common header overrides that should only apply to Requests
|
||||||
|
# These are now handled in content_fetcher::html_requests and shouldn't be passed to Playwright/Selenium
|
||||||
|
def update_7(self):
|
||||||
|
# These were hard-coded in early versions
|
||||||
|
for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
|
||||||
|
if self.data['settings']['headers'].get(v):
|
||||||
|
del self.data['settings']['headers'][v]
|
||||||
|
|
|
@ -10,7 +10,10 @@ flask_restful
|
||||||
pytz
|
pytz
|
||||||
|
|
||||||
# Set these versions together to avoid a RequestsDependencyWarning
|
# Set these versions together to avoid a RequestsDependencyWarning
|
||||||
requests[socks] ~= 2.26
|
# >= 2.26 also adds Brotli support if brotli is installed
|
||||||
|
brotli ~= 1.0
|
||||||
|
requests[socks] ~= 2.28
|
||||||
|
|
||||||
urllib3 > 1.26
|
urllib3 > 1.26
|
||||||
chardet > 2.3.0
|
chardet > 2.3.0
|
||||||
|
|
||||||
|
|
Ładowanie…
Reference in New Issue