kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Fetching/Requests - Fixing user agent header overrides per-watch of global settings (#2409)
rodzic
de48892243
commit
c6ee6687b5
|
@ -28,7 +28,7 @@ def manage_user_agent(headers, current_ua=''):
|
|||
:return:
|
||||
"""
|
||||
# Ask it what the user agent is; if it's obviously ChromeHeadless, switch it to the default
|
||||
ua_in_custom_headers = next((v for k, v in headers.items() if k.lower() == "user-agent"), None)
|
||||
ua_in_custom_headers = headers.get('User-Agent')
|
||||
if ua_in_custom_headers:
|
||||
return ua_in_custom_headers
|
||||
|
||||
|
|
|
@ -115,12 +115,11 @@ class fetcher(Fetcher):
|
|||
|
||||
# This user agent is similar to what was used when tweaking the evasions in inject_evasions_into_page(..)
|
||||
user_agent = None
|
||||
if request_headers:
|
||||
user_agent = next((value for key, value in request_headers.items() if key.lower().strip() == 'user-agent'), None)
|
||||
if user_agent:
|
||||
await self.page.setUserAgent(user_agent)
|
||||
# Remove it so it's not sent again with headers after
|
||||
[request_headers.pop(key) for key in list(request_headers) if key.lower().strip() == 'user-agent'.lower().strip()]
|
||||
if request_headers and request_headers.get('User-Agent'):
|
||||
# request_headers should now be a CaseInsensitiveDict
|
||||
# Remove it so it's not sent again with headers after
|
||||
user_agent = request_headers.pop('User-Agent').strip()
|
||||
await self.page.setUserAgent(user_agent)
|
||||
|
||||
if not user_agent:
|
||||
# Attempt to strip 'HeadlessChrome' etc
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from abc import abstractmethod
|
||||
import os
|
||||
import hashlib
|
||||
import re
|
||||
from copy import deepcopy
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from copy import deepcopy
|
||||
from loguru import logger
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
|
||||
class difference_detection_processor():
|
||||
|
||||
|
@ -21,7 +21,7 @@ class difference_detection_processor():
|
|||
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
|
||||
|
||||
def call_browser(self):
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
# Protect against file:// access
|
||||
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
|
||||
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
|
||||
|
@ -93,14 +93,16 @@ class difference_detection_processor():
|
|||
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
|
||||
|
||||
# Tweak the base config with the per-watch ones
|
||||
request_headers = self.watch.get('headers', [])
|
||||
request_headers.update(self.datastore.get_all_base_headers())
|
||||
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
|
||||
request_headers = CaseInsensitiveDict()
|
||||
|
||||
ua = self.datastore.data['settings']['requests'].get('default_ua')
|
||||
if ua and ua.get(prefer_fetch_backend):
|
||||
request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
|
||||
|
||||
request_headers.update(self.watch.get('headers', {}))
|
||||
request_headers.update(self.datastore.get_all_base_headers())
|
||||
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
|
||||
|
||||
# https://github.com/psf/requests/issues/4525
|
||||
# Requests doesn't yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
|
||||
# do this by accident.
|
||||
|
|
|
@ -253,6 +253,62 @@ def test_method_in_request(client, live_server):
|
|||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
# Re #2408 - user-agent override test, also should handle case-insensitive header deduplication
|
||||
def test_ua_global_override(client, live_server):
    """Verify that a per-watch User-Agent header replaces the global default UA.

    Steps:
      1. Set a global default user agent for the ``html_requests`` backend
         and confirm it appears in the preview of a freshly imported watch.
      2. Edit the watch to carry its own ``User-AGent`` header (deliberately
         mixed-case, so header matching must be case-insensitive) and confirm
         that only the per-watch value appears in the preview afterwards.
      3. Delete all watches.
    """
    # live_server_setup(live_server)
    # presumably the 'test_headers' endpoint echoes the request headers it received - confirm
    watch_url = url_for('test_headers', _external=True)
    global_ua = b"html-requests-user-agent"

    # 1) Global default user agent for the html_requests fetcher
    settings_form = {
        "application-fetch_backend": "html_requests",
        "application-minutes_between_check": 180,
        "requests-default_ua-html_requests": "html-requests-user-agent",
    }
    settings_res = client.post(
        url_for("settings_page"),
        data=settings_form,
        follow_redirects=True,
    )
    assert b'Settings updated' in settings_res.data

    import_res = client.post(
        url_for("import_page"),
        data={"urls": watch_url},
        follow_redirects=True,
    )
    assert b"1 Imported" in import_res.data

    wait_for_all_checks(client)
    preview_res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True,
    )

    # The default user-agent should be visible at this point
    assert global_ua in preview_res.data

    # 2) Now put a custom user agent into the watch's own headers
    edit_form = {
        "url": watch_url,
        "tags": "testtag",
        "fetch_backend": 'html_requests',
        # Important - the odd capitalisation exercises case-insensitive handling
        "headers": "User-AGent: agent-from-watch",
    }
    edit_res = client.post(
        url_for("edit_page", uuid="first"),
        data=edit_form,
        follow_redirects=True,
    )
    assert b"Updated watch." in edit_res.data

    wait_for_all_checks(client)
    preview_res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True,
    )
    # Per-watch value must be present, and the global default must be gone
    assert b"agent-from-watch" in preview_res.data
    assert global_ua not in preview_res.data

    # 3) Clean up
    delete_res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in delete_res.data
|
||||
|
||||
def test_headers_textfile_in_request(client, live_server):
|
||||
#live_server_setup(live_server)
|
||||
# Add our URL to the import page
|
||||
|
@ -333,7 +389,7 @@ def test_headers_textfile_in_request(client, live_server):
|
|||
# Not needed anymore
|
||||
os.unlink('test-datastore/headers.txt')
|
||||
os.unlink('test-datastore/headers-testtag.txt')
|
||||
os.unlink('test-datastore/' + extract_UUID_from_client(client) + '/headers.txt')
|
||||
|
||||
# The service should echo back the request verb
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first"),
|
||||
|
|
Ładowanie…
Reference in New Issue