kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Fetching/Requests - Fixing user agent header overrides per-watch of global settings (#2409)
rodzic
de48892243
commit
c6ee6687b5
|
@ -28,7 +28,7 @@ def manage_user_agent(headers, current_ua=''):
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
# Ask it what the user agent is; if it's obviously ChromeHeadless, switch it to the default
|
# Ask it what the user agent is; if it's obviously ChromeHeadless, switch it to the default
|
||||||
ua_in_custom_headers = next((v for k, v in headers.items() if k.lower() == "user-agent"), None)
|
ua_in_custom_headers = headers.get('User-Agent')
|
||||||
if ua_in_custom_headers:
|
if ua_in_custom_headers:
|
||||||
return ua_in_custom_headers
|
return ua_in_custom_headers
|
||||||
|
|
||||||
|
|
|
@ -115,12 +115,11 @@ class fetcher(Fetcher):
|
||||||
|
|
||||||
# This user agent is similar to what was used when tweaking the evasions in inject_evasions_into_page(..)
|
# This user agent is similar to what was used when tweaking the evasions in inject_evasions_into_page(..)
|
||||||
user_agent = None
|
user_agent = None
|
||||||
if request_headers:
|
if request_headers and request_headers.get('User-Agent'):
|
||||||
user_agent = next((value for key, value in request_headers.items() if key.lower().strip() == 'user-agent'), None)
|
# request_headers should now be a CaseInsensitiveDict
|
||||||
if user_agent:
|
|
||||||
await self.page.setUserAgent(user_agent)
|
|
||||||
# Remove it so it's not sent again with headers after
|
# Remove it so it's not sent again with headers after
|
||||||
[request_headers.pop(key) for key in list(request_headers) if key.lower().strip() == 'user-agent'.lower().strip()]
|
user_agent = request_headers.pop('User-Agent').strip()
|
||||||
|
await self.page.setUserAgent(user_agent)
|
||||||
|
|
||||||
if not user_agent:
|
if not user_agent:
|
||||||
# Attempt to strip 'HeadlessChrome' etc
|
# Attempt to strip 'HeadlessChrome' etc
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
import os
|
|
||||||
import hashlib
|
|
||||||
import re
|
|
||||||
from copy import deepcopy
|
|
||||||
from changedetectionio.strtobool import strtobool
|
from changedetectionio.strtobool import strtobool
|
||||||
|
from copy import deepcopy
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
class difference_detection_processor():
|
class difference_detection_processor():
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ class difference_detection_processor():
|
||||||
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
|
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
|
||||||
|
|
||||||
def call_browser(self):
|
def call_browser(self):
|
||||||
|
from requests.structures import CaseInsensitiveDict
|
||||||
# Protect against file:// access
|
# Protect against file:// access
|
||||||
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
|
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
|
||||||
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
|
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
|
||||||
|
@ -93,14 +93,16 @@ class difference_detection_processor():
|
||||||
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
|
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
|
||||||
|
|
||||||
# Tweak the base config with the per-watch ones
|
# Tweak the base config with the per-watch ones
|
||||||
request_headers = self.watch.get('headers', [])
|
request_headers = CaseInsensitiveDict()
|
||||||
request_headers.update(self.datastore.get_all_base_headers())
|
|
||||||
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
|
|
||||||
|
|
||||||
ua = self.datastore.data['settings']['requests'].get('default_ua')
|
ua = self.datastore.data['settings']['requests'].get('default_ua')
|
||||||
if ua and ua.get(prefer_fetch_backend):
|
if ua and ua.get(prefer_fetch_backend):
|
||||||
request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
|
request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
|
||||||
|
|
||||||
|
request_headers.update(self.watch.get('headers', {}))
|
||||||
|
request_headers.update(self.datastore.get_all_base_headers())
|
||||||
|
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
|
||||||
|
|
||||||
# https://github.com/psf/requests/issues/4525
|
# https://github.com/psf/requests/issues/4525
|
||||||
# Requests doesn't yet support brotli encoding, so don't put 'br' here; be totally sure that the user cannot
|
# Requests doesn't yet support brotli encoding, so don't put 'br' here; be totally sure that the user cannot
|
||||||
# do this by accident.
|
# do this by accident.
|
||||||
|
|
|
@ -253,6 +253,62 @@ def test_method_in_request(client, live_server):
|
||||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||||
assert b'Deleted' in res.data
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
|
# Re #2408 - user-agent override test, also should handle case-insensitive header deduplication
|
||||||
|
def test_ua_global_override(client, live_server):
|
||||||
|
# live_server_setup(live_server)
|
||||||
|
test_url = url_for('test_headers', _external=True)
|
||||||
|
|
||||||
|
res = client.post(
|
||||||
|
url_for("settings_page"),
|
||||||
|
data={
|
||||||
|
"application-fetch_backend": "html_requests",
|
||||||
|
"application-minutes_between_check": 180,
|
||||||
|
"requests-default_ua-html_requests": "html-requests-user-agent"
|
||||||
|
},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b'Settings updated' in res.data
|
||||||
|
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"),
|
||||||
|
data={"urls": test_url},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
res = client.get(
|
||||||
|
url_for("preview_page", uuid="first"),
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
|
||||||
|
assert b"html-requests-user-agent" in res.data
|
||||||
|
# default user-agent should have shown by now
|
||||||
|
# now add a custom one in the headers
|
||||||
|
|
||||||
|
|
||||||
|
# Add some headers to a request
|
||||||
|
res = client.post(
|
||||||
|
url_for("edit_page", uuid="first"),
|
||||||
|
data={
|
||||||
|
"url": test_url,
|
||||||
|
"tags": "testtag",
|
||||||
|
"fetch_backend": 'html_requests',
|
||||||
|
# Important - also test case-insensitive
|
||||||
|
"headers": "User-AGent: agent-from-watch"},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"Updated watch." in res.data
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
res = client.get(
|
||||||
|
url_for("preview_page", uuid="first"),
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"agent-from-watch" in res.data
|
||||||
|
assert b"html-requests-user-agent" not in res.data
|
||||||
|
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
def test_headers_textfile_in_request(client, live_server):
|
def test_headers_textfile_in_request(client, live_server):
|
||||||
#live_server_setup(live_server)
|
#live_server_setup(live_server)
|
||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
|
@ -333,7 +389,7 @@ def test_headers_textfile_in_request(client, live_server):
|
||||||
# Not needed anymore
|
# Not needed anymore
|
||||||
os.unlink('test-datastore/headers.txt')
|
os.unlink('test-datastore/headers.txt')
|
||||||
os.unlink('test-datastore/headers-testtag.txt')
|
os.unlink('test-datastore/headers-testtag.txt')
|
||||||
os.unlink('test-datastore/' + extract_UUID_from_client(client) + '/headers.txt')
|
|
||||||
# The service should echo back the request verb
|
# The service should echo back the request verb
|
||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("preview_page", uuid="first"),
|
url_for("preview_page", uuid="first"),
|
||||||
|
|
Ładowanie…
Reference in New Issue