kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Ability to set default User-Agent for either fetching types directly in the UI (#2375)
rodzic
a8959be348
commit
f49eb4567f
|
@ -9,7 +9,6 @@ from loguru import logger
|
|||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError
|
||||
|
||||
|
||||
class fetcher(Fetcher):
|
||||
fetcher_description = "Puppeteer/direct {}/Javascript".format(
|
||||
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
|
||||
|
|
|
@ -30,11 +30,6 @@ class fetcher(Fetcher):
|
|||
if self.browser_steps_get_valid_steps():
|
||||
raise BrowserStepsInUnsupportedFetcher(url=url)
|
||||
|
||||
# Make requests use a more modern looking user-agent
|
||||
if not {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None):
|
||||
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
|
||||
|
||||
proxies = {}
|
||||
|
||||
# Allows overriding the proxy on a per-request basis
|
||||
|
|
|
@ -526,6 +526,10 @@ class SingleExtraBrowser(Form):
|
|||
browser_connection_url = StringField('Browser connection URL', [validators.Optional()], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
|
||||
# @todo do the validation here instead
|
||||
|
||||
class DefaultUAInputForm(Form):
|
||||
html_requests = StringField('Plaintext requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL") or os.getenv("WEBDRIVER_URL"):
|
||||
html_webdriver = StringField('Chrome requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
|
||||
# datastore.data['settings']['requests']..
|
||||
class globalSettingsRequestForm(Form):
|
||||
|
@ -537,6 +541,8 @@ class globalSettingsRequestForm(Form):
|
|||
extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
|
||||
extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
|
||||
|
||||
default_ua = FormField(DefaultUAInputForm, label="Default User-Agent overrides")
|
||||
|
||||
def validate_extra_proxies(self, extra_validators=None):
|
||||
for e in self.data['extra_proxies']:
|
||||
if e.get('proxy_name') or e.get('proxy_url'):
|
||||
|
|
|
@ -6,6 +6,7 @@ from changedetectionio.notification import (
|
|||
)
|
||||
|
||||
_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
|
||||
DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
|
||||
|
||||
class model(dict):
|
||||
base_config = {
|
||||
|
@ -22,6 +23,10 @@ class model(dict):
|
|||
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
|
||||
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
|
||||
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections
|
||||
'default_ua': {
|
||||
'html_requests': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", DEFAULT_SETTINGS_HEADERS_USERAGENT),
|
||||
'html_webdriver': None,
|
||||
}
|
||||
},
|
||||
'application': {
|
||||
# Custom notification content
|
||||
|
|
|
@ -97,6 +97,10 @@ class difference_detection_processor():
|
|||
request_headers.update(self.datastore.get_all_base_headers())
|
||||
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
|
||||
|
||||
ua = self.datastore.data['settings']['requests'].get('default_ua')
|
||||
if ua and ua.get(prefer_fetch_backend):
|
||||
request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
|
||||
|
||||
# https://github.com/psf/requests/issues/4525
|
||||
# Requests doesn't yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
|
||||
# do this by accident.
|
||||
|
|
|
@ -554,7 +554,6 @@ class ChangeDetectionStore:
|
|||
return os.path.isfile(filepath)
|
||||
|
||||
def get_all_base_headers(self):
|
||||
from .model.App import parse_headers_from_text_file
|
||||
headers = {}
|
||||
# Global app settings
|
||||
headers.update(self.data['settings'].get('headers', {}))
|
||||
|
|
|
@ -108,8 +108,6 @@
|
|||
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
|
||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||
</span>
|
||||
<br>
|
||||
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a>
|
||||
</div>
|
||||
<fieldset class="pure-group" id="webdriver-override-options" data-visible-for="application-fetch_backend=html_webdriver">
|
||||
<div class="pure-form-message-inline">
|
||||
|
@ -121,6 +119,18 @@
|
|||
{{ render_field(form.application.form.webdriver_delay) }}
|
||||
</div>
|
||||
</fieldset>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.requests.form.default_ua) }}
|
||||
<span class="pure-form-message-inline">
|
||||
Applied to all requests.<br><br>
|
||||
Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it's important to consider <a href="https://changedetection.io/tutorial/what-are-main-types-anti-robot-mechanisms">all of the ways that the browser is detected</a>.
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<br>
|
||||
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="filters">
|
||||
|
@ -190,7 +200,7 @@ nav
|
|||
<a id="chrome-extension-link"
|
||||
title="Try our new Chrome Extension!"
|
||||
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
|
||||
<img src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}">
|
||||
<img src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome">
|
||||
Chrome Webstore
|
||||
</a>
|
||||
</p>
|
||||
|
|
|
@ -256,12 +256,40 @@ def test_method_in_request(client, live_server):
|
|||
def test_headers_textfile_in_request(client, live_server):
|
||||
#live_server_setup(live_server)
|
||||
# Add our URL to the import page
|
||||
|
||||
webdriver_ua = "Hello fancy webdriver UA 1.0"
|
||||
requests_ua = "Hello basic requests UA 1.1"
|
||||
|
||||
test_url = url_for('test_headers', _external=True)
|
||||
if os.getenv('PLAYWRIGHT_DRIVER_URL'):
|
||||
# Because it's no longer calling back to localhost but from the browser container, set in test-only.yml
|
||||
test_url = test_url.replace('localhost', 'cdio')
|
||||
|
||||
print ("TEST URL IS ",test_url)
|
||||
form_data = {
|
||||
"application-fetch_backend": "html_requests",
|
||||
"application-minutes_between_check": 180,
|
||||
"requests-default_ua-html_requests": requests_ua
|
||||
}
|
||||
|
||||
if os.getenv('PLAYWRIGHT_DRIVER_URL'):
|
||||
form_data["requests-default_ua-html_webdriver"] = webdriver_ua
|
||||
|
||||
res = client.post(
|
||||
url_for("settings_page"),
|
||||
data=form_data,
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b'Settings updated' in res.data
|
||||
|
||||
res = client.get(url_for("settings_page"))
|
||||
|
||||
# Only when some kind of real browser is set up
|
||||
if os.getenv('PLAYWRIGHT_DRIVER_URL'):
|
||||
assert b'requests-default_ua-html_webdriver' in res.data
|
||||
|
||||
# Field should always be there
|
||||
assert b"requests-default_ua-html_requests" in res.data
|
||||
|
||||
# Add the test URL twice, we will check
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
|
@ -272,15 +300,14 @@ def test_headers_textfile_in_request(client, live_server):
|
|||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
|
||||
# Add some headers to a request
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={
|
||||
"url": test_url,
|
||||
"tags": "testtag",
|
||||
"fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
|
||||
"headers": "xxx:ooo\ncool:yeah\r\n"},
|
||||
"url": test_url,
|
||||
"tags": "testtag",
|
||||
"fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
|
||||
"headers": "xxx:ooo\ncool:yeah\r\n"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
|
@ -292,7 +319,7 @@ def test_headers_textfile_in_request(client, live_server):
|
|||
with open('test-datastore/headers.txt', 'w') as f:
|
||||
f.write("global-header: nice\r\nnext-global-header: nice")
|
||||
|
||||
with open('test-datastore/'+extract_UUID_from_client(client)+'/headers.txt', 'w') as f:
|
||||
with open('test-datastore/' + extract_UUID_from_client(client) + '/headers.txt', 'w') as f:
|
||||
f.write("watch-header: nice")
|
||||
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
|
@ -306,7 +333,7 @@ def test_headers_textfile_in_request(client, live_server):
|
|||
# Not needed anymore
|
||||
os.unlink('test-datastore/headers.txt')
|
||||
os.unlink('test-datastore/headers-testtag.txt')
|
||||
os.unlink('test-datastore/'+extract_UUID_from_client(client)+'/headers.txt')
|
||||
os.unlink('test-datastore/' + extract_UUID_from_client(client) + '/headers.txt')
|
||||
# The service should echo back the request verb
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first"),
|
||||
|
@ -319,7 +346,12 @@ def test_headers_textfile_in_request(client, live_server):
|
|||
assert b"Watch-Header:nice" in res.data
|
||||
assert b"Tag-Header:test" in res.data
|
||||
|
||||
# Check the custom UA from system settings page made it through
|
||||
if os.getenv('PLAYWRIGHT_DRIVER_URL'):
|
||||
assert "User-Agent:".encode('utf-8') + webdriver_ua.encode('utf-8') in res.data
|
||||
else:
|
||||
assert "User-Agent:".encode('utf-8') + requests_ua.encode('utf-8') in res.data
|
||||
|
||||
#unlink headers.txt on start/stop
|
||||
# unlink headers.txt on start/stop
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
assert b'Deleted' in res.data
|
||||
|
|
Ładowanie…
Reference in New Issue