kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Tidy up proxies.json logic, adding tests (#955)
rodzic
8567a83c47
commit
d4715e2bc8
|
@ -1444,12 +1444,7 @@ def ticker_thread_check_time_launch_checks():
|
||||||
if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
|
if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
|
||||||
|
|
||||||
# Proxies can be set to have a limit on seconds between which they can be called
|
# Proxies can be set to have a limit on seconds between which they can be called
|
||||||
watch_proxy = watch.get('proxy')
|
watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
||||||
if not watch_proxy:
|
|
||||||
watch_proxy = datastore.data['settings']['requests']['proxy']
|
|
||||||
if not watch_proxy:
|
|
||||||
watch_proxy = list(datastore.proxy_list.keys())[0]
|
|
||||||
|
|
||||||
if watch_proxy and watch_proxy in list(datastore.proxy_list.keys()):
|
if watch_proxy and watch_proxy in list(datastore.proxy_list.keys()):
|
||||||
# Proxy may also have some threshold minimum
|
# Proxy may also have some threshold minimum
|
||||||
proxy_list_reuse_time_minimum = int(datastore.proxy_list.get(watch_proxy, {}).get('reuse_time_minimum', 0))
|
proxy_list_reuse_time_minimum = int(datastore.proxy_list.get(watch_proxy, {}).get('reuse_time_minimum', 0))
|
||||||
|
|
|
@ -20,36 +20,6 @@ class perform_site_check():
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.datastore = datastore
|
self.datastore = datastore
|
||||||
|
|
||||||
# If there was a proxy list enabled, figure out what proxy_args/which proxy to use
|
|
||||||
# Returns the proxy as a URL
|
|
||||||
# if watch.proxy use that
|
|
||||||
# fetcher.proxy_override = watch.proxy or main config proxy
|
|
||||||
# Allows overriding the proxy on a per-request basis
|
|
||||||
# ALWAYS use the first one if nothing is selected
|
|
||||||
|
|
||||||
def set_proxy_from_list(self, watch):
|
|
||||||
proxy_args = None
|
|
||||||
if self.datastore.proxy_list is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# If its a valid one
|
|
||||||
if watch['proxy'] and watch['proxy'] in list(self.datastore.proxy_list.keys()):
|
|
||||||
proxy_args = self.datastore.proxy_list.get(watch['proxy']).get('url')
|
|
||||||
|
|
||||||
# not valid (including None), try the system one
|
|
||||||
else:
|
|
||||||
system_proxy = self.datastore.data['settings']['requests']['proxy']
|
|
||||||
# Is not None and exists
|
|
||||||
if self.datastore.proxy_list.get(system_proxy):
|
|
||||||
proxy_args = self.datastore.proxy_list.get(system_proxy).get('url')
|
|
||||||
|
|
||||||
# Fallback - Did not resolve anything, use the first available
|
|
||||||
if proxy_args is None:
|
|
||||||
first_default = list(self.datastore.proxy_list)[0]
|
|
||||||
proxy_args = self.datastore.proxy_list.get(first_default).get('url')
|
|
||||||
|
|
||||||
return proxy_args
|
|
||||||
|
|
||||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||||
# So convert it to inline flag "foobar(?i)" type configuration
|
# So convert it to inline flag "foobar(?i)" type configuration
|
||||||
def forward_slash_enclosed_regex_to_options(self, regex):
|
def forward_slash_enclosed_regex_to_options(self, regex):
|
||||||
|
@ -114,9 +84,12 @@ class perform_site_check():
|
||||||
# If the klass doesnt exist, just use a default
|
# If the klass doesnt exist, just use a default
|
||||||
klass = getattr(content_fetcher, "html_requests")
|
klass = getattr(content_fetcher, "html_requests")
|
||||||
|
|
||||||
proxy_url = self.set_proxy_from_list(watch)
|
proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
||||||
if proxy_url:
|
proxy_url = None
|
||||||
|
if proxy_id:
|
||||||
|
proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
|
||||||
print ("UUID {} Using proxy {}".format(uuid, proxy_url))
|
print ("UUID {} Using proxy {}".format(uuid, proxy_url))
|
||||||
|
|
||||||
fetcher = klass(proxy_override=proxy_url)
|
fetcher = klass(proxy_override=proxy_url)
|
||||||
|
|
||||||
# Configurable per-watch or global extra delay before extracting text (for webDriver types)
|
# Configurable per-watch or global extra delay before extracting text (for webDriver types)
|
||||||
|
|
|
@ -48,4 +48,48 @@ pytest tests/test_errorhandling.py
|
||||||
pytest tests/visualselector/test_fetch_data.py
|
pytest tests/visualselector/test_fetch_data.py
|
||||||
|
|
||||||
unset PLAYWRIGHT_DRIVER_URL
|
unset PLAYWRIGHT_DRIVER_URL
|
||||||
docker kill $$-test_browserless
|
docker kill $$-test_browserless
|
||||||
|
|
||||||
|
# Test proxy list handling, starting two squids on different ports.
# Both containers are started from the same tests/proxy_list/squid.conf; the checks below
# grep each container's logs for the requested hostname to see which proxy carried the request.

# Remember failed checks so the containers can still be cleaned up before exiting non-zero
proxy_checks_failed=0

docker run -d --name $$-squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3128:3128 ubuntu/squid:4.13-21.10_edge
docker run -d --name $$-squid-two --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3129:3128 ubuntu/squid:4.13-21.10_edge


# So, basic HTTP as env var test
export HTTP_PROXY=http://localhost:3128
export HTTPS_PROXY=http://localhost:3128
pytest tests/proxy_list/test_proxy.py
# Testing the pipeline inside "if" keeps the check working whether or not the script runs under "set -e"
if ! docker logs $$-squid-one 2>/dev/null | grep one.changedetection.io
then
  echo "Did not see a request to one.changedetection.io in the squid logs (while checking env vars HTTP_PROXY/HTTPS_PROXY)"
  proxy_checks_failed=1
fi
unset HTTP_PROXY
unset HTTPS_PROXY


# 2nd test actually choose the preferred proxy from proxies.json
cp tests/proxy_list/proxies.json-example ./test-datastore/proxies.json
# Makes a watch use a preferred proxy
pytest tests/proxy_list/test_multiple_proxy.py

# Should be a request in the default "first" squid
if ! docker logs $$-squid-one 2>/dev/null | grep chosen.changedetection.io
then
  echo "Did not see a request to chosen.changedetection.io in the squid-one logs (while checking preferred proxy)"
  proxy_checks_failed=1
fi

# And one in the 'second' squid (user selects this as preferred)
if ! docker logs $$-squid-two 2>/dev/null | grep chosen.changedetection.io
then
  echo "Did not see a request to chosen.changedetection.io in the squid-two logs (while checking preferred proxy)"
  proxy_checks_failed=1
fi

# @todo - test system override proxy selection and watch defaults, setup a 3rd squid?
docker kill $$-squid-one
docker kill $$-squid-two

# Report the proxy checks through the exit status, only after the containers are gone
if [ "$proxy_checks_failed" -ne 0 ]
then
  exit 1
fi
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -440,6 +440,36 @@ class ChangeDetectionStore:
|
||||||
print ("Registered proxy list", list(self.proxy_list.keys()))
|
print ("Registered proxy list", list(self.proxy_list.keys()))
|
||||||
|
|
||||||
|
|
||||||
|
def get_preferred_proxy_for_watch(self, uuid):
    """
    Returns the preferred proxy by ID key

    Resolution order:
      1. the proxy selected on the watch itself, when it is still in the proxy list
      2. the system-wide proxy from settings -> requests -> proxy, when it is in the proxy list
      3. the first entry of the proxy list

    :param uuid: UUID of the watch
    :return: proxy "key" id, or None when no proxy list is loaded (or it is empty)
    """
    # Nothing to choose from - also avoids indexing into an empty list in the fallback below
    if not self.proxy_list:
        return None

    # If its a valid one, the per-watch choice wins
    watch = self.data['watching'].get(uuid)
    # An unknown UUID simply has no per-watch preference
    watch_proxy_id = watch.get('proxy') if watch else None
    if watch_proxy_id and watch_proxy_id in self.proxy_list:
        return watch_proxy_id

    # not valid (including None), try the system one
    system_proxy_id = self.data['settings']['requests'].get('proxy')
    # Is not None and exists
    if self.proxy_list.get(system_proxy_id):
        return system_proxy_id

    # Fallback - Did not resolve anything (nothing selected, or the selection is no
    # longer present in the proxy list), use the first available
    return next(iter(self.proxy_list))
|
||||||
|
|
||||||
# Run all updates
|
# Run all updates
|
||||||
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct
|
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
"""Tests for the app."""
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
from .. import conftest
|
||||||
|
|
||||||
|
#def pytest_addoption(parser):
|
||||||
|
# parser.addoption("--url_suffix", action="store", default="identifier for request")
|
||||||
|
|
||||||
|
|
||||||
|
#def pytest_generate_tests(metafunc):
|
||||||
|
# # This is called for every test. Only get/set command line arguments
|
||||||
|
# # if the argument is specified in the list of test "fixturenames".
|
||||||
|
# option_value = metafunc.config.option.url_suffix
|
||||||
|
# if 'url_suffix' in metafunc.fixturenames and option_value is not None:
|
||||||
|
# metafunc.parametrize("url_suffix", [option_value])
|
|
@ -0,0 +1,10 @@
|
||||||
|
{
|
||||||
|
"proxy-one": {
|
||||||
|
"label": "One",
|
||||||
|
"url": "http://127.0.0.1:3128"
|
||||||
|
},
|
||||||
|
"proxy-two": {
|
||||||
|
"label": "two",
|
||||||
|
"url": "http://127.0.0.1:3129"
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
acl localnet src 0.0.0.1-0.255.255.255 # RFC 1122 "this" network (LAN)
|
||||||
|
acl localnet src 10.0.0.0/8 # RFC 1918 local private network (LAN)
|
||||||
|
acl localnet src 100.64.0.0/10 # RFC 6598 shared address space (CGN)
|
||||||
|
acl localnet src 169.254.0.0/16 # RFC 3927 link-local (directly plugged) machines
|
||||||
|
acl localnet src 172.16.0.0/12 # RFC 1918 local private network (LAN)
|
||||||
|
acl localnet src 192.168.0.0/16 # RFC 1918 local private network (LAN)
|
||||||
|
acl localnet src fc00::/7 # RFC 4193 local private network range
|
||||||
|
acl localnet src fe80::/10 # RFC 4291 link-local (directly plugged) machines
|
||||||
|
acl localnet src 159.65.224.174
|
||||||
|
acl SSL_ports port 443
|
||||||
|
acl Safe_ports port 80 # http
|
||||||
|
acl Safe_ports port 21 # ftp
|
||||||
|
acl Safe_ports port 443 # https
|
||||||
|
acl Safe_ports port 70 # gopher
|
||||||
|
acl Safe_ports port 210 # wais
|
||||||
|
acl Safe_ports port 1025-65535 # unregistered ports
|
||||||
|
acl Safe_ports port 280 # http-mgmt
|
||||||
|
acl Safe_ports port 488 # gss-http
|
||||||
|
acl Safe_ports port 591 # filemaker
|
||||||
|
acl Safe_ports port 777 # multiling http
|
||||||
|
acl CONNECT method CONNECT
|
||||||
|
|
||||||
|
http_access deny !Safe_ports
|
||||||
|
http_access deny CONNECT !SSL_ports
|
||||||
|
http_access allow localhost manager
|
||||||
|
http_access deny manager
|
||||||
|
http_access allow localhost
|
||||||
|
http_access allow localnet
|
||||||
|
http_access deny all
|
||||||
|
http_port 3128
|
||||||
|
coredump_dir /var/spool/squid
|
||||||
|
refresh_pattern ^ftp: 1440 20% 10080
|
||||||
|
refresh_pattern ^gopher: 1440 0% 1440
|
||||||
|
refresh_pattern -i (/cgi-bin/|\?) 0 0% 0
|
||||||
|
refresh_pattern \/(Packages|Sources)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims
|
||||||
|
refresh_pattern \/Release(|\.gpg)$ 0 0% 0 refresh-ims
|
||||||
|
refresh_pattern \/InRelease$ 0 0% 0 refresh-ims
|
||||||
|
refresh_pattern \/(Translation-.*)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims
|
||||||
|
refresh_pattern . 0 20% 4320
|
||||||
|
logfile_rotate 0
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from ..util import live_server_setup
|
||||||
|
|
||||||
|
def test_preferred_proxy(client, live_server):
    """Import one plain-HTTP watch, then switch it to "proxy-two" through the edit form.

    Only the two form responses are asserted here; which proxy carried the
    request is not checked inside this function.
    """
    time.sleep(1)
    live_server_setup(live_server)
    time.sleep(1)

    # An SSL'd URL would not show up in the squid/proxy logs,
    # so use plain HTTP on a specific domain name
    url = "http://chosen.changedetection.io"

    import_response = client.post(
        url_for("import_page"),
        data={"urls": url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data

    time.sleep(2)

    edit_url = url_for("edit_page", uuid="first")
    edit_form = {
        "css_filter": "",
        "fetch_backend": "html_requests",
        "headers": "",
        "proxy": "proxy-two",
        "tag": "",
        "url": url,
    }
    edit_response = client.post(edit_url, data=edit_form, follow_redirects=True)
    assert b"Updated watch." in edit_response.data

    time.sleep(2)
    # Now the request should appear in the second-squid logs
|
|
@ -0,0 +1,19 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||||
|
|
||||||
|
# just make a request, we will grep in the docker logs to see it actually got called
|
||||||
|
def test_check_basic_change_detection_functionality(client, live_server):
    """Import a single plain-HTTP watch and assert that the import was accepted."""
    live_server_setup(live_server)

    import_url = url_for("import_page")
    # An SSL'd URL would not show up in the squid/proxy logs,
    # so use plain HTTP on a specific domain name
    import_form = {"urls": "http://one.changedetection.io"}
    response = client.post(import_url, data=import_form, follow_redirects=True)

    assert b"1 Imported" in response.data

    # NOTE(review): presumably leaves time for the imported watch to be fetched - confirm
    time.sleep(3)
|
|
@ -6,6 +6,8 @@ services:
|
||||||
hostname: changedetection
|
hostname: changedetection
|
||||||
volumes:
|
volumes:
|
||||||
- changedetection-data:/datastore
|
- changedetection-data:/datastore
|
||||||
|
# Configurable proxy list support, see https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#proxy-list-support
|
||||||
|
# - ./proxies.json:/datastore/proxies.json
|
||||||
|
|
||||||
# environment:
|
# environment:
|
||||||
# Default listening port, can also be changed with the -p option
|
# Default listening port, can also be changed with the -p option
|
||||||
|
|
Ładowanie…
Reference in New Issue