kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Tidy up proxies.json logic, adding tests (#955)
rodzic
8567a83c47
commit
d4715e2bc8
|
@ -1444,12 +1444,7 @@ def ticker_thread_check_time_launch_checks():
|
|||
if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
|
||||
|
||||
# Proxies can be set to have a limit on seconds between which they can be called
|
||||
watch_proxy = watch.get('proxy')
|
||||
if not watch_proxy:
|
||||
watch_proxy = datastore.data['settings']['requests']['proxy']
|
||||
if not watch_proxy:
|
||||
watch_proxy = list(datastore.proxy_list.keys())[0]
|
||||
|
||||
watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
||||
if watch_proxy and watch_proxy in list(datastore.proxy_list.keys()):
|
||||
# Proxy may also have some threshold minimum
|
||||
proxy_list_reuse_time_minimum = int(datastore.proxy_list.get(watch_proxy, {}).get('reuse_time_minimum', 0))
|
||||
|
|
|
@ -20,36 +20,6 @@ class perform_site_check():
|
|||
super().__init__(*args, **kwargs)
|
||||
self.datastore = datastore
|
||||
|
||||
# If there was a proxy list enabled, figure out what proxy_args/which proxy to use
|
||||
# Returns the proxy as a URL
|
||||
# if watch.proxy use that
|
||||
# fetcher.proxy_override = watch.proxy or main config proxy
|
||||
# Allows override the proxy on a per-request basis
|
||||
# ALWAYS use the first one if nothing is selected
|
||||
|
||||
def set_proxy_from_list(self, watch):
    """
    Resolve the proxy URL to use for ``watch``.

    Resolution order:
      1. the watch's own ``proxy`` key, when it names an entry in the proxy list
      2. the system-wide proxy from ``settings -> requests -> proxy``
      3. the first entry of the proxy list

    :param watch: mapping for the watch; its optional ``proxy`` value is a proxy list key
    :return: the ``url`` of the resolved proxy, or None when no proxy list
             is loaded or the list has no entries
    """
    proxy_list = self.datastore.proxy_list
    # Nothing to choose from: proxy list not loaded (None) or loaded but empty
    if not proxy_list:
        return None

    proxy_args = None

    # .get() so a watch that has no 'proxy' key falls through instead of raising KeyError
    watch_proxy_id = watch.get('proxy')

    # If its a valid one
    if watch_proxy_id and watch_proxy_id in proxy_list:
        proxy_args = proxy_list[watch_proxy_id].get('url')

    # not valid (including None), try the system one
    else:
        system_proxy_id = self.datastore.data['settings']['requests'].get('proxy')
        system_proxy = proxy_list.get(system_proxy_id)
        # Is not None and exists
        if system_proxy:
            proxy_args = system_proxy.get('url')

    # Fallback - Did not resolve anything, use the first available
    if proxy_args is None:
        first_default = next(iter(proxy_list))
        proxy_args = proxy_list[first_default].get('url')

    return proxy_args
|
||||
|
||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||
# So convert it to inline flag "foobar(?i)" type configuration
|
||||
def forward_slash_enclosed_regex_to_options(self, regex):
|
||||
|
@ -114,9 +84,12 @@ class perform_site_check():
|
|||
# If the klass doesn't exist, just use a default
|
||||
klass = getattr(content_fetcher, "html_requests")
|
||||
|
||||
proxy_url = self.set_proxy_from_list(watch)
|
||||
if proxy_url:
|
||||
proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
||||
proxy_url = None
|
||||
if proxy_id:
|
||||
proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
|
||||
print ("UUID {} Using proxy {}".format(uuid, proxy_url))
|
||||
|
||||
fetcher = klass(proxy_override=proxy_url)
|
||||
|
||||
# Configurable per-watch or global extra delay before extracting text (for webDriver types)
|
||||
|
|
|
@ -49,3 +49,47 @@ pytest tests/visualselector/test_fetch_data.py
|
|||
|
||||
unset PLAYWRIGHT_DRIVER_URL
|
||||
docker kill $$-test_browserless
|
||||
|
||||
# Test proxy list handling, starting two squids on different ports
|
||||
# Each squid adds a different header to the response, which is the main thing we test for.
|
||||
docker run -d --name $$-squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3128:3128 ubuntu/squid:4.13-21.10_edge
|
||||
docker run -d --name $$-squid-two --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3129:3128 ubuntu/squid:4.13-21.10_edge
|
||||
|
||||
|
||||
# So, basic HTTP as env var test
|
||||
export HTTP_PROXY=http://localhost:3128
|
||||
export HTTPS_PROXY=http://localhost:3128
|
||||
pytest tests/proxy_list/test_proxy.py
|
||||
docker logs $$-squid-one 2>/dev/null|grep one.changedetection.io
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
echo "Did not see a request to one.changedetection.io in the squid logs (while checking env vars HTTP_PROXY/HTTPS_PROXY)"
|
||||
fi
|
||||
unset HTTP_PROXY
|
||||
unset HTTPS_PROXY
|
||||
|
||||
|
||||
# 2nd test: actually choose the preferred proxy from proxies.json
|
||||
cp tests/proxy_list/proxies.json-example ./test-datastore/proxies.json
|
||||
# Makes a watch use a preferred proxy
|
||||
pytest tests/proxy_list/test_multiple_proxy.py
|
||||
|
||||
# Should be a request in the default "first" squid
|
||||
docker logs $$-squid-one 2>/dev/null|grep chosen.changedetection.io
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)"
|
||||
fi
|
||||
|
||||
# And one in the 'second' squid (user selects this as preferred)
|
||||
docker logs $$-squid-two 2>/dev/null|grep chosen.changedetection.io
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)"
|
||||
fi
|
||||
|
||||
# @todo - test system override proxy selection and watch defaults, setup a 3rd squid?
|
||||
docker kill $$-squid-one
|
||||
docker kill $$-squid-two
|
||||
|
||||
|
||||
|
|
|
@ -440,6 +440,36 @@ class ChangeDetectionStore:
|
|||
print ("Registered proxy list", list(self.proxy_list.keys()))
|
||||
|
||||
|
||||
def get_preferred_proxy_for_watch(self, uuid):
    """
    Returns the preferred proxy by ID key

    Resolution order:
      1. the watch's own ``proxy`` value, when it names an entry in the proxy list
      2. the system-wide proxy from ``settings -> requests -> proxy``
      3. the first entry of the proxy list

    :param uuid: UUID of the watch
    :return: proxy "key" id, or None when no proxy list is loaded or it has no entries
    """
    # Nothing to choose from: proxy list not loaded (None) or loaded but empty
    if not self.proxy_list:
        return None

    # An unknown UUID is handled like a watch without any proxy preference
    watch = self.data['watching'].get(uuid) or {}

    # If its a valid one
    watch_proxy_id = watch.get('proxy')
    if watch_proxy_id and watch_proxy_id in self.proxy_list:
        return watch_proxy_id

    # not valid (including None), try the system one
    system_proxy_id = self.data['settings']['requests'].get('proxy')
    # Is not None and exists
    if self.proxy_list.get(system_proxy_id):
        return system_proxy_id

    # Fallback - Did not resolve anything (system proxy unset, or it names a
    # proxy that is not in the list), use the first available
    return next(iter(self.proxy_list))
|
||||
|
||||
# Run all updates
|
||||
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
"""Tests for the app."""
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
from .. import conftest
|
||||
|
||||
#def pytest_addoption(parser):
|
||||
# parser.addoption("--url_suffix", action="store", default="identifier for request")
|
||||
|
||||
|
||||
#def pytest_generate_tests(metafunc):
|
||||
# # This is called for every test. Only get/set command line arguments
|
||||
# # if the argument is specified in the list of test "fixturenames".
|
||||
# option_value = metafunc.config.option.url_suffix
|
||||
# if 'url_suffix' in metafunc.fixturenames and option_value is not None:
|
||||
# metafunc.parametrize("url_suffix", [option_value])
|
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"proxy-one": {
|
||||
"label": "One",
|
||||
"url": "http://127.0.0.1:3128"
|
||||
},
|
||||
"proxy-two": {
|
||||
"label": "two",
|
||||
"url": "http://127.0.0.1:3129"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
acl localnet src 0.0.0.1-0.255.255.255 # RFC 1122 "this" network (LAN)
|
||||
acl localnet src 10.0.0.0/8 # RFC 1918 local private network (LAN)
|
||||
acl localnet src 100.64.0.0/10 # RFC 6598 shared address space (CGN)
|
||||
acl localnet src 169.254.0.0/16 # RFC 3927 link-local (directly plugged) machines
|
||||
acl localnet src 172.16.0.0/12 # RFC 1918 local private network (LAN)
|
||||
acl localnet src 192.168.0.0/16 # RFC 1918 local private network (LAN)
|
||||
acl localnet src fc00::/7 # RFC 4193 local private network range
|
||||
acl localnet src fe80::/10 # RFC 4291 link-local (directly plugged) machines
|
||||
acl localnet src 159.65.224.174
|
||||
acl SSL_ports port 443
|
||||
acl Safe_ports port 80 # http
|
||||
acl Safe_ports port 21 # ftp
|
||||
acl Safe_ports port 443 # https
|
||||
acl Safe_ports port 70 # gopher
|
||||
acl Safe_ports port 210 # wais
|
||||
acl Safe_ports port 1025-65535 # unregistered ports
|
||||
acl Safe_ports port 280 # http-mgmt
|
||||
acl Safe_ports port 488 # gss-http
|
||||
acl Safe_ports port 591 # filemaker
|
||||
acl Safe_ports port 777 # multiling http
|
||||
acl CONNECT method CONNECT
|
||||
|
||||
http_access deny !Safe_ports
|
||||
http_access deny CONNECT !SSL_ports
|
||||
http_access allow localhost manager
|
||||
http_access deny manager
|
||||
http_access allow localhost
|
||||
http_access allow localnet
|
||||
http_access deny all
|
||||
http_port 3128
|
||||
coredump_dir /var/spool/squid
|
||||
refresh_pattern ^ftp: 1440 20% 10080
|
||||
refresh_pattern ^gopher: 1440 0% 1440
|
||||
refresh_pattern -i (/cgi-bin/|\?) 0 0% 0
|
||||
refresh_pattern \/(Packages|Sources)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims
|
||||
refresh_pattern \/Release(|\.gpg)$ 0 0% 0 refresh-ims
|
||||
refresh_pattern \/InRelease$ 0 0% 0 refresh-ims
|
||||
refresh_pattern \/(Translation-.*)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims
|
||||
refresh_pattern . 0 20% 4320
|
||||
logfile_rotate 0
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from ..util import live_server_setup
|
||||
|
||||
def test_preferred_proxy(client, live_server):
    """Import a plain-HTTP watch, then set its proxy to "proxy-two" through the edit form.

    Only the import and edit responses are asserted here.
    """
    time.sleep(1)
    live_server_setup(live_server)
    time.sleep(1)

    # Because a URL won't show in squid/proxy logs when it is SSLed,
    # use plain HTTP or a specific domain-name here
    target_url = "http://chosen.changedetection.io"

    import_response = client.post(
        url_for("import_page"),
        data={"urls": target_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data

    time.sleep(2)

    # Form fields submitted to the edit page; "proxy" selects the preferred proxy by its key
    edit_form = {
        "css_filter": "",
        "fetch_backend": "html_requests",
        "headers": "",
        "proxy": "proxy-two",
        "tag": "",
        "url": target_url,
    }
    edit_response = client.post(
        url_for("edit_page", uuid="first"),
        data=edit_form,
        follow_redirects=True
    )
    assert b"Updated watch." in edit_response.data

    time.sleep(2)
    # Now the request should appear in the second-squid logs
|
|
@ -0,0 +1,19 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||
|
||||
# just make a request, we will grep in the docker logs to see it actually got called
|
||||
def test_check_basic_change_detection_functionality(client, live_server):
    """Import a single plain-HTTP watch and pause; only the import response is asserted."""
    live_server_setup(live_server)

    # Because a URL won't show in squid/proxy logs when it is SSLed,
    # use plain HTTP or a specific domain-name here
    import_form = {"urls": "http://one.changedetection.io"}
    import_response = client.post(
        url_for("import_page"),
        data=import_form,
        follow_redirects=True
    )

    assert b"1 Imported" in import_response.data
    time.sleep(3)
|
|
@ -6,6 +6,8 @@ services:
|
|||
hostname: changedetection
|
||||
volumes:
|
||||
- changedetection-data:/datastore
|
||||
# Configurable proxy list support, see https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#proxy-list-support
|
||||
# - ./proxies.json:/datastore/proxies.json
|
||||
|
||||
# environment:
|
||||
# Default listening port, can also be changed with the -p option
|
||||
|
|
Ładowanie…
Reference in New Issue