Mirror of https://github.com/dgtlmoon/changedetection.io
Fixing proxy checker (#2696)
parent 5a768d7db3
commit c1c8de3104
Proxy-check blueprint (construct_blueprint / long_task):

@@ -1,4 +1,7 @@
+import importlib
 from concurrent.futures import ThreadPoolExecutor
+
+from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
 from changedetectionio.store import ChangeDetectionStore
 
 from functools import wraps

@@ -30,7 +33,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
     def long_task(uuid, preferred_proxy):
         import time
         from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
-        from changedetectionio.processors.text_json_diff import text_json_diff
         from changedetectionio.safe_jinja import render as jinja_render
 
         status = {'status': '', 'length': 0, 'text': ''}

@@ -38,8 +40,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
         contents = ''
         now = time.time()
         try:
-            update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid)
-            update_handler.call_browser()
+            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
+            update_handler = processor_module.perform_site_check(datastore=datastore,
+                                                                 watch_uuid=uuid
+                                                                 )
+
+            update_handler.call_browser(preferred_proxy_id=preferred_proxy)
             # title, size is len contents not len xfer
         except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
             if e.status_code == 404:

@@ -48,7 +54,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                 status.update({'status': 'ERROR', 'length': len(contents), 'text': f"{e.status_code} - Access denied"})
             else:
                 status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"})
-        except text_json_diff.FilterNotFoundInResponse:
+        except FilterNotFoundInResponse:
             status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but CSS/xPath filter not found (page changed layout?)"})
         except content_fetcher_exceptions.EmptyReply as e:
             if e.status_code == 403 or e.status_code == 401:
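
A brief aside on the importlib pattern introduced above: importlib.import_module("package.module") does the same work as an import statement but resolves the module at call time, and the returned object is an ordinary module, so attributes such as perform_site_check can be looked up on it. A minimal, self-contained sketch (the module name below is a standard-library example, not part of this commit):

    import importlib

    # Resolve a module from its dotted path at runtime; equivalent to "import json as mod".
    mod = importlib.import_module("json")

    # Attribute access works exactly as with a normal import statement.
    print(mod.dumps({"status": "OK", "length": 0}))

In the hunk above, the same call loads changedetectionio.processors.text_json_diff.processor so that perform_site_check can be taken from the returned module object.
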
Processor base class (difference_detection_processor):

@@ -18,6 +18,7 @@ class difference_detection_processor():
     screenshot = None
     watch = None
     xpath_data = None
+    preferred_proxy = None
 
     def __init__(self, *args, datastore, watch_uuid, **kwargs):
         super().__init__(*args, **kwargs)

@@ -26,7 +27,8 @@ class difference_detection_processor():
         # Generic fetcher that should be extended (requests, playwright etc)
         self.fetcher = Fetcher()
 
-    def call_browser(self):
+    def call_browser(self, preferred_proxy_id=None):
 
         from requests.structures import CaseInsensitiveDict
 
         # Protect against file:// access

@@ -42,7 +44,7 @@ class difference_detection_processor():
         prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
 
         # Proxy ID "key"
-        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
+        preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
 
         # Pluggable content self.fetcher
         if not prefer_fetch_backend or prefer_fetch_backend == 'system':
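
The changed line in the last hunk gives an explicitly passed preferred_proxy_id precedence over the per-watch setting stored in the datastore. A minimal sketch of that precedence rule, assuming a hypothetical lookup() callable standing in for datastore.get_preferred_proxy_for_watch():

    def resolve_proxy_id(preferred_proxy_id, lookup):
        # An explicit argument wins; otherwise fall back to the stored per-watch proxy.
        return preferred_proxy_id if preferred_proxy_id else lookup()

    assert resolve_proxy_id("proxy-two", lambda: "proxy-one") == "proxy-two"
    assert resolve_proxy_id(None, lambda: "proxy-one") == "proxy-one"
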
Proxy-check page JavaScript:

@@ -1,14 +1,14 @@
 $(function () {
     /* add container before each proxy location to show status */
-    var option_li = $('.fetch-backend-proxy li').filter(function() {
-            return $("input",this)[0].value.length >0;
-    });
-    //var option_li = $('.fetch-backend-proxy li');
     var isActive = false;
-    $(option_li).prepend('<div class="proxy-status"></div>');
-    $(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');
+    function setup_html_widget() {
+        var option_li = $('.fetch-backend-proxy li').filter(function () {
+            return $("input", this)[0].value.length > 0;
+        });
+        $(option_li).prepend('<div class="proxy-status"></div>');
+        $(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');
+    }
 
     function set_proxy_check_status(proxy_key, state) {
         // select input by value name

@@ -59,8 +59,14 @@ $(function () {
     }
 
     $('#check-all-proxies').click(function (e) {
         e.preventDefault()
-        $('body').addClass('proxy-check-active');
+
+        if (!$('body').hasClass('proxy-check-active')) {
+            setup_html_widget();
+            $('body').addClass('proxy-check-active');
+        }
+
         $('.proxy-check-details').html('');
         $('.proxy-status').html('<span class="spinner"></span>').fadeIn();
         $('.proxy-timing').html('');
Proxy styles (SCSS source):

@@ -25,15 +25,19 @@ ul#requests-extra_proxies {
 body.proxy-check-active {
   #request {
+    // Padding set by flex layout
+    /*
     .proxy-status {
       width: 2em;
     }
+    */
 
     .proxy-check-details {
       font-size: 80%;
       color: #555;
       display: block;
-      padding-left: 4em;
+      padding-left: 2em;
+      max-width: 500px;
     }
 
     .proxy-timing {
Compiled stylesheet:

@@ -119,19 +119,22 @@ ul#requests-extra_proxies {
 #request label[for=proxy] {
   display: inline-block; }
 
-body.proxy-check-active #request .proxy-status {
-  width: 2em; }
-
-body.proxy-check-active #request .proxy-check-details {
-  font-size: 80%;
-  color: #555;
-  display: block;
-  padding-left: 4em; }
-
-body.proxy-check-active #request .proxy-timing {
-  font-size: 80%;
-  padding-left: 1rem;
-  color: var(--color-link); }
+body.proxy-check-active #request {
+  /*
+  .proxy-status {
+    width: 2em;
+  }
+  */ }
+
+body.proxy-check-active #request .proxy-check-details {
+  font-size: 80%;
+  color: #555;
+  display: block;
+  padding-left: 2em;
+  max-width: 500px; }
+
+body.proxy-check-active #request .proxy-timing {
+  font-size: 80%;
+  padding-left: 1rem;
+  color: var(--color-link); }
 
 #recommended-proxy {
   display: grid;
New test file (72 added lines), exercising PDF fetching end to end:

@@ -0,0 +1,72 @@
#!/usr/bin/env python3

import time
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks


# `subtractive_selectors` should still work in `source:` type requests
def test_fetch_pdf(client, live_server, measure_memory_usage):
    import shutil
    shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf")

    live_server_setup(live_server)
    test_url = url_for('test_pdf_endpoint', _external=True)
    # Add our URL to the import page
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )

    assert b"1 Imported" in res.data

    wait_for_all_checks(client)

    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )

    # PDF header should not be there (it was converted to text)
    assert b'PDF' not in res.data[:10]
    assert b'hello world' in res.data

    # So we know if the file changes in other ways
    import hashlib
    original_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
    # We should have one
    assert len(original_md5) > 0
    # And it's going to be in the document
    assert b'Document checksum - ' + bytes(str(original_md5).encode('utf-8')) in res.data

    shutil.copy("tests/test2.pdf", "test-datastore/endpoint-test.pdf")
    changed_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
    assert b'1 watches queued for rechecking.' in res.data

    wait_for_all_checks(client)

    # Now something should be ready, indicated by having a 'unviewed' class
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

    # The original checksum should be not be here anymore (cdio adds it to the bottom of the text)
    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )

    assert original_md5.encode('utf-8') not in res.data
    assert changed_md5.encode('utf-8') in res.data

    res = client.get(
        url_for("diff_history_page", uuid="first"),
        follow_redirects=True
    )

    assert original_md5.encode('utf-8') in res.data
    assert changed_md5.encode('utf-8') in res.data

    assert b'here is a change' in res.data
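
To run only this new test locally, pytest's keyword selection can be driven from Python as well as from the command line; a minimal sketch (the working directory and test layout are assumptions, not stated in the commit):

    import pytest

    # Select only the new PDF test by name; -x stops at the first failure.
    pytest.main(["-x", "-k", "test_fetch_pdf"])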