kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Send notification when CSS/xPath filter is missing after more than 6 (configurable) attempts (#771)
rodzic
c2fe5ae0d1
commit
a82fad7059
|
@ -349,6 +349,8 @@ class watchForm(commonSettingsForm):
|
|||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||
save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
|
||||
proxy = RadioField('Proxy')
|
||||
filter_failure_notification_send = BooleanField(
|
||||
'Send a notification when the filter can no longer be found on the page', default=False)
|
||||
|
||||
def validate(self, **kwargs):
|
||||
if not super().validate():
|
||||
|
@ -387,6 +389,11 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
|||
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
|
||||
password = SaltyPasswordField()
|
||||
|
||||
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
message="Should contain zero or more attempts")])
|
||||
|
||||
|
||||
class globalSettingsForm(Form):
|
||||
# Define these as FormFields/"sub forms", this way it matches the JSON storage
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import json
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
@ -8,16 +7,23 @@ import re
|
|||
from inscriptis import get_text
|
||||
from inscriptis.model.config import ParserConfig
|
||||
|
||||
class FilterNotFoundInResponse(ValueError):
|
||||
def __init__(self, msg):
|
||||
ValueError.__init__(self, msg)
|
||||
|
||||
class JSONNotFound(ValueError):
|
||||
def __init__(self, msg):
|
||||
ValueError.__init__(self, msg)
|
||||
|
||||
|
||||
# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
|
||||
def css_filter(css_filter, html_content):
|
||||
soup = BeautifulSoup(html_content, "html.parser")
|
||||
html_block = ""
|
||||
for item in soup.select(css_filter, separator=""):
|
||||
r = soup.select(css_filter, separator="")
|
||||
if len(r) == 0:
|
||||
raise FilterNotFoundInResponse(css_filter)
|
||||
for item in r:
|
||||
html_block += str(item)
|
||||
|
||||
return html_block + "\n"
|
||||
|
@ -42,8 +48,12 @@ def xpath_filter(xpath_filter, html_content):
|
|||
tree = html.fromstring(bytes(html_content, encoding='utf-8'))
|
||||
html_block = ""
|
||||
|
||||
for item in tree.xpath(xpath_filter.strip(), namespaces={'re':'http://exslt.org/regular-expressions'}):
|
||||
html_block+= etree.tostring(item, pretty_print=True).decode('utf-8')+"<br/>"
|
||||
r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
|
||||
if len(r) == 0:
|
||||
raise FilterNotFoundInResponse(css_filter)
|
||||
|
||||
for item in r:
|
||||
html_block += etree.tostring(item, pretty_print=True).decode('utf-8') + "<br/>"
|
||||
|
||||
return html_block
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@ from changedetectionio.notification import (
|
|||
default_notification_title,
|
||||
)
|
||||
|
||||
_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
|
||||
|
||||
class model(dict):
|
||||
base_config = {
|
||||
'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
|
||||
|
@ -30,6 +32,7 @@ class model(dict):
|
|||
'extract_title_as_title': False,
|
||||
'empty_pages_are_a_change': False,
|
||||
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
|
||||
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
|
||||
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'global_subtractive_selectors': [],
|
||||
'ignore_whitespace': True,
|
||||
|
|
|
@ -41,6 +41,8 @@ class model(dict):
|
|||
'trigger_text': [], # List of text or regex to wait for until a change is detected
|
||||
'text_should_not_be_present': [], # Text that should not present
|
||||
'fetch_backend': None,
|
||||
'filter_failure_notification_send': True,
|
||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
||||
'extract_title_as_title': False,
|
||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||
'proxy': None, # Preferred proxy connection
|
||||
|
|
|
@ -34,7 +34,6 @@ def process_notification(n_object, datastore):
|
|||
valid_notification_formats[default_notification_format],
|
||||
)
|
||||
|
||||
|
||||
# Insert variables into the notification content
|
||||
notification_parameters = create_notification_parameters(n_object, datastore)
|
||||
|
||||
|
|
|
@ -62,6 +62,12 @@
|
|||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.extract_title_as_title) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.filter_failure_notification_send) }}
|
||||
<span class="pure-form-message-inline">
|
||||
Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.
|
||||
</span>
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
|
|
|
@ -36,7 +36,13 @@
|
|||
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
|
||||
<span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
|
||||
<span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
|
||||
<br/>
|
||||
Set to <strong>0</strong> to disable
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{% if not hide_remove_pass %}
|
||||
{% if current_user.is_authenticated %}
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
import os
|
||||
import time
|
||||
import re
|
||||
from flask import url_for
|
||||
from .util import set_original_response, live_server_setup
|
||||
from changedetectionio.model import App
|
||||
|
||||
|
||||
def set_response_with_filter():
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text</br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<div id="nope-doesnt-exist">Some text thats the same</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(test_return_data)
|
||||
return None
|
||||
|
||||
|
||||
# Hard to just add more live server URLs when one test is already running (I think)
|
||||
# So we add our test here (was in a different file)
|
||||
def test_check_notification(client, live_server):
|
||||
live_server_setup(live_server)
|
||||
set_original_response()
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("form_watch_add"),
|
||||
data={"url": test_url, "tag": ''},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Watch added" in res.data
|
||||
|
||||
# Give the thread time to pick up the first version
|
||||
time.sleep(3)
|
||||
|
||||
# Goto the edit page, add our ignore text
|
||||
# Add our URL to the import page
|
||||
url = url_for('test_notification_endpoint', _external=True)
|
||||
notification_url = url.replace('http', 'json')
|
||||
|
||||
print(">>>> Notification URL: " + notification_url)
|
||||
|
||||
# Just a regular notification setting, this will be used by the special 'filter not found' notification
|
||||
notification_form_data = {"notification_urls": notification_url,
|
||||
"notification_title": "New ChangeDetection.io Notification - {watch_url}",
|
||||
"notification_body": "BASE URL: {base_url}\n"
|
||||
"Watch URL: {watch_url}\n"
|
||||
"Watch UUID: {watch_uuid}\n"
|
||||
"Watch title: {watch_title}\n"
|
||||
"Watch tag: {watch_tag}\n"
|
||||
"Preview: {preview_url}\n"
|
||||
"Diff URL: {diff_url}\n"
|
||||
"Snapshot: {current_snapshot}\n"
|
||||
"Diff: {diff}\n"
|
||||
"Diff Full: {diff_full}\n"
|
||||
":-)",
|
||||
"notification_format": "Text"}
|
||||
|
||||
notification_form_data.update({
|
||||
"url": test_url,
|
||||
"tag": "my tag",
|
||||
"title": "my title",
|
||||
"headers": "",
|
||||
"css_filter": '#nope-doesnt-exist',
|
||||
"fetch_backend": "html_requests"})
|
||||
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data=notification_form_data,
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
time.sleep(3)
|
||||
|
||||
# Now the notification should not exist, because we didnt reach the threshold
|
||||
assert not os.path.isfile("test-datastore/notification.txt")
|
||||
|
||||
for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
|
||||
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(3)
|
||||
|
||||
# We should see something in the frontend
|
||||
assert b'Did the page change its layout' in res.data
|
||||
|
||||
# Now it should exist and contain our "filter not found" alert
|
||||
assert os.path.isfile("test-datastore/notification.txt")
|
||||
notification = False
|
||||
with open("test-datastore/notification.txt", 'r') as f:
|
||||
notification = f.read()
|
||||
assert 'CSS/xPath filter was not present in the page' in notification
|
||||
assert '#nope-doesnt-exist' in notification
|
||||
|
||||
# Remove it and prove that it doesnt trigger when not expected
|
||||
os.unlink("test-datastore/notification.txt")
|
||||
set_response_with_filter()
|
||||
|
||||
for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(3)
|
||||
|
||||
# It should have sent a notification, but..
|
||||
assert os.path.isfile("test-datastore/notification.txt")
|
||||
# but it should not contain the info about the failed filter
|
||||
with open("test-datastore/notification.txt", 'r') as f:
|
||||
notification = f.read()
|
||||
assert not 'CSS/xPath filter was not present in the page' in notification
|
||||
|
||||
# cleanup for the next
|
||||
client.get(
|
||||
url_for("form_delete", uuid="all"),
|
||||
follow_redirects=True
|
||||
)
|
|
@ -3,6 +3,8 @@ import queue
|
|||
import time
|
||||
|
||||
from changedetectionio import content_fetcher
|
||||
from changedetectionio.html_tools import FilterNotFoundInResponse
|
||||
|
||||
# A single update worker
|
||||
#
|
||||
# Requests for checking on a single site(watch) from a queue of watches
|
||||
|
@ -19,6 +21,32 @@ class update_worker(threading.Thread):
|
|||
self.datastore = datastore
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def send_filter_failure_notification(self, uuid):
|
||||
|
||||
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
|
||||
watch = self.datastore.data['watching'].get(uuid, False)
|
||||
|
||||
n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
|
||||
'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
|
||||
watch['css_filter'],
|
||||
threshold),
|
||||
'notification_format': 'text'}
|
||||
|
||||
if len(watch['notification_urls']):
|
||||
n_object['notification_urls'] = watch['notification_urls']
|
||||
|
||||
elif len(self.datastore.data['settings']['application']['notification_urls']):
|
||||
n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
|
||||
|
||||
# Only prepare to notify if the rules above matched
|
||||
if 'notification_urls' in n_object:
|
||||
n_object.update({
|
||||
'watch_url': watch['url'],
|
||||
'uuid': uuid
|
||||
})
|
||||
self.notification_q.put(n_object)
|
||||
print("Sent filter not found notification for {}".format(uuid))
|
||||
|
||||
def run(self):
|
||||
from changedetectionio import fetch_site_status
|
||||
|
||||
|
@ -55,11 +83,23 @@ class update_worker(threading.Thread):
|
|||
except content_fetcher.ReplyWithContentButNoText as e:
|
||||
# Totally fine, it's by choice - just continue on, nothing more to care about
|
||||
# Page had elements/content but no renderable text
|
||||
if self.datastore.data['watching'].get(uuid, False) and self.datastore.data['watching'][uuid].get('css_filter'):
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (CSS / xPath Filter not found in page?)"})
|
||||
else:
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."})
|
||||
pass
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."})
|
||||
except FilterNotFoundInResponse as e:
|
||||
err_text = "Filter '{}' not found - Did the page change its layout?".format(str(e))
|
||||
c = 0
|
||||
if self.datastore.data['watching'].get(uuid, False):
|
||||
c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
|
||||
c += 1
|
||||
|
||||
# Send notification if we reached the threshold?
|
||||
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
|
||||
print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
|
||||
if threshold >0 and c >= threshold:
|
||||
self.send_filter_failure_notification(uuid)
|
||||
c = 0
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'consecutive_filter_failures': c})
|
||||
except content_fetcher.EmptyReply as e:
|
||||
# Some kind of custom to-str handler in the exception handler that does this?
|
||||
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
|
||||
|
@ -89,6 +129,7 @@ class update_worker(threading.Thread):
|
|||
fname = watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
|
||||
|
||||
# Generally update anything interesting returned
|
||||
update_obj['consecutive_filter_failures'] = 0
|
||||
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||
|
||||
# A change was detected
|
||||
|
|
Ładowanie…
Reference in New Issue