kopia lustrzana https://github.com/dgtlmoon/changedetection.io
* Global setting option to ignore whitespace when detecting a change (pull/344/head^2)
rodzic
489671dcca
commit
b5c1fce136
|
@ -405,7 +405,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||||
# Get the most recent one
|
# Get the most recent one
|
||||||
newest_history_key = datastore.get_val(uuid, 'newest_history_key')
|
newest_history_key = datastore.get_val(uuid, 'newest_history_key')
|
||||||
|
|
||||||
# 0 means that theres only one, so that there should be no 'unviewed' history availabe
|
# 0 means that theres only one, so that there should be no 'unviewed' history available
|
||||||
if newest_history_key == 0:
|
if newest_history_key == 0:
|
||||||
newest_history_key = list(datastore.data['watching'][uuid]['history'].keys())[0]
|
newest_history_key = list(datastore.data['watching'][uuid]['history'].keys())[0]
|
||||||
|
|
||||||
|
@ -418,7 +418,11 @@ def changedetection_app(config=None, datastore_o=None):
|
||||||
stripped_content = handler.strip_ignore_text(raw_content,
|
stripped_content = handler.strip_ignore_text(raw_content,
|
||||||
datastore.data['watching'][uuid]['ignore_text'])
|
datastore.data['watching'][uuid]['ignore_text'])
|
||||||
|
|
||||||
checksum = hashlib.md5(stripped_content).hexdigest()
|
if datastore.data['settings']['application'].get('ignore_whitespace', False):
|
||||||
|
checksum = hashlib.md5(stripped_content.translate(None, b'\r\n\t ')).hexdigest()
|
||||||
|
else:
|
||||||
|
checksum = hashlib.md5(stripped_content).hexdigest()
|
||||||
|
|
||||||
return checksum
|
return checksum
|
||||||
|
|
||||||
return datastore.data['watching'][uuid]['previous_md5']
|
return datastore.data['watching'][uuid]['previous_md5']
|
||||||
|
@ -553,6 +557,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||||
form.minutes_between_check.data = int(datastore.data['settings']['requests']['minutes_between_check'])
|
form.minutes_between_check.data = int(datastore.data['settings']['requests']['minutes_between_check'])
|
||||||
form.notification_urls.data = datastore.data['settings']['application']['notification_urls']
|
form.notification_urls.data = datastore.data['settings']['application']['notification_urls']
|
||||||
form.global_ignore_text.data = datastore.data['settings']['application']['global_ignore_text']
|
form.global_ignore_text.data = datastore.data['settings']['application']['global_ignore_text']
|
||||||
|
form.ignore_whitespace.data = datastore.data['settings']['application']['ignore_whitespace']
|
||||||
form.extract_title_as_title.data = datastore.data['settings']['application']['extract_title_as_title']
|
form.extract_title_as_title.data = datastore.data['settings']['application']['extract_title_as_title']
|
||||||
form.fetch_backend.data = datastore.data['settings']['application']['fetch_backend']
|
form.fetch_backend.data = datastore.data['settings']['application']['fetch_backend']
|
||||||
form.notification_title.data = datastore.data['settings']['application']['notification_title']
|
form.notification_title.data = datastore.data['settings']['application']['notification_title']
|
||||||
|
@ -580,7 +585,8 @@ def changedetection_app(config=None, datastore_o=None):
|
||||||
datastore.data['settings']['application']['notification_urls'] = form.notification_urls.data
|
datastore.data['settings']['application']['notification_urls'] = form.notification_urls.data
|
||||||
datastore.data['settings']['application']['base_url'] = form.base_url.data
|
datastore.data['settings']['application']['base_url'] = form.base_url.data
|
||||||
datastore.data['settings']['application']['global_ignore_text'] = form.global_ignore_text.data
|
datastore.data['settings']['application']['global_ignore_text'] = form.global_ignore_text.data
|
||||||
|
datastore.data['settings']['application']['ignore_whitespace'] = form.ignore_whitespace.data
|
||||||
|
|
||||||
if form.trigger_check.data:
|
if form.trigger_check.data:
|
||||||
if len(form.notification_urls.data):
|
if len(form.notification_urls.data):
|
||||||
n_object = {'watch_url': "Test from changedetection.io!",
|
n_object = {'watch_url': "Test from changedetection.io!",
|
||||||
|
|
|
@ -58,8 +58,7 @@ class perform_site_check():
|
||||||
|
|
||||||
watch = self.datastore.data['watching'][uuid]
|
watch = self.datastore.data['watching'][uuid]
|
||||||
|
|
||||||
update_obj = {'previous_md5': self.datastore.data['watching'][uuid]['previous_md5'],
|
update_obj = {
|
||||||
'history': {},
|
|
||||||
"last_checked": timestamp
|
"last_checked": timestamp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -137,8 +136,16 @@ class perform_site_check():
|
||||||
else:
|
else:
|
||||||
stripped_text_from_html = stripped_text_from_html.encode('utf8')
|
stripped_text_from_html = stripped_text_from_html.encode('utf8')
|
||||||
|
|
||||||
|
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
|
||||||
|
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
|
||||||
|
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
|
||||||
|
else:
|
||||||
|
fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
|
||||||
|
|
||||||
fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
|
# On the first run of a site, watch['previous_md5'] will be an empty string, set it the current one.
|
||||||
|
if not len(watch['previous_md5']):
|
||||||
|
watch['previous_md5'] = fetched_md5
|
||||||
|
update_obj["previous_md5"] = fetched_md5
|
||||||
|
|
||||||
blocked_by_not_found_trigger_text = False
|
blocked_by_not_found_trigger_text = False
|
||||||
|
|
||||||
|
@ -160,16 +167,12 @@ class perform_site_check():
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
# could be None or False depending on JSON type
|
|
||||||
# On the first run of a site, watch['previous_md5'] will be an empty string
|
|
||||||
if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:
|
if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:
|
||||||
changed_detected = True
|
changed_detected = True
|
||||||
|
|
||||||
# Don't confuse people by updating as last-changed, when it actually just changed from None..
|
|
||||||
if self.datastore.get_val(uuid, 'previous_md5'):
|
|
||||||
update_obj["last_changed"] = timestamp
|
|
||||||
|
|
||||||
update_obj["previous_md5"] = fetched_md5
|
update_obj["previous_md5"] = fetched_md5
|
||||||
|
update_obj["last_changed"] = timestamp
|
||||||
|
|
||||||
|
|
||||||
# Extract title as title
|
# Extract title as title
|
||||||
if is_html:
|
if is_html:
|
||||||
|
|
|
@ -258,4 +258,5 @@ class globalSettingsForm(commonSettingsForm):
|
||||||
[validators.NumberRange(min=1)])
|
[validators.NumberRange(min=1)])
|
||||||
extract_title_as_title = BooleanField('Extract <title> from document and use as watch title')
|
extract_title_as_title = BooleanField('Extract <title> from document and use as watch title')
|
||||||
base_url = StringField('Base URL', validators=[validators.Optional()])
|
base_url = StringField('Base URL', validators=[validators.Optional()])
|
||||||
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
|
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
|
||||||
|
ignore_whitespace = BooleanField('Ignore whitespace')
|
|
@ -46,6 +46,7 @@ class ChangeDetectionStore:
|
||||||
'extract_title_as_title': False,
|
'extract_title_as_title': False,
|
||||||
'fetch_backend': 'html_requests',
|
'fetch_backend': 'html_requests',
|
||||||
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||||
|
'ignore_whitespace': False,
|
||||||
'notification_urls': [], # Apprise URL list
|
'notification_urls': [], # Apprise URL list
|
||||||
# Custom notification content
|
# Custom notification content
|
||||||
'notification_title': None,
|
'notification_title': None,
|
||||||
|
|
|
@ -69,15 +69,24 @@
|
||||||
|
|
||||||
|
|
||||||
<div class="tab-pane-inner" id="filters">
|
<div class="tab-pane-inner" id="filters">
|
||||||
<span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span>
|
|
||||||
<fieldset class="pure-group">
|
<fieldset class="pure-group">
|
||||||
|
{{ render_field(form.ignore_whitespace) }}
|
||||||
|
<span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br/>
|
||||||
|
<i>Note:</i> Changing this will change the status of your existing watches, possibly triggering alerts etc.
|
||||||
|
</span>
|
||||||
|
</fieldset>
|
||||||
|
|
||||||
|
|
||||||
|
<fieldset class="pure-group">
|
||||||
{{ render_field(form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line
|
{{ render_field(form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line
|
||||||
/some.regex\d{2}/ for case-INsensitive regex
|
/some.regex\d{2}/ for case-INsensitive regex
|
||||||
") }}
|
") }}
|
||||||
<span class="pure-form-message-inline">
|
<span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br/>
|
||||||
Each line processed separately, any line matching will be ignored.<br/>
|
<span class="pure-form-message-inline">Each line processed separately, any line matching will be ignored.<br/>
|
||||||
Regular Expression support, wrap the line in forward slash <b>/regex/</b>.
|
Regular Expression support, wrap the line in forward slash <b>/regex/</b>.
|
||||||
</span>
|
</span>
|
||||||
|
</fieldset>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div id="actions">
|
<div id="actions">
|
||||||
|
|
|
@ -18,7 +18,8 @@ def cleanup(datastore_path):
|
||||||
'url-watches.json',
|
'url-watches.json',
|
||||||
'notification.txt',
|
'notification.txt',
|
||||||
'count.txt',
|
'count.txt',
|
||||||
'endpoint-content.txt']
|
'endpoint-content.txt'
|
||||||
|
]
|
||||||
for file in files:
|
for file in files:
|
||||||
try:
|
try:
|
||||||
os.unlink("{}/{}".format(datastore_path, file))
|
os.unlink("{}/{}".format(datastore_path, file))
|
||||||
|
|
|
@ -235,4 +235,4 @@ def test_check_global_ignore_text_functionality(client, live_server):
|
||||||
assert b'unviewed' in res.data
|
assert b'unviewed' in res.data
|
||||||
|
|
||||||
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
|
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
|
||||||
assert b'Deleted' in res.data
|
assert b'Deleted' in res.data
|
||||||
|
|
|
@ -0,0 +1,96 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from . util import live_server_setup
|
||||||
|
|
||||||
|
def test_setup(live_server):
|
||||||
|
live_server_setup(live_server)
|
||||||
|
|
||||||
|
|
||||||
|
# Should be the same as set_original_ignore_response() but with a little more whitespacing
|
||||||
|
def set_original_ignore_response_but_with_whitespace():
|
||||||
|
test_return_data = """<html>
|
||||||
|
<body>
|
||||||
|
Some initial text</br>
|
||||||
|
<p>
|
||||||
|
|
||||||
|
|
||||||
|
Which is across multiple lines</p>
|
||||||
|
<br>
|
||||||
|
</br>
|
||||||
|
|
||||||
|
So let's see what happens. </br>
|
||||||
|
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
|
||||||
|
"""
|
||||||
|
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||||
|
f.write(test_return_data)
|
||||||
|
|
||||||
|
|
||||||
|
def set_original_ignore_response():
|
||||||
|
test_return_data = """<html>
|
||||||
|
<body>
|
||||||
|
Some initial text</br>
|
||||||
|
<p>Which is across multiple lines</p>
|
||||||
|
</br>
|
||||||
|
So let's see what happens. </br>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||||
|
f.write(test_return_data)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# If there was only a change in the whitespacing, then we shouldn't have a change detected
|
||||||
|
def test_check_ignore_whitespace(client, live_server):
|
||||||
|
sleep_time_for_fetch_thread = 3
|
||||||
|
|
||||||
|
# Give the endpoint time to spin up
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
set_original_ignore_response()
|
||||||
|
|
||||||
|
# Goto the settings page, add our ignore text
|
||||||
|
res = client.post(
|
||||||
|
url_for("settings_page"),
|
||||||
|
data={
|
||||||
|
"minutes_between_check": 180,
|
||||||
|
"ignore_whitespace": "y",
|
||||||
|
'fetch_backend': "html_requests"
|
||||||
|
},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"Settings updated." in res.data
|
||||||
|
|
||||||
|
# Add our URL to the import page
|
||||||
|
test_url = url_for('test_endpoint', _external=True)
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"),
|
||||||
|
data={"urls": test_url},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
# Trigger a check
|
||||||
|
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
set_original_ignore_response_but_with_whitespace()
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
# Trigger a check
|
||||||
|
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
# Give the thread time to pick it up
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
# It should report nothing found (no new 'unviewed' class)
|
||||||
|
res = client.get(url_for("index"))
|
||||||
|
assert b'unviewed' not in res.data
|
||||||
|
assert b'/test-endpoint' in res.data
|
|
@ -64,74 +64,77 @@ class update_worker(threading.Thread):
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if update_obj:
|
try:
|
||||||
try:
|
watch = self.datastore.data['watching'][uuid]
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
|
||||||
if changed_detected:
|
|
||||||
n_object = {}
|
|
||||||
# A change was detected
|
|
||||||
fname = self.datastore.save_history_text(watch_uuid=uuid, contents=contents)
|
|
||||||
|
|
||||||
# Update history with the stripped text for future reference, this will also mean we save the first
|
# For the FIRST time we check a site, or a change detected, save the snapshot.
|
||||||
# Should always be keyed by string(timestamp)
|
if changed_detected or not watch['last_checked']:
|
||||||
self.datastore.update_watch(uuid, {"history": {str(update_obj["last_checked"]): fname}})
|
# A change was detected
|
||||||
|
fname = self.datastore.save_history_text(watch_uuid=uuid, contents=contents)
|
||||||
|
# Should always be keyed by string(timestamp)
|
||||||
|
self.datastore.update_watch(uuid, {"history": {str(update_obj["last_checked"]): fname}})
|
||||||
|
|
||||||
watch = self.datastore.data['watching'][uuid]
|
# Generally update anything interesting returned
|
||||||
|
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||||
|
|
||||||
print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
|
# A change was detected
|
||||||
|
if changed_detected:
|
||||||
|
n_object = {}
|
||||||
|
print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
|
||||||
|
|
||||||
# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
|
# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
|
||||||
if len(watch['history']) > 1:
|
if len(watch['history']) > 1:
|
||||||
|
|
||||||
dates = list(watch['history'].keys())
|
dates = list(watch['history'].keys())
|
||||||
# Convert to int, sort and back to str again
|
# Convert to int, sort and back to str again
|
||||||
# @todo replace datastore getter that does this automatically
|
# @todo replace datastore getter that does this automatically
|
||||||
dates = [int(i) for i in dates]
|
dates = [int(i) for i in dates]
|
||||||
dates.sort(reverse=True)
|
dates.sort(reverse=True)
|
||||||
dates = [str(i) for i in dates]
|
dates = [str(i) for i in dates]
|
||||||
|
|
||||||
prev_fname = watch['history'][dates[1]]
|
prev_fname = watch['history'][dates[1]]
|
||||||
|
|
||||||
|
|
||||||
# Did it have any notification alerts to hit?
|
# Did it have any notification alerts to hit?
|
||||||
if len(watch['notification_urls']):
|
if len(watch['notification_urls']):
|
||||||
print(">>> Notifications queued for UUID from watch {}".format(uuid))
|
print(">>> Notifications queued for UUID from watch {}".format(uuid))
|
||||||
n_object['notification_urls'] = watch['notification_urls']
|
n_object['notification_urls'] = watch['notification_urls']
|
||||||
n_object['notification_title'] = watch['notification_title']
|
n_object['notification_title'] = watch['notification_title']
|
||||||
n_object['notification_body'] = watch['notification_body']
|
n_object['notification_body'] = watch['notification_body']
|
||||||
n_object['notification_format'] = watch['notification_format']
|
n_object['notification_format'] = watch['notification_format']
|
||||||
|
|
||||||
# No? maybe theres a global setting, queue them all
|
# No? maybe theres a global setting, queue them all
|
||||||
elif len(self.datastore.data['settings']['application']['notification_urls']):
|
elif len(self.datastore.data['settings']['application']['notification_urls']):
|
||||||
print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(uuid))
|
print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(uuid))
|
||||||
n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
|
n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
|
||||||
n_object['notification_title'] = self.datastore.data['settings']['application']['notification_title']
|
n_object['notification_title'] = self.datastore.data['settings']['application']['notification_title']
|
||||||
n_object['notification_body'] = self.datastore.data['settings']['application']['notification_body']
|
n_object['notification_body'] = self.datastore.data['settings']['application']['notification_body']
|
||||||
n_object['notification_format'] = self.datastore.data['settings']['application']['notification_format']
|
n_object['notification_format'] = self.datastore.data['settings']['application']['notification_format']
|
||||||
|
else:
|
||||||
|
print(">>> NO notifications queued, watch and global notification URLs were empty.")
|
||||||
|
|
||||||
|
# Only prepare to notify if the rules above matched
|
||||||
|
if 'notification_urls' in n_object:
|
||||||
|
# HTML needs linebreak, but MarkDown and Text can use a linefeed
|
||||||
|
if n_object['notification_format'] == 'HTML':
|
||||||
|
line_feed_sep = "</br>"
|
||||||
else:
|
else:
|
||||||
print(">>> NO notifications queued, watch and global notification URLs were empty.")
|
line_feed_sep = "\n"
|
||||||
|
|
||||||
# Only prepare to notify if the rules above matched
|
from changedetectionio import diff
|
||||||
if 'notification_urls' in n_object:
|
n_object.update({
|
||||||
# HTML needs linebreak, but MarkDown and Text can use a linefeed
|
'watch_url': watch['url'],
|
||||||
if n_object['notification_format'] == 'HTML':
|
'uuid': uuid,
|
||||||
line_feed_sep = "</br>"
|
'current_snapshot': contents.decode('utf-8'),
|
||||||
else:
|
'diff_full': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep),
|
||||||
line_feed_sep = "\n"
|
'diff': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep)
|
||||||
|
})
|
||||||
|
|
||||||
from changedetectionio import diff
|
self.notification_q.put(n_object)
|
||||||
n_object.update({
|
|
||||||
'watch_url': watch['url'],
|
|
||||||
'uuid': uuid,
|
|
||||||
'current_snapshot': contents.decode('utf-8'),
|
|
||||||
'diff_full': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep),
|
|
||||||
'diff': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep)
|
|
||||||
})
|
|
||||||
|
|
||||||
self.notification_q.put(n_object)
|
except Exception as e:
|
||||||
|
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
|
||||||
except Exception as e:
|
print("!!!! Exception in update_worker !!!\n", e)
|
||||||
print("!!!! Exception in update_worker !!!\n", e)
|
|
||||||
|
|
||||||
self.current_uuid = None # Done
|
self.current_uuid = None # Done
|
||||||
self.q.task_done()
|
self.q.task_done()
|
||||||
|
|
Ładowanie…
Reference in New Issue