From fbd9ecab62c1a400d2a9540ddf15e657fc0f1187 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Sun, 2 Jan 2022 22:35:04 +0100 Subject: [PATCH] Re #340 - snapshot should not be modified by ignore text (#344) --- changedetectionio/fetch_site_status.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 0a957114..82108306 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -122,6 +122,9 @@ class perform_site_check(): # get_text() via inscriptis stripped_text_from_html = get_text(html_content) + # Re #340 - return the content before the 'ignore text' was applied + text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8') + # We rely on the actual text in the html output.. many sites have random script vars etc, # in the future we'll implement other mechanisms. @@ -181,4 +184,4 @@ class perform_site_check(): update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content) - return changed_detected, update_obj, stripped_text_from_html + return changed_detected, update_obj, text_content_before_ignored_filter