diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index d7a4f835..dab956a5 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -77,11 +77,13 @@ class ScreenshotUnavailable(Exception): class ReplyWithContentButNoText(Exception): - def __init__(self, status_code, url, screenshot=None): + def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content=''): # Set this so we can use it in other parts of the app self.status_code = status_code self.url = url self.screenshot = screenshot + self.has_filters = has_filters + self.html_content = html_content return diff --git a/changedetectionio/processors/text_json_diff.py b/changedetectionio/processors/text_json_diff.py index fb810f0c..5e69a591 100644 --- a/changedetectionio/processors/text_json_diff.py +++ b/changedetectionio/processors/text_json_diff.py @@ -314,7 +314,12 @@ class perform_site_check(difference_detection_processor): # Treat pages with no renderable text content as a change? No by default empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: - raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot) + raise content_fetcher.ReplyWithContentButNoText(url=url, + status_code=fetcher.get_last_status_code(), + screenshot=screenshot, + has_filters=has_filter_rule, + html_content=html_content + ) # We rely on the actual text in the html output.. many sites have random script vars etc, # in the future we'll implement other mechanisms. diff --git a/changedetectionio/tests/test_css_selector.py b/changedetectionio/tests/test_css_selector.py index 0dfe2af7..dcc10331 100644 --- a/changedetectionio/tests/test_css_selector.py +++ b/changedetectionio/tests/test_css_selector.py @@ -2,7 +2,7 @@ import time from flask import url_for -from . util import live_server_setup +from .util import live_server_setup, wait_for_all_checks from ..html_tools import * @@ -176,3 +176,77 @@ def test_check_multiple_filters(client, live_server): assert b"Blob A" in res.data # CSS was ok assert b"Blob B" in res.data # xPath was ok assert b"Blob C" not in res.data # Should not be included + +# The filter exists, but did not contain anything useful +# Mainly used when the filter contains just an IMG, this can happen when someone selects an image in the visual-selector +# Tests fetcher can throw a "ReplyWithContentButNoText" exception after applying filter and extracting text +def test_filter_is_empty_help_suggestion(client, live_server): + #live_server_setup(live_server) + + include_filters = "#blob-a" + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write("""
+