Ability to visualise trigger and filter rules against the current snapshot on the preview page

2022-02-23 10:49:25 +01:00 · 2022-02-23 10:49:25 +01:00 · 014fda9058
commit 014fda9058
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@ -35,6 +35,7 @@ from flask import (
    url_for,
 )
 from flask_login import login_required
+from changedetectionio import html_tools

 __version__ = '0.39.8'

@ -441,7 +442,7 @@ def changedetection_app(config=None, datastore_o=None):
                raw_content = file.read()

                handler = fetch_site_status.perform_site_check(datastore=datastore)
-                stripped_content = handler.strip_ignore_text(raw_content,
+                stripped_content = html_tools.strip_ignore_text(raw_content,
                                                             datastore.data['watching'][uuid]['ignore_text'])

                if datastore.data['settings']['application'].get('ignore_whitespace', False):
@ -546,10 +547,14 @@ def changedetection_app(config=None, datastore_o=None):
                    flash('No notification URLs set, cannot send test.', 'error')

            # Diff page [edit] link should go back to diff page
-            if request.args.get("next") and request.args.get("next") == 'diff':
+            if request.args.get("next") and request.args.get("next") == 'diff' and not form.save_and_preview_button.data:
                return redirect(url_for('diff_history_page', uuid=uuid))
            else:
-                return redirect(url_for('index'))
+                if form.save_and_preview_button.data:
+                    flash('You may need to reload this page to see the new content.')
+                    return redirect(url_for('preview_page', uuid=uuid))
+                else:
+                    return redirect(url_for('index'))

        else:
            if request.method == 'POST' and not form.validate():
@ -721,8 +726,12 @@ def changedetection_app(config=None, datastore_o=None):
        # Save the current newest history as the most recently viewed
        datastore.set_last_viewed(uuid, dates[0])
        newest_file = watch['history'][dates[0]]
-        with open(newest_file, 'r') as f:
-            newest_version_file_contents = f.read()
+
+        try:
+            with open(newest_file, 'r') as f:
+                newest_version_file_contents = f.read()
+        except Exception as e:
+            newest_version_file_contents = "Unable to read {}.\n".format(newest_file)

        previous_version = request.args.get('previous_version')
        try:
@ -731,8 +740,11 @@ def changedetection_app(config=None, datastore_o=None):
            # Not present, use a default value, the second one in the sorted list.
            previous_file = watch['history'][dates[1]]

-        with open(previous_file, 'r') as f:
-            previous_version_file_contents = f.read()
+        try:
+            with open(previous_file, 'r') as f:
+                previous_version_file_contents = f.read()
+        except Exception as e:
+            previous_version_file_contents = "Unable to read {}.\n".format(previous_file)

        output = render_template("diff.html", watch_a=watch,
                                 newest=newest_version_file_contents,
@ -751,6 +763,7 @@ def changedetection_app(config=None, datastore_o=None):
    @app.route("/preview/<string:uuid>", methods=['GET'])
    @login_required
    def preview_page(uuid):
+        content = []

        # More for testing, possible to return the first/only
        if uuid == 'first':
@ -764,14 +777,38 @@ def changedetection_app(config=None, datastore_o=None):
            flash("No history found for the specified link, bad link?", "error")
            return redirect(url_for('index'))

-        newest = list(watch['history'].keys())[-1]
-        with open(watch['history'][newest], 'r') as f:
-            content = f.readlines()
+        if len(watch['history']):
+            timestamps = sorted(watch['history'].keys(), key=lambda x: int(x))
+            filename = watch['history'][timestamps[-1]]
+            try:
+                with open(filename, 'r') as f:
+                    content = f.readlines()
+            except:
+                content.append("File doesnt exist or unable to read file {}".format(filename))
+        else:
+            content.append("No history found")
+
+        # Get what needs to be highlighted
+        ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text']
+
+        # .readlines will keep the \n, but we will parse it here again, in the future tidy this up
+        ignored_line_numbers = html_tools.strip_ignore_text(content="".join(content),
+                                                            wordlist=ignore_rules,
+                                                            mode='line numbers'
+                                                            )
+
+        trigger_line_numbers = html_tools.strip_ignore_text(content="".join(content),
+                                                            wordlist=watch['trigger_text'],
+                                                            mode='line numbers'
+                                                            )

        output = render_template("preview.html",
                                 content=content,
                                 extra_stylesheets=extra_stylesheets,
+                                 ignored_line_numbers=ignored_line_numbers,
+                                 triggered_line_numbers=trigger_line_numbers,
                                 current_diff_url=watch['url'],
+                                 watch=watch,
                                 uuid=uuid)
        return output

--- a/changedetectionio/fetch_site_status.py
+++ b/changedetectionio/fetch_site_status.py
@ -1,5 +1,6 @@
 import time
 from changedetectionio import content_fetcher
+from changedetectionio import html_tools
 import hashlib
 from inscriptis import get_text
 import urllib3
@ -16,40 +17,6 @@ class perform_site_check():
        super().__init__(*args, **kwargs)
        self.datastore = datastore

-    def strip_ignore_text(self, content, list_ignore_text):
-        import re
-        ignore = []
-        ignore_regex = []
-        for k in list_ignore_text:
-
-            # Is it a regex?
-            if k[0] == '/':
-                ignore_regex.append(k.strip(" /"))
-            else:
-                ignore.append(k)
-
-        output = []
-        for line in content.splitlines():
-
-            # Always ignore blank lines in this mode. (when this function gets called)
-            if len(line.strip()):
-                regex_matches = False
-
-                # if any of these match, skip
-                for regex in ignore_regex:
-                    try:
-                        if re.search(regex, line, re.IGNORECASE):
-                            regex_matches = True
-                    except Exception as e:
-                        continue
-
-                if not regex_matches and not any(skip_text in line for skip_text in ignore):
-                    output.append(line.encode('utf8'))
-
-        return "\n".encode('utf8').join(output)
-
-
-
    def run(self, uuid):
        timestamp = int(time.time())  # used for storage etc too

@ -147,7 +114,7 @@ class perform_site_check():
            # @todo we could abstract out the get_text() to handle this cleaner
            text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
            if len(text_to_ignore):
-                stripped_text_from_html = self.strip_ignore_text(stripped_text_from_html, text_to_ignore)
+                stripped_text_from_html = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
            else:
                stripped_text_from_html = stripped_text_from_html.encode('utf8')

@ -165,22 +132,14 @@ class perform_site_check():
            blocked_by_not_found_trigger_text = False

            if len(watch['trigger_text']):
+                # Block by default; unblocked below once any trigger text matches
                blocked_by_not_found_trigger_text = True
-                for line in watch['trigger_text']:
-                    # Because JSON wont serialize a re.compile object
-                    if line[0] == '/' and line[-1] == '/':
-                        regex = re.compile(line.strip('/'), re.IGNORECASE)
-                        # Found it? so we don't wait for it anymore
-                        r = re.search(regex, str(stripped_text_from_html))
-                        if r:
-                            blocked_by_not_found_trigger_text = False
-                            break
-
-                    elif line.lower() in str(stripped_text_from_html).lower():
-                        # We found it don't wait for it.
-                        blocked_by_not_found_trigger_text = False
-                        break
-
+                # Filter and trigger works the same, so reuse it
+                result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
+                                                      wordlist=watch['trigger_text'],
+                                                      mode="line numbers")
+                if result:
+                    blocked_by_not_found_trigger_text = False


            if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@ -1,6 +1,7 @@
 from wtforms import Form, SelectField, RadioField, BooleanField, StringField, PasswordField, validators, IntegerField, fields, TextAreaField, \
    Field
-from wtforms import widgets
+
+from wtforms import widgets, SubmitField
 from wtforms.validators import ValidationError
 from wtforms.fields import html5
 from changedetectionio import content_fetcher
@ -290,6 +291,9 @@ class watchForm(commonSettingsForm):
    method = SelectField('Request Method', choices=valid_method, default=default_method)
    trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])

+    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
+    save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
+
    def validate(self, **kwargs):
        if not super().validate():
            return False
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@ -1,7 +1,7 @@
 import json
 from bs4 import BeautifulSoup
 from jsonpath_ng.ext import parse
-
+import re

 class JSONNotFound(ValueError):
    def __init__(self, msg):
@ -105,3 +105,50 @@ def extract_json_as_string(content, jsonpath_filter):
        return ''

    return stripped_text_from_html
+
+def strip_ignore_text(content, wordlist, mode="content"):
+    """Drop, or report the line numbers of, lines of `content` matching `wordlist`.
+
+    content  -- str, processed line by line; blank lines are always skipped.
+    wordlist -- list of str; an entry starting with '/' is a regex (searched
+                case-insensitively), any other entry is a case-sensitive substring.
+    mode     -- "content" (default): return the non-matching lines as UTF-8
+                bytes joined by newlines; "line numbers": return the 1-based
+                line numbers (list of int) of the lines that matched.
+    """
+    ignore = []
+    ignore_regex = []
+
+    for k in wordlist:
+        # Skip empty entries: k[0] would raise IndexError, and '' would match every line
+        if not k:
+            continue
+
+        # Is it a regex?
+        if k[0] == '/':
+            try:
+                # Compile once here instead of once per line
+                ignore_regex.append(re.compile(k.strip(" /"), re.IGNORECASE))
+            except (re.error, OverflowError, RecursionError):
+                # An invalid pattern can never match, so leave it out
+                continue
+        else:
+            ignore.append(k)
+
+    output = []
+    ignored_line_numbers = []
+    for i, line in enumerate(content.splitlines(), start=1):
+        # Always ignore blank lines in this mode. (when this function gets called)
+        if not line.strip():
+            continue
+
+        if any(r.search(line) for r in ignore_regex) or any(t in line for t in ignore):
+            ignored_line_numbers.append(i)
+        else:
+            output.append(line.encode('utf8'))
+
+    # Used for finding out what to highlight
+    if mode == "line numbers":
+        return ignored_line_numbers
+
+    return b"\n".join(output)
--- a/changedetectionio/static/styles/diff.css
+++ b/changedetectionio/static/styles/diff.css
@ -54,3 +54,19 @@ ins {
  body {
    height: 99%;
    /* Hide scroll bar in Firefox */ } }
+
+td#diff-col div {
+  text-align: justify;
+  white-space: pre-wrap; }
+
+.ignored {
+  background-color: #ccc;
+  /*  border: #0d91fa 1px solid; */
+  opacity: 0.7; }
+
+.triggered {
+  background-color: #1b98f8; }
+
+/* ignored and triggered? make it obvious error */
+.ignored.triggered {
+  background-color: #ff0000; }
--- a/changedetectionio/static/styles/diff.scss
+++ b/changedetectionio/static/styles/diff.scss
@ -66,3 +66,23 @@ ins {
 		height: 99%; /* Hide scroll bar in Firefox */
 	}
 }
+
+td#diff-col div {
+    text-align: justify;
+    white-space: pre-wrap;
+}
+
+.ignored {
+    background-color: #ccc;
+   /*  border: #0d91fa 1px solid; */
+    opacity: 0.7;
+}
+
+.triggered {
+    background-color: #1b98f8;
+}
+
+/* ignored and triggered? make it obvious error */
+.ignored.triggered {
+  background-color: #ff0000;
+}
--- a/changedetectionio/static/styles/package.json
+++ b/changedetectionio/static/styles/package.json
@ -4,8 +4,7 @@
  "description": "",
  "main": "index.js",
  "scripts": {
-    "build": "node-sass styles.scss diff.scss -o .",
-    "watch": "node-sass --watch styles.scss diff.scss -o ."
+    "build": "node-sass styles.scss -o .;node-sass diff.scss -o ."
  },
  "author": "",
  "license": "ISC",
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
--- a/changedetectionio/static/styles/styles.scss
+++ b/changedetectionio/static/styles/styles.scss
@ -567,3 +567,8 @@ $form-edge-padding: 20px;
  }
 }

+ul {
+    padding-left: 1em;
+    padding-top: 0px;
+    margin-top: 4px;
+}
--- a/changedetectionio/templates/_helpers.jinja
+++ b/changedetectionio/templates/_helpers.jinja
@ -25,3 +25,6 @@
 {% endmacro %}


+{% macro render_button(field) %}
+  {{ field(**kwargs)|safe }}
+{% endmacro %}
--- a/changedetectionio/templates/diff.html
+++ b/changedetectionio/templates/diff.html
@ -36,6 +36,7 @@
    <a onclick="next_diff();">Jump</a>
 </div>
 <div id="diff-ui">
+    <div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.</div>
    <table>
        <tbody>
        <tr>
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@ -1,6 +1,7 @@
 {% extends 'base.html' %}
 {% block content %}
 {% from '_helpers.jinja' import render_field %}
+{% from '_helpers.jinja' import render_button %}
 {% from '_common_fields.jinja' import render_common_settings_form %}
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>

@ -88,6 +89,18 @@ User-Agent: wonderbra 1.0") }}

            <div class="tab-pane-inner" id="filters-and-triggers">
                <fieldset>
+                        <div class="pure-control-group">
+                            <strong>Pro-tips:</strong><br/>
+                            <ul>
+                                <li>
+                                    Use the preview page to see your filters and triggers highlighted.
+                                </li>
+                                <li>
+                                    Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a>
+                                </li>
+                            </ul>
+                    </div>
+
                    <div class="pure-control-group">
                        {{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.",
                        class="m-d") }}
@ -114,6 +127,7 @@ User-Agent: wonderbra 1.0") }}
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
                            <li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li>
                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
+                            <li>Use the preview/show current tab to see ignores</li>
                        </ul>
                </span>

@ -138,7 +152,8 @@ User-Agent: wonderbra 1.0") }}
            <div id="actions">
                <div class="pure-control-group">

-                    <button type="submit" class="pure-button pure-button-primary">Save</button>
+                      {{ render_button(form.save_button) }} {{ render_button(form.save_and_preview_button) }}
+
                    <a href="{{url_for('api_delete', uuid=uuid)}}"
                       class="pure-button button-small button-error ">Delete</a>
                    <a href="{{url_for('api_clone', uuid=uuid)}}"
--- a/changedetectionio/templates/preview.html
+++ b/changedetectionio/templates/preview.html
@ -3,19 +3,24 @@
 {% block content %}

 <div id="settings">
-    <h1>Current</h1>
+    <h1>Current - {{watch.last_checked|format_timestamp_timeago}}</h1>
 </div>

 <div id="diff-ui">
+    <span class="ignored">Grey lines are ignored</span> <span class="triggered">Blue lines are triggers</span>
    <table>
        <tbody>
        <tr>
            <td id="diff-col">
-                <span id="result">{% for row in content %}{{row}}{% endfor %}</span>
+                    {% for row in content %}
+                        {% set classes = [] %}
+                           {% if (loop.index in ignored_line_numbers) %}{{ classes.append("ignored") }}{% endif %}
+                           {% if (loop.index in triggered_line_numbers) %}{{ classes.append("triggered") }}{% endif %}
+                    <div class="{{ classes|join(' ') }}">{{row}}</div>
+                    {% endfor %}
            </td>
        </tr>
        </tbody>
    </table>
 </div>
-
 {% endblock %}
--- a/changedetectionio/templates/settings.html
+++ b/changedetectionio/templates/settings.html
@ -95,6 +95,7 @@
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
                            <li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li>
                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
+                            <li>Use the preview/show current tab to see ignores</li>
                        </ul>
                     </span>
                    </fieldset>
--- a/changedetectionio/tests/test_ignore_regex_text.py
+++ b/changedetectionio/tests/test_ignore_regex_text.py
@ -3,6 +3,7 @@
 import time
 from flask import url_for
 from . util import live_server_setup
+from changedetectionio import html_tools

 def test_setup(live_server):
    live_server_setup(live_server)
@ -23,7 +24,7 @@ def test_strip_regex_text_func():
    ignore_lines = ["sometimes", "/\s\d{2,3}\s/", "/ignore-case text/"]

    fetcher = fetch_site_status.perform_site_check(datastore=False)
-    stripped_content = fetcher.strip_ignore_text(test_content, ignore_lines)
+    stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)

    assert b"but 1 lines" in stripped_content
    assert b"igNORe-cAse text" not in stripped_content
--- a/changedetectionio/tests/test_ignore_text.py
+++ b/changedetectionio/tests/test_ignore_text.py
@ -3,6 +3,7 @@
 import time
 from flask import url_for
 from . util import live_server_setup
+from changedetectionio import html_tools

 def test_setup(live_server):
    live_server_setup(live_server)
@ -23,7 +24,7 @@ def test_strip_text_func():
    ignore_lines = ["sometimes"]

    fetcher = fetch_site_status.perform_site_check(datastore=False)
-    stripped_content = fetcher.strip_ignore_text(test_content, ignore_lines)
+    stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)

    assert b"sometimes" not in stripped_content
    assert b"Some content" in stripped_content
@ -52,6 +53,8 @@ def set_modified_original_ignore_response():
     <p>Which is across multiple lines</p>
     </br>
     So let's see what happens.  </br>
+     <p>new ignore stuff</p>
+     <p>blah</p>
     </body>
     </html>

@ -82,7 +85,7 @@ def set_modified_ignore_response():
 def test_check_ignore_text_functionality(client, live_server):
    sleep_time_for_fetch_thread = 3

-    ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ"
+    ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ\r\nnew ignore stuff"
    set_original_ignore_response()

    # Give the endpoint time to spin up
@ -142,13 +145,25 @@ def test_check_ignore_text_functionality(client, live_server):
    assert b'unviewed' not in res.data
    assert b'/test-endpoint' in res.data

+
+
+
+
    # Just to be sure.. set a regular modified change..
    set_modified_original_ignore_response()
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(sleep_time_for_fetch_thread)
+
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

+    # Check the preview/highlighter, we should be able to see what we ignored, but it should be highlighted
+    # We only introduce the "modified" content that includes what we ignore so we can prove the newest version also displays
+    # at /preview
+    res = client.get(url_for("preview_page", uuid="first"))
+    # We should be able to see what we ignored
+    assert b'<div class="ignored">new ignore stuff' in res.data
+
    res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

--- a/changedetectionio/tests/test_trigger.py
+++ b/changedetectionio/tests/test_trigger.py
@ -129,3 +129,8 @@ def test_trigger_functionality(client, live_server):
    time.sleep(sleep_time_for_fetch_thread)
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data
+
+    # Check the preview/highlighter, we should be able to see what we triggered on, but it should be highlighted
+    res = client.get(url_for("preview_page", uuid="first"))
+    # We should be able to see what we triggered on
+    assert b'<div class="triggered">foobar' in res.data
--- a/changedetectionio/tests/test_xpath_selector.py
+++ b/changedetectionio/tests/test_xpath_selector.py
@ -96,6 +96,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data

+
 def test_xpath_validation(client, live_server):

    # Give the endpoint time to spin up