kopia lustrzana https://github.com/dgtlmoon/changedetection.io
rodzic
f87f7077a6
commit
59d31bf76f
|
@ -91,6 +91,8 @@ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/dat
|
||||||
```bash
|
```bash
|
||||||
docker-compose pull && docker-compose up -d
|
docker-compose pull && docker-compose up -d
|
||||||
```
|
```
|
||||||
|
### Filters
|
||||||
|
XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, and you can use XPath expressions exported from the various XPath element query creation tools.
|
||||||
|
|
||||||
### Notifications
|
### Notifications
|
||||||
|
|
||||||
|
|
|
@ -114,15 +114,17 @@ class perform_site_check():
|
||||||
if 'json:' in css_filter_rule:
|
if 'json:' in css_filter_rule:
|
||||||
stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule)
|
stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule)
|
||||||
is_html = False
|
is_html = False
|
||||||
else:
|
|
||||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
|
||||||
stripped_text_from_html = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
|
|
||||||
|
|
||||||
if is_html:
|
if is_html:
|
||||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||||
html_content = fetcher.content
|
html_content = fetcher.content
|
||||||
if has_filter_rule:
|
if has_filter_rule:
|
||||||
html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
|
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
|
||||||
|
if css_filter_rule[0] == '/':
|
||||||
|
html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule, html_content=fetcher.content)
|
||||||
|
else:
|
||||||
|
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||||
|
html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
|
||||||
|
|
||||||
# get_text() via inscriptis
|
# get_text() via inscriptis
|
||||||
stripped_text_from_html = get_text(html_content)
|
stripped_text_from_html = get_text(html_content)
|
||||||
|
|
|
@ -181,7 +181,7 @@ class ValidateListRegex(object):
|
||||||
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
|
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
|
||||||
raise ValidationError(message % (line))
|
raise ValidationError(message % (line))
|
||||||
|
|
||||||
class ValidateCSSJSONInput(object):
|
class ValidateCSSJSONXPATHInput(object):
|
||||||
"""
|
"""
|
||||||
Filter validation
|
Filter validation
|
||||||
@todo CSS validator ;)
|
@todo CSS validator ;)
|
||||||
|
@ -191,6 +191,24 @@ class ValidateCSSJSONInput(object):
|
||||||
self.message = message
|
self.message = message
|
||||||
|
|
||||||
def __call__(self, form, field):
|
def __call__(self, form, field):
|
||||||
|
|
||||||
|
# Nothing to see here
|
||||||
|
if not len(field.data.strip()):
|
||||||
|
return
|
||||||
|
|
||||||
|
# Does it look like XPath?
|
||||||
|
if field.data.strip()[0] == '/':
|
||||||
|
from lxml import html, etree
|
||||||
|
tree = html.fromstring("<html></html>")
|
||||||
|
|
||||||
|
try:
|
||||||
|
tree.xpath(field.data.strip())
|
||||||
|
except etree.XPathEvalError as e:
|
||||||
|
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
||||||
|
raise ValidationError(message % (field.data, str(e)))
|
||||||
|
except:
|
||||||
|
raise ValidationError("A system-error occurred when validating your XPath expression")
|
||||||
|
|
||||||
if 'json:' in field.data:
|
if 'json:' in field.data:
|
||||||
from jsonpath_ng.exceptions import JsonPathParserError, JsonPathLexerError
|
from jsonpath_ng.exceptions import JsonPathParserError, JsonPathLexerError
|
||||||
from jsonpath_ng.ext import parse
|
from jsonpath_ng.ext import parse
|
||||||
|
@ -202,6 +220,8 @@ class ValidateCSSJSONInput(object):
|
||||||
except (JsonPathParserError, JsonPathLexerError) as e:
|
except (JsonPathParserError, JsonPathLexerError) as e:
|
||||||
message = field.gettext('\'%s\' is not a valid JSONPath expression. (%s)')
|
message = field.gettext('\'%s\' is not a valid JSONPath expression. (%s)')
|
||||||
raise ValidationError(message % (input, str(e)))
|
raise ValidationError(message % (input, str(e)))
|
||||||
|
except:
|
||||||
|
raise ValidationError("A system-error occurred when validating your JSONPath expression")
|
||||||
|
|
||||||
# Re #265 - maybe in the future fetch the page and offer a
|
# Re #265 - maybe in the future fetch the page and offer a
|
||||||
# warning/notice that its possible the rule doesnt yet match anything?
|
# warning/notice that its possible the rule doesnt yet match anything?
|
||||||
|
@ -229,7 +249,7 @@ class watchForm(commonSettingsForm):
|
||||||
|
|
||||||
minutes_between_check = html5.IntegerField('Maximum time in minutes until recheck',
|
minutes_between_check = html5.IntegerField('Maximum time in minutes until recheck',
|
||||||
[validators.Optional(), validators.NumberRange(min=1)])
|
[validators.Optional(), validators.NumberRange(min=1)])
|
||||||
css_filter = StringField('CSS/JSON Filter', [ValidateCSSJSONInput()])
|
css_filter = StringField('CSS/JSON/XPATH Filter', [ValidateCSSJSONXPATHInput()])
|
||||||
title = StringField('Title')
|
title = StringField('Title')
|
||||||
|
|
||||||
ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
|
ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
|
||||||
|
|
|
@ -16,6 +16,21 @@ def css_filter(css_filter, html_content):
|
||||||
|
|
||||||
return html_block + "\n"
|
return html_block + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
def xpath_filter(xpath_filter, html_content):
    """Return the parts of *html_content* selected by an XPath rule, as one str.

    Each match is followed by a ``<br/>`` separator. Matched elements are
    serialised back to markup; other matches are emitted as escaped text.

    :param xpath_filter: XPath expression; surrounding whitespace is ignored.
    :param html_content: HTML document to search.
    :return: concatenated matches, or ``""`` when nothing matched.
    """
    from html import escape

    from lxml import etree
    from lxml import html as lxml_html

    tree = lxml_html.fromstring(html_content)
    matches = tree.xpath(xpath_filter.strip())

    # Expressions such as count(...), string(...) or boolean(...) evaluate to a
    # single scalar rather than a list of nodes, so normalise to a list first.
    if not isinstance(matches, list):
        matches = [matches]

    fragments = []
    for item in matches:
        if etree.iselement(item):
            fragments.append(etree.tostring(item, pretty_print=True).decode('utf-8'))
        else:
            # text() and @attribute steps yield plain strings, which
            # etree.tostring() rejects. Escape them so that they survive the
            # later HTML-to-text pass as literal text.
            fragments.append(escape(str(item)))

    return "".join(fragment + "<br/>" for fragment in fragments)
|
||||||
|
|
||||||
|
|
||||||
# Extract/find element
|
# Extract/find element
|
||||||
def extract_element(find='title', html_content=''):
|
def extract_element(find='title', html_content=''):
|
||||||
|
|
||||||
|
|
|
@ -95,8 +95,10 @@ User-Agent: wonderbra 1.0") }}
|
||||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||||
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <b>"json:"</b>, <a
|
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <b>"json:"</b>, <a
|
||||||
href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
|
href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
|
||||||
|
<li>XPATH - Limit text to this XPath rule; simply start the rule with a forward-slash, for example <b>//*[contains(@class, 'sametext')]</b>, <a
|
||||||
|
href="http://xpather.com/" target="new">test your XPath here</a></li>
|
||||||
</ul>
|
</ul>
|
||||||
Please be sure that you thoroughly understand how to write CSS or JSONPath selector rules before filing an issue on GitHub! <a
|
Please be sure that you thoroughly understand how to write CSS, JSONPath or XPath selector rules before filing an issue on GitHub! <a
|
||||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
|
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -0,0 +1,118 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from . util import live_server_setup
|
||||||
|
|
||||||
|
from ..html_tools import *
|
||||||
|
|
||||||
|
def test_setup(live_server):
    """Pass the ``live_server`` fixture to the shared ``live_server_setup`` helper."""
    live_server_setup(live_server)
|
||||||
|
|
||||||
|
def set_original_response():
    """Write the baseline HTML page to the test endpoint's content file."""
    baseline_page = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<div class="sametext">Some text thats the same</div>
<div class="changetext">Some text that will change</div>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(baseline_page)
|
||||||
|
|
||||||
|
def set_modified_response():
    """Write the altered HTML page to the test endpoint's content file.

    Compared with the baseline page, the free text line and the "changetext"
    div differ, while the "sametext" div is unchanged.
    """
    modified_page = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. THIS CHANGES AND SHOULDNT TRIGGER A CHANGE</br>
<div class="sametext">Some text thats the same</div>
<div class="changetext">Some new text</div>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(modified_page)
|
||||||
|
|
||||||
|
|
||||||
|
def test_check_markup_xpath_filter_restriction(client, live_server):
    """Changes outside the XPath-selected element must not flag the watch as unviewed."""
    fetch_wait = 3
    xpath_rule = "//*[contains(@class, 'sametext')]"

    set_original_response()

    # Give the endpoint time to spin up
    time.sleep(1)

    # Register the test endpoint as a watch through the import page
    test_url = url_for('test_endpoint', _external=True)
    import_response = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data

    # Trigger a first check and give the fetch thread time to pick it up
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(fetch_wait)

    # Go to the edit page and restrict the watch to the XPath rule
    edit_url = url_for("edit_page", uuid="first")
    edit_form = {
        "css_filter": xpath_rule,
        "url": test_url,
        "tag": "",
        "headers": "",
        'fetch_backend': "html_requests",
    }
    edit_response = client.post(edit_url, data=edit_form, follow_redirects=True)
    assert b"Updated watch." in edit_response.data

    # Give the thread time to pick it up
    time.sleep(fetch_wait)

    # View the diff page, which resets the watch back to "viewed"
    client.get(url_for("diff_history_page", uuid="first"), follow_redirects=True)

    # Serve the modified page; only content outside the filtered element differs
    set_modified_response()

    # Trigger another check and wait for the fetch thread
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(fetch_wait)

    index_response = client.get(url_for("index"))
    assert b'unviewed' not in index_response.data
|
||||||
|
|
||||||
|
def test_xpath_validation(client, live_server):
    """Submitting a malformed XPath rule on the edit page must report a validation error."""
    # Give the endpoint time to spin up
    time.sleep(1)

    # Register the test endpoint as a watch through the import page
    test_url = url_for('test_endpoint', _external=True)
    import_response = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data

    # Starts with "/" so it is treated as XPath, but it is not a valid expression
    edit_url = url_for("edit_page", uuid="first")
    edit_form = {
        "css_filter": "/something horrible",
        "url": test_url,
        "tag": "",
        "headers": "",
        'fetch_backend': "html_requests",
    }
    edit_response = client.post(edit_url, data=edit_form, follow_redirects=True)
    assert b"is not a valid XPath expression" in edit_response.data
|
|
@ -26,8 +26,11 @@ paho-mqtt
|
||||||
# ERROR: Could not build wheels for cryptography which use PEP 517 and cannot be installed directly
|
# ERROR: Could not build wheels for cryptography which use PEP 517 and cannot be installed directly
|
||||||
cryptography ~= 3.4
|
cryptography ~= 3.4
|
||||||
|
|
||||||
# Used for CSS filtering, replace with soupsieve and lxml for xpath
|
# Used for CSS filtering
|
||||||
bs4
|
bs4
|
||||||
|
|
||||||
|
# XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
|
||||||
|
lxml
|
||||||
|
|
||||||
# 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0
|
# 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0
|
||||||
selenium ~= 4.1.0
|
selenium ~= 4.1.0
|
||||||
|
|
Ładowanie…
Reference in New Issue