kopia lustrzana https://github.com/dgtlmoon/changedetection.io
* Re #117 - Experimental JSON selector support by using 'json:' prefix and any JSONpath rulepull/128/head
rodzic
f2643c1b65
commit
e073521f4d
|
@ -88,12 +88,27 @@ class perform_site_check():
|
|||
|
||||
html = r.text
|
||||
|
||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||
is_html = True
|
||||
css_filter_rule = self.datastore.data['watching'][uuid]['css_filter']
|
||||
if css_filter_rule and len(css_filter_rule.strip()):
|
||||
html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content)
|
||||
if 'json:' in css_filter_rule:
|
||||
# POC hack, @todo rename vars, see how it fits in with the javascript version
|
||||
import json
|
||||
from jsonpath_ng import jsonpath, parse
|
||||
|
||||
stripped_text_from_html = get_text(html)
|
||||
json_data = json.loads(html)
|
||||
jsonpath_expression = parse(css_filter_rule.replace('json:',''))
|
||||
match = jsonpath_expression.find(json_data)
|
||||
stripped_text_from_html = json.dumps(match[0].value, indent=4)
|
||||
|
||||
is_html = False
|
||||
|
||||
else:
|
||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||
html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content)
|
||||
|
||||
if is_html:
|
||||
stripped_text_from_html = get_text(html)
|
||||
|
||||
# Usually from networkIO/requests level
|
||||
except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
|
||||
|
|
|
@ -82,7 +82,7 @@ class StringDictKeyValue(StringField):
|
|||
else:
|
||||
self.data = {}
|
||||
|
||||
class ListRegex(object):
|
||||
class ValidateListRegex(object):
|
||||
"""
|
||||
Validates that anything that looks like a regex passes as a regex
|
||||
"""
|
||||
|
@ -102,6 +102,28 @@ class ListRegex(object):
|
|||
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
|
||||
raise ValidationError(message % (line))
|
||||
|
||||
class ValidateCSSJSONInput(object):
|
||||
"""
|
||||
Filter validation
|
||||
@todo CSS validator ;)
|
||||
"""
|
||||
|
||||
def __init__(self, message=None):
|
||||
self.message = message
|
||||
|
||||
def __call__(self, form, field):
|
||||
if 'json:' in field.data:
|
||||
from jsonpath_ng.exceptions import JsonPathParserError
|
||||
from jsonpath_ng import jsonpath, parse
|
||||
|
||||
input = field.data.replace('json:', '')
|
||||
|
||||
try:
|
||||
parse(input)
|
||||
except JsonPathParserError as e:
|
||||
message = field.gettext('\'%s\' is not a valid JSONPath expression. (%s)')
|
||||
raise ValidationError(message % (input, str(e)))
|
||||
|
||||
|
||||
class watchForm(Form):
|
||||
# https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5
|
||||
|
@ -111,10 +133,10 @@ class watchForm(Form):
|
|||
tag = StringField('Tag', [validators.Optional(), validators.Length(max=35)])
|
||||
minutes_between_check = html5.IntegerField('Maximum time in minutes until recheck',
|
||||
[validators.Optional(), validators.NumberRange(min=1)])
|
||||
css_filter = StringField('CSS Filter')
|
||||
css_filter = StringField('CSS/JSON Filter', [ValidateCSSJSONInput()])
|
||||
title = StringField('Title')
|
||||
|
||||
ignore_text = StringListField('Ignore Text', [ListRegex()])
|
||||
ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
|
||||
notification_urls = StringListField('Notification URL List')
|
||||
headers = StringDictKeyValue('Request Headers')
|
||||
trigger_check = BooleanField('Send test notification on save')
|
||||
|
|
|
@ -23,9 +23,12 @@
|
|||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.css_filter, size=25, placeholder=".class-name or #some-id, or other CSS selector rule.") }}
|
||||
<span class="pure-form-message-inline">Limit text to this CSS rule, only text matching this CSS rule is included.<br/>
|
||||
Please be sure that you thoroughly understand how to write CSS selector rules before filing an issue on GitHub!<br/>
|
||||
Go <a href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <b>"json:"</b>, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
|
||||
</ul>
|
||||
Please be sure that you thoroughly understand how to write CSS or JSONPath selector rules before filing an issue on GitHub! <a href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
|
||||
</span>
|
||||
</div>
|
||||
<!-- @todo: move to tabs --->
|
||||
|
|
|
@ -0,0 +1,121 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from . util import live_server_setup
|
||||
|
||||
def test_setup(live_server):
|
||||
live_server_setup(live_server)
|
||||
|
||||
def set_original_response():
|
||||
test_return_data = """
|
||||
{
|
||||
"employees": [
|
||||
{
|
||||
"id": 1,
|
||||
"name": "Pankaj",
|
||||
"salary": "10000"
|
||||
},
|
||||
{
|
||||
"name": "David",
|
||||
"salary": "5000",
|
||||
"id": 2
|
||||
}
|
||||
],
|
||||
"boss": {
|
||||
"name": "Fat guy"
|
||||
}
|
||||
}
|
||||
"""
|
||||
with open("test-datastore/output.txt", "w") as f:
|
||||
f.write(test_return_data)
|
||||
return None
|
||||
|
||||
def set_modified_response():
|
||||
test_return_data = """
|
||||
{
|
||||
"employees": [
|
||||
{
|
||||
"id": 1,
|
||||
"name": "Pankaj",
|
||||
"salary": "10000"
|
||||
},
|
||||
{
|
||||
"name": "David",
|
||||
"salary": "5000",
|
||||
"id": 2
|
||||
}
|
||||
],
|
||||
"boss": {
|
||||
"name": "Foobar"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
with open("test-datastore/output.txt", "w") as f:
|
||||
f.write(test_return_data)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
|
||||
def test_check_json_filter(client, live_server):
|
||||
|
||||
json_filter = 'json:boss.name'
|
||||
|
||||
set_original_response()
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(3)
|
||||
|
||||
# Goto the edit page, add our ignore text
|
||||
# Add our URL to the import page
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"css_filter": json_filter, "url": test_url, "tag": "", "headers": ""},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
|
||||
# Check it saved
|
||||
res = client.get(
|
||||
url_for("edit_page", uuid="first"),
|
||||
)
|
||||
assert bytes(json_filter.encode('utf-8')) in res.data
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(3)
|
||||
# Make a change
|
||||
set_modified_response()
|
||||
|
||||
# Trigger a check
|
||||
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(3)
|
||||
|
||||
# It should have 'unviewed' still
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' in res.data
|
||||
|
||||
# Should not see this, because its not in the JSONPath we entered
|
||||
res = client.get(url_for("diff_history_page", uuid="first"))
|
||||
# But the change should be there, tho its hard to test the change was detected because it will show old and new versions
|
||||
assert b'Foobar' in res.data
|
|
@ -12,7 +12,7 @@ flask-login ~= 0.5
|
|||
pytz
|
||||
urllib3
|
||||
wtforms ~= 2.3.3
|
||||
|
||||
jsonpath-ng ~= 1.5.3
|
||||
|
||||
|
||||
# Notification library
|
||||
|
|
Ładowanie…
Reference in New Issue