kopia lustrzana https://github.com/dgtlmoon/changedetection.io
				
				
				
			* Re #117 - Experimental JSON selector support by using the 'json:' prefix and any JSONPath rule (pull/128/head)
							rodzic
							
								
									f2643c1b65
								
							
						
					
					
						commit
						e073521f4d
					
				| 
						 | 
					@ -88,12 +88,27 @@ class perform_site_check():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            html = r.text
 | 
					            html = r.text
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
 | 
					            is_html = True
 | 
				
			||||||
            css_filter_rule = self.datastore.data['watching'][uuid]['css_filter']
 | 
					            css_filter_rule = self.datastore.data['watching'][uuid]['css_filter']
 | 
				
			||||||
            if css_filter_rule and len(css_filter_rule.strip()):
 | 
					            if css_filter_rule and len(css_filter_rule.strip()):
 | 
				
			||||||
                html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content)
 | 
					                if 'json:' in css_filter_rule:
 | 
				
			||||||
 | 
					                    # POC hack, @todo rename vars, see how it fits in with the javascript version
 | 
				
			||||||
 | 
					                    import json
 | 
				
			||||||
 | 
					                    from jsonpath_ng import jsonpath, parse
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            stripped_text_from_html = get_text(html)
 | 
					                    json_data = json.loads(html)
 | 
				
			||||||
 | 
					                    jsonpath_expression = parse(css_filter_rule.replace('json:',''))
 | 
				
			||||||
 | 
					                    match = jsonpath_expression.find(json_data)
 | 
				
			||||||
 | 
					                    stripped_text_from_html = json.dumps(match[0].value, indent=4)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    is_html = False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                else:
 | 
				
			||||||
 | 
					                    # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
 | 
				
			||||||
 | 
					                    html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if is_html:
 | 
				
			||||||
 | 
					                stripped_text_from_html = get_text(html)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Usually from networkIO/requests level
 | 
					        # Usually from networkIO/requests level
 | 
				
			||||||
        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
 | 
					        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -82,7 +82,7 @@ class StringDictKeyValue(StringField):
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            self.data = {}
 | 
					            self.data = {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class ListRegex(object):
 | 
					class ValidateListRegex(object):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    Validates that anything that looks like a regex passes as a regex
 | 
					    Validates that anything that looks like a regex passes as a regex
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
| 
						 | 
					@ -102,6 +102,28 @@ class ListRegex(object):
 | 
				
			||||||
                    message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
 | 
					                    message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
 | 
				
			||||||
                    raise ValidationError(message % (line))
 | 
					                    raise ValidationError(message % (line))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class ValidateCSSJSONInput(object):
					    """
					    Validator for the combined CSS/JSON filter field.

					    A value containing the 'json:' marker is checked as a JSONPath
					    expression; any other value is accepted unchanged.

					    @todo CSS validator ;)
					    """

					    def __init__(self, message=None):
					        # Optional replacement for the default error text; used verbatim
					        # (no %-formatting is applied to it).
					        self.message = message

					    def __call__(self, form, field):
					        """
					        Validate ``field.data``.

					        :param form: the form being validated (unused).
					        :param field: the field whose ``data`` holds the filter rule.
					        :raises ValidationError: when the JSONPath part cannot be parsed.
					        """
					        rule = field.data

					        # Empty or non-string data cannot carry a JSONPath rule; there is
					        # nothing to validate, so accept it instead of failing on `in`.
					        if not isinstance(rule, str) or 'json:' not in rule:
					            return

					        # Imported lazily so the dependency is only needed for JSON rules.
					        from jsonpath_ng.exceptions import JsonPathParserError
					        from jsonpath_ng import parse

					        # Same transformation the fetcher applies before parsing the rule,
					        # so the text validated here is the text parsed at check time.
					        expression = rule.replace('json:', '')

					        try:
					            parse(expression)
					        except JsonPathParserError as e:
					            message = self.message
					            if message is None:
					                message = field.gettext('\'%s\' is not a valid JSONPath expression. (%s)')
					                message = message % (expression, str(e))
					            raise ValidationError(message) from e
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class watchForm(Form):
 | 
					class watchForm(Form):
 | 
				
			||||||
    # https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5
 | 
					    # https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5
 | 
				
			||||||
| 
						 | 
					@ -111,10 +133,10 @@ class watchForm(Form):
 | 
				
			||||||
    tag = StringField('Tag', [validators.Optional(), validators.Length(max=35)])
 | 
					    tag = StringField('Tag', [validators.Optional(), validators.Length(max=35)])
 | 
				
			||||||
    minutes_between_check = html5.IntegerField('Maximum time in minutes until recheck',
 | 
					    minutes_between_check = html5.IntegerField('Maximum time in minutes until recheck',
 | 
				
			||||||
                                               [validators.Optional(), validators.NumberRange(min=1)])
 | 
					                                               [validators.Optional(), validators.NumberRange(min=1)])
 | 
				
			||||||
    css_filter = StringField('CSS Filter')
 | 
					    css_filter = StringField('CSS/JSON Filter', [ValidateCSSJSONInput()])
 | 
				
			||||||
    title = StringField('Title')
 | 
					    title = StringField('Title')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ignore_text = StringListField('Ignore Text', [ListRegex()])
 | 
					    ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
 | 
				
			||||||
    notification_urls = StringListField('Notification URL List')
 | 
					    notification_urls = StringListField('Notification URL List')
 | 
				
			||||||
    headers = StringDictKeyValue('Request Headers')
 | 
					    headers = StringDictKeyValue('Request Headers')
 | 
				
			||||||
    trigger_check = BooleanField('Send test notification on save')
 | 
					    trigger_check = BooleanField('Send test notification on save')
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -23,9 +23,12 @@
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
            <div class="pure-control-group">
 | 
					            <div class="pure-control-group">
 | 
				
			||||||
                {{ render_field(form.css_filter, size=25, placeholder=".class-name or #some-id, or other CSS selector rule.") }}
 | 
					                {{ render_field(form.css_filter, size=25, placeholder=".class-name or #some-id, or other CSS selector rule.") }}
 | 
				
			||||||
                <span class="pure-form-message-inline">Limit text to this CSS rule, only text matching this CSS rule is included.<br/>
 | 
					                <span class="pure-form-message-inline">
 | 
				
			||||||
                    Please be sure that you thoroughly understand how to write CSS selector rules before filing an issue on GitHub!<br/>
 | 
					                    <ul>
 | 
				
			||||||
                    Go <a href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>
 | 
					                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
 | 
				
			||||||
 | 
					                        <li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <b>"json:"</b>, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
 | 
				
			||||||
 | 
					                    </ul>
 | 
				
			||||||
 | 
					                    Please be sure that you thoroughly understand how to write CSS or JSONPath selector rules before filing an issue on GitHub! <a href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
 | 
				
			||||||
                </span>
 | 
					                </span>
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
            <!-- @todo: move to tabs --->
 | 
					            <!-- @todo: move to tabs --->
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,121 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					from flask import url_for
 | 
				
			||||||
 | 
					from . util import live_server_setup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_setup(live_server):
					    """Pass the ``live_server`` fixture to ``live_server_setup``."""
					    live_server_setup(live_server)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def set_original_response():
					    """Write the baseline JSON document (boss name "Fat guy") to test-datastore/output.txt."""
					    # Built line by line so the bytes written do not depend on how this
					    # source file is indented; the first and last entries reproduce the
					    # leading newline and the trailing four-space line of the document.
					    document_lines = (
					        '',
					        '    {',
					        '      "employees": [',
					        '        {',
					        '          "id": 1,',
					        '          "name": "Pankaj",',
					        '          "salary": "10000"',
					        '        },',
					        '        {',
					        '          "name": "David",',
					        '          "salary": "5000",',
					        '          "id": 2',
					        '        }',
					        '      ],',
					        '      "boss": {',
					        '        "name": "Fat guy"',
					        '      }',
					        '    }',
					        '    ',
					    )
					    with open("test-datastore/output.txt", "w") as handle:
					        handle.write("\n".join(document_lines))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def set_modified_response():
					    """Write the changed JSON document (boss name "Foobar") to test-datastore/output.txt."""
					    # Built line by line so the bytes written do not depend on how this
					    # source file is indented; the first and last entries reproduce the
					    # leading newline and the trailing eight-space line of the document.
					    document_lines = (
					        '',
					        '    {',
					        '      "employees": [',
					        '        {',
					        '          "id": 1,',
					        '          "name": "Pankaj",',
					        '          "salary": "10000"',
					        '        },',
					        '        {',
					        '          "name": "David",',
					        '          "salary": "5000",',
					        '          "id": 2',
					        '        }',
					        '      ],',
					        '      "boss": {',
					        '        "name": "Foobar"',
					        '      }',
					        '    }',
					        '        ',
					    )
					    with open("test-datastore/output.txt", "w") as handle:
					        handle.write("\n".join(document_lines))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_check_json_filter(client, live_server):
					    """
					    Exercise a 'json:' filter end to end.

					    Imports a watch, saves 'json:boss.name' as its filter, changes the
					    served document, then asserts the index page shows 'unviewed' and
					    the diff page contains the new value 'Foobar'.
					    """

					    def recheck_and_wait():
					        # Queue a recheck, then give the worker thread time to pick it up.
					        client.get(url_for("api_watch_checknow"), follow_redirects=True)
					        time.sleep(3)

					    json_filter = 'json:boss.name'

					    set_original_response()

					    # Give the endpoint time to spin up
					    time.sleep(1)

					    # Import the test endpoint as a new watch
					    test_url = url_for('test_endpoint', _external=True)
					    import_response = client.post(
					        url_for("import_page"),
					        data={"urls": test_url},
					        follow_redirects=True
					    )
					    assert b"1 Imported" in import_response.data

					    recheck_and_wait()

					    # Save the JSON filter on the watch through the edit page
					    edit_form = {"css_filter": json_filter, "url": test_url, "tag": "", "headers": ""}
					    save_response = client.post(
					        url_for("edit_page", uuid="first"),
					        data=edit_form,
					        follow_redirects=True
					    )
					    assert b"Updated watch." in save_response.data

					    # Check it saved
					    edit_page = client.get(
					        url_for("edit_page", uuid="first"),
					    )
					    assert json_filter.encode('utf-8') in edit_page.data

					    recheck_and_wait()

					    # Make a change
					    set_modified_response()

					    recheck_and_wait()

					    # The watch should be flagged as 'unviewed'
					    index_page = client.get(url_for("index"))
					    assert b'unviewed' in index_page.data

					    # The new value should be there, though it is hard to test that the change
					    # was detected because the page will show old and new versions
					    diff_page = client.get(url_for("diff_history_page", uuid="first"))
					    assert b'Foobar' in diff_page.data
 | 
				
			||||||
| 
						 | 
					@ -12,7 +12,7 @@ flask-login ~= 0.5
 | 
				
			||||||
pytz
 | 
					pytz
 | 
				
			||||||
urllib3
 | 
					urllib3
 | 
				
			||||||
wtforms ~= 2.3.3
 | 
					wtforms ~= 2.3.3
 | 
				
			||||||
 | 
					jsonpath-ng ~= 1.5.3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Notification library
 | 
					# Notification library
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Ładowanie…
	
		Reference in New Issue