kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Filters - Implement jqraw: filter (use this to output nicer JSON format when selecting/filtering by JSON) (#2430)
rodzic
d31fc860cc
commit
ffd160ce0e
|
@ -63,7 +63,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
|
|||
<ul>
|
||||
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
|
||||
{% if jq_support %}
|
||||
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
|
||||
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
|
||||
{% else %}
|
||||
<li>jq support not installed</li>
|
||||
{% endif %}
|
||||
|
|
|
@ -3,8 +3,6 @@ from bs4 import BeautifulSoup
|
|||
from inscriptis import get_text
|
||||
from jsonpath_ng.ext import parse
|
||||
from typing import List
|
||||
from inscriptis.css_profiles import CSS_PROFILES, HtmlElement
|
||||
from inscriptis.html_properties import Display
|
||||
from inscriptis.model.config import ParserConfig
|
||||
from xml.sax.saxutils import escape as xml_escape
|
||||
import json
|
||||
|
@ -196,12 +194,12 @@ def extract_element(find='title', html_content=''):
|
|||
|
||||
#
|
||||
def _parse_json(json_data, json_filter):
|
||||
if 'json:' in json_filter:
|
||||
if json_filter.startswith("json:"):
|
||||
jsonpath_expression = parse(json_filter.replace('json:', ''))
|
||||
match = jsonpath_expression.find(json_data)
|
||||
return _get_stripped_text_from_json_match(match)
|
||||
|
||||
if 'jq:' in json_filter:
|
||||
if json_filter.startswith("jq:") or json_filter.startswith("jqraw:"):
|
||||
|
||||
try:
|
||||
import jq
|
||||
|
@ -209,10 +207,15 @@ def _parse_json(json_data, json_filter):
|
|||
# `jq` requires full compilation in windows and so isn't generally available
|
||||
raise Exception("jq not support not found")
|
||||
|
||||
jq_expression = jq.compile(json_filter.replace('jq:', ''))
|
||||
match = jq_expression.input(json_data).all()
|
||||
if json_filter.startswith("jq:"):
|
||||
jq_expression = jq.compile(json_filter.removeprefix("jq:"))
|
||||
match = jq_expression.input(json_data).all()
|
||||
return _get_stripped_text_from_json_match(match)
|
||||
|
||||
return _get_stripped_text_from_json_match(match)
|
||||
if json_filter.startswith("jqraw:"):
|
||||
jq_expression = jq.compile(json_filter.removeprefix("jqraw:"))
|
||||
match = jq_expression.input(json_data).all()
|
||||
return '\n'.join(str(item) for item in match)
|
||||
|
||||
def _get_stripped_text_from_json_match(match):
|
||||
s = []
|
||||
|
|
|
@ -18,7 +18,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|||
|
||||
name = 'Webpage Text/HTML, JSON and PDF changes'
|
||||
description = 'Detects all text changes where possible'
|
||||
json_filter_prefixes = ['json:', 'jq:']
|
||||
json_filter_prefixes = ['json:', 'jq:', 'jqraw:']
|
||||
|
||||
class FilterNotFoundInResponse(ValueError):
|
||||
def __init__(self, msg):
|
||||
|
|
|
@ -292,7 +292,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
|
|||
<ul>
|
||||
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
|
||||
{% if jq_support %}
|
||||
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
|
||||
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
|
||||
{% else %}
|
||||
<li>jq support not installed</li>
|
||||
{% endif %}
|
||||
|
|
|
@ -41,19 +41,26 @@ and it can also be repeated
|
|||
from .. import html_tools
|
||||
|
||||
# See that we can find the second <script> one, which is not broken, and matches our filter
|
||||
text = html_tools.extract_json_as_string(content, "json:$.offers.price")
|
||||
assert text == "23.5"
|
||||
text = html_tools.extract_json_as_string(content, "json:$.offers.priceCurrency")
|
||||
assert text == '"AUD"'
|
||||
|
||||
text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
|
||||
assert text == "5"
|
||||
|
||||
# also check for jq
|
||||
if jq_support:
|
||||
text = html_tools.extract_json_as_string(content, "jq:.offers.price")
|
||||
assert text == "23.5"
|
||||
text = html_tools.extract_json_as_string(content, "jq:.offers.priceCurrency")
|
||||
assert text == '"AUD"'
|
||||
|
||||
text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
|
||||
assert text == "5"
|
||||
|
||||
text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
|
||||
assert text == "5"
|
||||
text = html_tools.extract_json_as_string(content, "jqraw:.offers.priceCurrency")
|
||||
assert text == "AUD"
|
||||
|
||||
text = html_tools.extract_json_as_string('{"id":5}', "jqraw:.id")
|
||||
assert text == "5"
|
||||
|
||||
|
||||
# When nothing at all is found, it should throw JSONNOTFound
|
||||
# Which is caught and shown to the user in the watch-overview table
|
||||
|
@ -64,6 +71,9 @@ and it can also be repeated
|
|||
with pytest.raises(html_tools.JSONNotFound) as e_info:
|
||||
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
|
||||
|
||||
with pytest.raises(html_tools.JSONNotFound) as e_info:
|
||||
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jqraw:.id")
|
||||
|
||||
|
||||
def test_unittest_inline_extract_body():
|
||||
content = """
|
||||
|
@ -291,6 +301,10 @@ def test_check_jq_filter(client, live_server):
|
|||
if jq_support:
|
||||
check_json_filter('jq:.boss.name', client, live_server)
|
||||
|
||||
def test_check_jqraw_filter(client, live_server):
|
||||
if jq_support:
|
||||
check_json_filter('jqraw:.boss.name', client, live_server)
|
||||
|
||||
def check_json_filter_bool_val(json_filter, client, live_server):
|
||||
set_original_response()
|
||||
|
||||
|
@ -345,6 +359,10 @@ def test_check_jq_filter_bool_val(client, live_server):
|
|||
if jq_support:
|
||||
check_json_filter_bool_val("jq:.available", client, live_server)
|
||||
|
||||
def test_check_jqraw_filter_bool_val(client, live_server):
|
||||
if jq_support:
|
||||
check_json_filter_bool_val("jq:.available", client, live_server)
|
||||
|
||||
# Re #265 - Extended JSON selector test
|
||||
# Stuff to consider here
|
||||
# - Selector should be allowed to return empty when it doesnt match (people might wait for some condition)
|
||||
|
@ -491,4 +509,8 @@ def test_check_jsonpath_ext_filter(client, live_server):
|
|||
|
||||
def test_check_jq_ext_filter(client, live_server):
|
||||
if jq_support:
|
||||
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
|
||||
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
|
||||
|
||||
def test_check_jqraw_ext_filter(client, live_server):
|
||||
if jq_support:
|
||||
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
|
||||
|
|
Ładowanie…
Reference in New Issue