kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Re #2945 - Handle/Strip UTF-8 ByteOrderMark in JSON strings correctly (fixes `"Exception: No parsable JSON found in this document" ` error) (#2947)
rodzic
dbd4adf23a
commit
55da48f719
|
@ -1,5 +1,6 @@
|
|||
from typing import List
|
||||
from loguru import logger
|
||||
from lxml import etree
|
||||
from typing import List
|
||||
import json
|
||||
import re
|
||||
|
||||
|
@ -298,8 +299,10 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
|||
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
|
||||
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
|
||||
try:
|
||||
stripped_text_from_html = _parse_json(json.loads(content), json_filter)
|
||||
except json.JSONDecodeError:
|
||||
# .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work
|
||||
stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff") ), json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(str(e))
|
||||
|
||||
# Foreach <script json></script> blob.. just return the first that matches json_filter
|
||||
# As a last resort, try to parse the whole <body>
|
||||
|
|
|
@ -514,3 +514,15 @@ def test_check_jq_ext_filter(client, live_server, measure_memory_usage):
|
|||
def test_check_jqraw_ext_filter(client, live_server, measure_memory_usage):
|
||||
if jq_support:
|
||||
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
|
||||
|
||||
def test_jsonpath_BOM_utf8(client, live_server, measure_memory_usage):
|
||||
from .. import html_tools
|
||||
|
||||
# JSON string with BOM and correct double-quoted keys
|
||||
json_str = '\ufeff{"name": "José", "emoji": "😊", "language": "中文", "greeting": "Привет"}'
|
||||
|
||||
# See that we can find the second <script> one, which is not broken, and matches our filter
|
||||
text = html_tools.extract_json_as_string(json_str, "json:$.name")
|
||||
assert text == '"José"'
|
||||
|
||||
|
||||
|
|
Ładowanie…
Reference in New Issue