kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Bug fix for newer lxml module - module 'lxml.etree' has no attribute '_ElementStringResult' - reimplement _ElementStringResult (#2313 #2312)
rodzic
d4dac23ba1
commit
74707909f1
|
@ -169,15 +169,13 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
|
||||||
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
|
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
|
||||||
# (This way each 'match' reliably has a new-line in the diff)
|
# (This way each 'match' reliably has a new-line in the diff)
|
||||||
# Divs are converted to 4 whitespaces by inscriptis
|
# Divs are converted to 4 whitespaces by inscriptis
|
||||||
if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
|
if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
|
||||||
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
|
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
|
||||||
|
|
||||||
if type(element) == etree._ElementStringResult:
|
if isinstance(element, str):
|
||||||
html_block += str(element)
|
html_block += element
|
||||||
elif type(element) == etree._ElementUnicodeResult:
|
|
||||||
html_block += str(element)
|
|
||||||
else:
|
else:
|
||||||
html_block += etree.tostring(element, pretty_print=True).decode('utf-8')
|
html_block += etree.tostring(element, pretty_print=True, encoding='utf-8')
|
||||||
|
|
||||||
return html_block
|
return html_block
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/python3
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import time
|
import time
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
|
@ -255,6 +255,69 @@ def test_xpath23_prefix_validation(client, live_server):
|
||||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||||
assert b'Deleted' in res.data
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
|
def test_xpath1_lxml(client, live_server):
    """Regression test for #2312: an ``xpath1:`` ``text()`` filter with newer lxml.

    Writes a small RSS document (including a non-ASCII title) to the file
    used by the test endpoint, imports that endpoint as a watch, applies the
    filter ``xpath1://title/text()`` and then checks that:

    * the index page contains neither '_ElementStringResult' nor 'Exception';
    * the preview page contains the title strings from the feed.
    """
    # NOTE(review): the setup call below was already disabled in the original;
    # confirm the live server is prepared elsewhere in this module.
    # live_server_setup(live_server)

    rss_payload = '''<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
<channel>
<title>rpilocator.com</title>
<link>https://rpilocator.com</link>
<description>Find Raspberry Pi Computers in Stock</description>
<lastBuildDate>Thu, 19 May 2022 23:27:30 GMT</lastBuildDate>
<image>
<url>https://rpilocator.com/favicon.png</url>
<title>rpilocator.com</title>
<link>https://rpilocator.com/</link>
<width>32</width>
<height>32</height>
</image>
<item>
<title>Stock Alert (UK): RPi CM4</title>
<foo>something else unrelated</foo>
</item>
<item>
<title>Stock Alert (UK): Big monitorěěěě</title>
<foo>something else unrelated</foo>
</item>
</channel>
</rss>'''.encode('utf-8')

    # The payload is written as bytes to the endpoint content file.
    with open("test-datastore/endpoint-content.txt", "wb") as endpoint_file:
        endpoint_file.write(rss_payload)

    test_url = url_for('test_endpoint', _external=True)

    # Import the endpoint URL as a new watch and wait for the first check.
    import_response = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True,
    )
    assert b"1 Imported" in import_response.data
    wait_for_all_checks(client)

    # Apply the XPath 1.0 text() filter to the watch.
    watch_settings = {
        "include_filters": "xpath1://title/text()",
        "url": test_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
    }
    client.post(
        url_for("edit_page", uuid="first"),
        data=watch_settings,
        follow_redirects=True,
    )

    ##### #2312
    wait_for_all_checks(client)
    index_response = client.get(url_for("index"))
    # Original note: tested with 5.1.1 (when _ElementStringResult was removed) and 5.1.0.
    assert b'_ElementStringResult' not in index_response.data
    assert b'Exception' not in index_response.data

    preview_response = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True,
    )

    # Both expected strings are <title> text nodes in the feed written above;
    # the second one also exercises non-ASCII characters.
    assert b"rpilocator.com" in preview_response.data
    assert "Stock Alert (UK): Big monitorěěěě".encode('utf-8') in preview_response.data
|
||||||
|
|
||||||
|
#####
|
||||||
|
|
||||||
|
|
||||||
def test_xpath1_validation(client, live_server):
|
def test_xpath1_validation(client, live_server):
|
||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
|
|
|
@ -52,6 +52,7 @@ cryptography~=3.4
|
||||||
beautifulsoup4
|
beautifulsoup4
|
||||||
|
|
||||||
# XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
|
# XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
|
||||||
|
# #2312 - lxml 5.1.1 removed _ElementStringResult, which caused: ImportError: cannot import name '_ElementStringResult' from 'lxml.etree'
|
||||||
lxml
|
lxml
|
||||||
|
|
||||||
# XPath 2.0-3.1 support - 4.2.0 broke something?
|
# XPath 2.0-3.1 support - 4.2.0 broke something?
|
||||||
|
|
Ładowanie…
Reference in New Issue