kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Bug fix for newer lxml module - module 'lxml.etree' has no attribute '_ElementStringResult' - reimplement _ElementStringResult (#2313 #2312)
rodzic
d4dac23ba1
commit
74707909f1
|
@ -169,15 +169,13 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
|
|||
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
|
||||
# (This way each 'match' reliably has a new-line in the diff)
|
||||
# Divs are converted to 4 whitespaces by inscriptis
|
||||
if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
|
||||
if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
|
||||
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
|
||||
|
||||
if type(element) == etree._ElementStringResult:
|
||||
html_block += str(element)
|
||||
elif type(element) == etree._ElementUnicodeResult:
|
||||
html_block += str(element)
|
||||
if isinstance(element, str):
|
||||
html_block += element
|
||||
else:
|
||||
html_block += etree.tostring(element, pretty_print=True).decode('utf-8')
|
||||
html_block += etree.tostring(element, pretty_print=True, encoding='utf-8')
|
||||
|
||||
return html_block
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
|
@ -255,6 +255,69 @@ def test_xpath23_prefix_validation(client, live_server):
|
|||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
def test_xpath1_lxml(client, live_server):
|
||||
#live_server_setup(live_server)
|
||||
|
||||
d = '''<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
|
||||
<channel>
|
||||
<title>rpilocator.com</title>
|
||||
<link>https://rpilocator.com</link>
|
||||
<description>Find Raspberry Pi Computers in Stock</description>
|
||||
<lastBuildDate>Thu, 19 May 2022 23:27:30 GMT</lastBuildDate>
|
||||
<image>
|
||||
<url>https://rpilocator.com/favicon.png</url>
|
||||
<title>rpilocator.com</title>
|
||||
<link>https://rpilocator.com/</link>
|
||||
<width>32</width>
|
||||
<height>32</height>
|
||||
</image>
|
||||
<item>
|
||||
<title>Stock Alert (UK): RPi CM4</title>
|
||||
<foo>something else unrelated</foo>
|
||||
</item>
|
||||
<item>
|
||||
<title>Stock Alert (UK): Big monitorěěěě</title>
|
||||
<foo>something else unrelated</foo>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>'''.encode('utf-8')
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "wb") as f:
|
||||
f.write(d)
|
||||
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"include_filters": "xpath1://title/text()", "url": test_url, "tags": "", "headers": "",
|
||||
'fetch_backend': "html_requests"},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
##### #2312
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'_ElementStringResult' not in res.data # tested with 5.1.1 when it was removed and 5.1.0
|
||||
assert b'Exception' not in res.data
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"rpilocator.com" in res.data # in selector
|
||||
assert "Stock Alert (UK): Big monitorěěěě".encode('utf-8') in res.data # not in selector
|
||||
|
||||
#####
|
||||
|
||||
|
||||
def test_xpath1_validation(client, live_server):
|
||||
# Add our URL to the import page
|
||||
|
|
|
@ -52,6 +52,7 @@ cryptography~=3.4
|
|||
beautifulsoup4
|
||||
|
||||
# XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
|
||||
# #2312 - In 5.1.1 _ElementStringResult was removed - ImportError: cannot import name '_ElementStringResult' from 'lxml.etree'
|
||||
lxml
|
||||
|
||||
# XPath 2.0-3.1 support - 4.2.0 broke something?
|
||||
|
|
Ładowanie…
Reference in New Issue