From f7f98945a2225d051e724d6664684e8c12aee9a4 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 13 Nov 2023 21:23:43 +0100 Subject: [PATCH] Visual Selector - xPath handling misc fixes (#1976) --- changedetectionio/forms.py | 2 +- .../res/xpath_element_scraper.js | 22 +++++++++++++++---- .../static/js/visual-selector.js | 2 +- changedetectionio/tests/test_extract_regex.py | 3 --- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index c640b218..d8646305 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -351,7 +351,7 @@ class ValidateCSSJSONXPATHInput(object): raise ValidationError("XPath not permitted in this field!") from lxml import etree, html tree = html.fromstring("") - line = line.replace('xpath1:', '') + line = re.sub(r'^xpath1:', '', line) try: tree.xpath(line.strip()) diff --git a/changedetectionio/res/xpath_element_scraper.js b/changedetectionio/res/xpath_element_scraper.js index db927ed6..efe593d0 100644 --- a/changedetectionio/res/xpath_element_scraper.js +++ b/changedetectionio/res/xpath_element_scraper.js @@ -170,9 +170,12 @@ if (include_filters.length) { try { // is it xpath? - if (f.startsWith('/') || f.startsWith('xpath:')) { - q = document.evaluate(f.replace('xpath:', ''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue; + if (f.startsWith('/') || f.startsWith('xpath')) { + var qry_f = f.replace(/xpath(:|\d:)/, '') + console.log("[xpath] Scanning for included filter " + qry_f) + q = document.evaluate(qry_f, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue; } else { + console.log("[css] Scanning for included filter " + f) q = document.querySelector(f); } } catch (e) { @@ -182,8 +185,18 @@ if (include_filters.length) { } if (q) { + // Try to resolve //something/text() back to its /something so we can atleast get the bounding box + try { + if (typeof q.nodeName == 'string' && q.nodeName === '#text') { + q = q.parentElement + } + } catch (e) { + console.log(e) + console.log("xpath_element_scraper: #text resolver") + } + // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element. - if (q.hasOwnProperty('getBoundingClientRect')) { + if (typeof q.getBoundingClientRect == 'function') { bbox = q.getBoundingClientRect(); console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y) } else { @@ -192,7 +205,8 @@ if (include_filters.length) { bbox = q.ownerElement.getBoundingClientRect(); console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y) } catch (e) { - console.log("xpath_element_scraper: error looking up ownerElement") + console.log(e) + console.log("xpath_element_scraper: error looking up q.ownerElement") } } } diff --git a/changedetectionio/static/js/visual-selector.js b/changedetectionio/static/js/visual-selector.js index 32faaa04..9432ae9f 100644 --- a/changedetectionio/static/js/visual-selector.js +++ b/changedetectionio/static/js/visual-selector.js @@ -149,7 +149,7 @@ $(document).ready(function () { // @todo In the future paint all that match for (const c of current_default_xpath) { for (var i = selector_data['size_pos'].length; i !== 0; i--) { - if (selector_data['size_pos'][i - 1].xpath === c) { + if (selector_data['size_pos'][i - 1].xpath.trim() === c.trim()) { console.log("highlighting " + c); current_selected_i = i - 1; highlight_current_selected_i(); diff --git a/changedetectionio/tests/test_extract_regex.py b/changedetectionio/tests/test_extract_regex.py index 7ff8f3a7..45a84800 100644 --- a/changedetectionio/tests/test_extract_regex.py +++ b/changedetectionio/tests/test_extract_regex.py @@ -227,9 +227,6 @@ def test_regex_error_handling(client, live_server): follow_redirects=True ) - with open('/tmp/fuck.html', 'wb') as f: - f.write(res.data) - assert b'is not a valid regular expression.' in res.data res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)