kopia lustrzana https://github.com/dgtlmoon/changedetection.io
VisualSelector & BrowserSteps - Scraper improvements, remove duplicate code
rodzic
326b7aacbb
commit
69756f20f2
|
|
@ -257,12 +257,10 @@ class browsersteps_live_ui(steppable_browser_interface):
|
|||
self.page.evaluate("var include_filters=''")
|
||||
from pkg_resources import resource_string
|
||||
# The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
|
||||
# @todo don't duplicate these selectors, or just let them both use the same data?
|
||||
xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8')
|
||||
xpath_element_js = xpath_element_js.replace('%ELEMENTS%',
|
||||
'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section')
|
||||
from changedetectionio.content_fetcher import visualselector_xpath_selectors
|
||||
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
|
||||
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
|
||||
|
||||
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
|
||||
|
||||
return (screenshot, xpath_data)
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ import requests
|
|||
import sys
|
||||
import time
|
||||
|
||||
# Comma-separated list of element selectors that is substituted for the
# '%ELEMENTS%' placeholder in the element-scraper JavaScript before it is
# evaluated in the page.
# NOTE(review): 'section' is listed twice — presumably harmless; confirm
# before de-duplicating, since this is the exact string injected at runtime.
visualselector_xpath_selectors = (
    'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,'
    ' header, footer, section, article, aside, details,'
    ' main, nav, section, summary'
)
|
||||
|
||||
class Non200ErrorCodeReceived(Exception):
|
||||
def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
|
||||
# Set this so we can use it in other parts of the app
|
||||
|
|
@ -367,7 +369,7 @@ class base_html_playwright(Fetcher):
|
|||
else:
|
||||
self.page.evaluate("var include_filters=''")
|
||||
|
||||
self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary') + "}")
|
||||
self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
|
||||
|
||||
# Bug 3 in Playwright screenshot handling
|
||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ for (var i = 0; i < elements.length; i++) {
|
|||
left: Math.floor(bbox['left']),
|
||||
top: Math.floor(bbox['top']),
|
||||
tagName: (elements[i].tagName) ? elements[i].tagName.toLowerCase() : '',
|
||||
tagtype: (elements[i].type) ? elements[i].type.toLowerCase() : ''
|
||||
tagtype: (elements[i].tagName == 'INPUT' && elements[i].type) ? elements[i].type.toLowerCase() : ''
|
||||
});
|
||||
|
||||
}
|
||||
|
|
|
|||
Ładowanie…
Reference in New Issue