# Mirror of https://github.com/dgtlmoon/changedetection.io
# 64 lines, 2.9 KiB, Python
import sys
|
||
from changedetectionio.strtobool import strtobool
|
||
from loguru import logger
|
||
from changedetectionio.content_fetchers.exceptions import BrowserStepsStepException
|
||
import os
|
||
|
||
# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'

SCREENSHOT_MAX_HEIGHT_DEFAULT = 20000
SCREENSHOT_DEFAULT_QUALITY = 40

# Maximum total height for the final image (When in stitch mode).
# The height is capped (SCREENSHOT_MAX_HEIGHT_DEFAULT unless overridden by the
# SCREENSHOT_MAX_HEIGHT environment variable) due to the huge amount of RAM
# that was being used.
# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc)
#
# An unset, empty or whitespace-only SCREENSHOT_MAX_HEIGHT falls back to the
# default instead of raising ValueError at import time; any other
# non-integer value still raises ValueError.
SCREENSHOT_MAX_TOTAL_HEIGHT = int(
    os.getenv("SCREENSHOT_MAX_HEIGHT", "").strip() or SCREENSHOT_MAX_HEIGHT_DEFAULT
)

# The size at which we will switch to stitching method, when below this (and
# MAX_TOTAL_HEIGHT which can be set by a user) we will use the default
# screenshot method.
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
|
||
|
||
# available_fetchers() scans this module for anything whose name starts with html_;
# this information is used in the form selections.
|
||
from changedetectionio.content_fetchers.requests import fetcher as html_requests
|
||
|
||
|
||
import importlib.resources

# JavaScript helpers shipped as package resources; both are read once, as
# UTF-8 text, when this module is imported.
_fetcher_resources = importlib.resources.files("changedetectionio.content_fetchers.res")
XPATH_ELEMENT_JS = _fetcher_resources.joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
INSTOCK_DATA_JS = _fetcher_resources.joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
|
||
|
||
|
||
def available_fetchers() -> list:
    """List the fetcher classes exposed by this module.

    Scans this module's namespace for classes bound to a name starting with
    ``html_`` and reports each one together with its ``fetcher_description``
    attribute. The comments in this module note that the result is used in
    the form selections.

    Returns:
        A list of ``(name, fetcher_description)`` tuples, in the order
        produced by ``inspect.getmembers`` (sorted by name).

    Raises:
        AttributeError: if a matching class has no ``fetcher_description``.
    """
    # See the if statement at the bottom of this file for how we switch between playwright and webdriver
    import inspect

    # @todo html_ is maybe better as fetcher_ or something
    # In this case, make sure to edit the default one in store.py and fetch_site_status.py
    #
    # The inspect.isclass predicate already restricts the members to classes,
    # so no further isclass() check is needed per item.
    return [
        (name, obj.fetcher_description)
        for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass)
        if name.startswith('html_')
    ]
|
||
|
||
|
||
# Pick the class that backs `html_webdriver`. This is a system-wide setting
# driven by environment variables rather than a per-site option.
#
# PLAYWRIGHT_DRIVER_URL unset/empty -> selenium webdriver
# PLAYWRIGHT_DRIVER_URL set          -> Playwright, unless
#                                       FAST_PUPPETEER_CHROME_FETCHER is truthy,
#                                       in which case Puppeteer is used.
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)

if not use_playwright_as_chrome_fetcher:
    logger.debug("Falling back to selenium as fetcher")
    from .webdriver_selenium import fetcher as html_webdriver
elif strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')):
    logger.debug('Using direct Python Puppeteer library as fetcher')
    from .puppeteer import fetcher as html_webdriver
else:
    # @note - For now, browser steps always uses playwright
    logger.debug('Using Playwright library as fetcher')
    from .playwright import fetcher as html_webdriver
|
||
|