kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Restock monitor - Only try to process restock information (like scraping for "out of stock" keywords) if the page was actually rendered correctly. (#2645)
rodzic
e830fb2320
commit
e173954cdd
|
@ -158,6 +158,20 @@ class perform_site_check(difference_detection_processor):
|
||||||
update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
|
update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
|
||||||
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
|
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
|
||||||
|
|
||||||
|
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
|
||||||
|
# Otherwise a page that yielded no text would be treated as "in stock", because no out-of-stock keywords would be found in it.
|
||||||
|
from ...html_tools import html_to_text
|
||||||
|
text = html_to_text(self.fetcher.content)
|
||||||
|
logger.debug(f"Length of text after conversion: {len(text)}")
|
||||||
|
if not len(text):
|
||||||
|
from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||||
|
raise ReplyWithContentButNoText(url=watch.link,
|
||||||
|
status_code=self.fetcher.get_last_status_code(),
|
||||||
|
screenshot=self.fetcher.screenshot,
|
||||||
|
html_content=self.fetcher.content,
|
||||||
|
xpath_data=self.fetcher.xpath_data
|
||||||
|
)
|
||||||
|
|
||||||
# Which restock settings to compare against?
|
# Which restock settings to compare against?
|
||||||
restock_settings = watch.get('restock_settings', {})
|
restock_settings = watch.get('restock_settings', {})
|
||||||
|
|
||||||
|
|
Ładowanie…
Reference in New Issue