kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Restock & Price monitor - Huge refactor, set upper and lower price alert limits, set % change, follow the prices and restock amounts directly in the watch-overview list
rodzic
99b0935b42
commit
cffb6d748c
|
|
@ -93,7 +93,7 @@ jobs:
|
|||
- name: Playwright and SocketPuppetBrowser - Headers and requests
|
||||
run: |
|
||||
# Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .'
|
||||
|
||||
- name: Playwright and SocketPuppetBrowser - Restock detection
|
||||
run: |
|
||||
|
|
@ -231,9 +231,9 @@ jobs:
|
|||
docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt
|
||||
docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt
|
||||
|
||||
- name: Store container log
|
||||
- name: Store everything including test-datastore
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: output-logs
|
||||
path: .
|
||||
|
|
|
|||
|
|
@ -40,6 +40,8 @@ FROM python:${PYTHON_VERSION}-slim-bookworm
|
|||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libxslt1.1 \
|
||||
# For presenting price amounts correctly in the restock/price detection overview
|
||||
locales \
|
||||
# For pdftohtml
|
||||
poppler-utils \
|
||||
zlib1g \
|
||||
|
|
|
|||
|
|
@ -12,9 +12,10 @@ import copy
|
|||
# See docs/README.md for rebuilding the docs/apidoc information
|
||||
|
||||
from . import api_schema
|
||||
from ..model import watch_base
|
||||
|
||||
# Build a JSON Schema at least partially based on our Watch model
|
||||
from changedetectionio.model.Watch import base_config as watch_base_config
|
||||
watch_base_config = watch_base()
|
||||
schema = api_schema.build_watch_json_schema(watch_base_config)
|
||||
|
||||
schema_create_watch = copy.deepcopy(schema)
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||
def long_task(uuid, preferred_proxy):
|
||||
import time
|
||||
from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
|
||||
from changedetectionio.processors import text_json_diff
|
||||
from changedetectionio.processors.text_json_diff import text_json_diff
|
||||
from changedetectionio.safe_jinja import render as jinja_render
|
||||
|
||||
status = {'status': '', 'length': 0, 'text': ''}
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
|||
@price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
def accept(uuid):
    """Accept price-data tracking for a watch and queue it for an immediate recheck.

    Switches the watch to the 'restock_diff' processor, clears what was stored
    for it so far, marks the LD+JSON price data as accepted and puts the watch
    on the update queue.

    :param uuid: Key of the watch inside datastore.data['watching'].
    :return: A redirect to the "index" endpoint.
    """
    watch = datastore.data['watching'][uuid]

    watch['processor'] = 'restock_diff'
    watch.clear_watch()

    # Must be written AFTER clear_watch(): the watch model's clear_watch() resets
    # 'track_ldjson_price_data' to None, which would silently discard the acceptance.
    watch['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT

    # skip_when_checksum_same=False is passed so the recheck is not skipped on an unchanged checksum
    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
    return redirect(url_for("index"))
|
||||
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||
|
||||
default = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
|
||||
form = forms.watchForm(formdata=request.form if request.method == 'POST' else None,
|
||||
form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
|
||||
data=default,
|
||||
)
|
||||
form.datastore=datastore # needed?
|
||||
|
|
@ -126,7 +126,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||
|
||||
default = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
|
||||
form = forms.watchForm(formdata=request.form if request.method == 'POST' else None,
|
||||
form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
|
||||
data=default,
|
||||
)
|
||||
# @todo subclass form so validation works
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
from loguru import logger
|
||||
|
||||
|
||||
class Non200ErrorCodeReceived(Exception):
|
||||
def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
|
||||
# Set this so we can use it in other parts of the app
|
||||
|
|
@ -81,7 +80,7 @@ class ScreenshotUnavailable(Exception):
|
|||
self.status_code = status_code
|
||||
self.url = url
|
||||
if page_html:
|
||||
from html_tools import html_to_text
|
||||
from changedetectionio.html_tools import html_to_text
|
||||
self.page_text = html_to_text(page_html)
|
||||
return
|
||||
|
||||
|
|
|
|||
|
|
@ -1,18 +1,22 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import datetime
|
||||
import flask_login
|
||||
import locale
|
||||
import os
|
||||
import pytz
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
import timeago
|
||||
|
||||
from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor
|
||||
from .safe_jinja import render as jinja_render
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from copy import deepcopy
|
||||
from functools import wraps
|
||||
from threading import Event
|
||||
import flask_login
|
||||
import pytz
|
||||
import timeago
|
||||
|
||||
from feedgen.feed import FeedGenerator
|
||||
from flask import (
|
||||
Flask,
|
||||
|
|
@ -79,6 +83,14 @@ csrf = CSRFProtect()
|
|||
csrf.init_app(app)
|
||||
notification_debug_log=[]
|
||||
|
||||
# get locale ready
|
||||
default_locale = locale.getdefaultlocale()
|
||||
logger.info(f"System locale default is {default_locale}")
|
||||
try:
|
||||
locale.setlocale(locale.LC_ALL, default_locale)
|
||||
except locale.Error:
|
||||
logger.warning(f"Unable to set locale {default_locale}, locale is not installed maybe?")
|
||||
|
||||
watch_api = Api(app, decorators=[csrf.exempt])
|
||||
|
||||
def init_app_secret(datastore_path):
|
||||
|
|
@ -108,6 +120,14 @@ def get_darkmode_state():
|
|||
def get_css_version():
|
||||
return __version__
|
||||
|
||||
@app.template_filter('format_number_locale')
def _jinja2_filter_format_number_locale(value: float) -> str:
    """Format a number for display using the current locale, for example 4000.10 to 4,000.10.

    :param value: The number to format.
    :return: The number as text with two decimal places and locale grouping applied.
    """
    # "%.2f" pins the output to two decimals (the locale format string would otherwise return 6 decimal)
    return locale.format_string("%.2f", value, grouping=True)
|
||||
|
||||
# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread
|
||||
# running or something similar.
|
||||
@app.template_filter('format_last_checked_time')
|
||||
|
|
@ -616,11 +636,11 @@ def changedetection_app(config=None, datastore_o=None):
|
|||
@login_optionally_required
|
||||
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
|
||||
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
|
||||
|
||||
def edit_page(uuid):
|
||||
from . import forms
|
||||
from .blueprint.browser_steps.browser_steps import browser_step_ui_config
|
||||
from . import processors
|
||||
import importlib
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if not datastore.data['watching'].keys():
|
||||
|
|
@ -652,9 +672,30 @@ def changedetection_app(config=None, datastore_o=None):
|
|||
# Radio needs '' not None, or incase that the chosen one no longer exists
|
||||
if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list):
|
||||
default['proxy'] = ''
|
||||
|
||||
# proxy_override set to the json/text list of the items
|
||||
form = forms.watchForm(formdata=request.form if request.method == 'POST' else None,
|
||||
|
||||
# Does it use some custom form? does one exist?
|
||||
processor_name = datastore.data['watching'][uuid].get('processor', '')
|
||||
processor_classes = next((tpl for tpl in find_processors() if tpl[1] == processor_name), None)
|
||||
if not processor_classes:
|
||||
flash(f"Cannot load the edit form for processor/plugin '{processor_classes[1]}', plugin missing?", 'error')
|
||||
return redirect(url_for('index'))
|
||||
|
||||
parent_module = get_parent_module(processor_classes[0])
|
||||
|
||||
try:
|
||||
# Get the parent of the "processor.py" go up one, get the form (kinda spaghetti but its reusing existing code)
|
||||
forms_module = importlib.import_module(f"{parent_module.__name__}.forms")
|
||||
# Access the 'processor_settings_form' class from the 'forms' module
|
||||
form_class = getattr(forms_module, 'processor_settings_form')
|
||||
except ModuleNotFoundError as e:
|
||||
# .forms didnt exist
|
||||
form_class = forms.processor_text_json_diff_form
|
||||
except AttributeError as e:
|
||||
# .forms exists but no useful form
|
||||
form_class = forms.processor_text_json_diff_form
|
||||
|
||||
form = form_class(formdata=request.form if request.method == 'POST' else None,
|
||||
data=default
|
||||
)
|
||||
|
||||
|
|
@ -679,6 +720,11 @@ def changedetection_app(config=None, datastore_o=None):
|
|||
|
||||
if request.method == 'POST' and form.validate():
|
||||
|
||||
# If they changed processor, it makes sense to reset it.
|
||||
if datastore.data['watching'][uuid].get('processor') != form.data.get('processor'):
|
||||
datastore.data['watching'][uuid].clear_watch()
|
||||
flash("Reset watch history due to change of processor")
|
||||
|
||||
extra_update_obj = {
|
||||
'consecutive_filter_failures': 0,
|
||||
'last_error' : False
|
||||
|
|
@ -720,10 +766,11 @@ def changedetection_app(config=None, datastore_o=None):
|
|||
datastore.data['watching'][uuid].update(form.data)
|
||||
datastore.data['watching'][uuid].update(extra_update_obj)
|
||||
|
||||
if request.args.get('unpause_on_save'):
|
||||
flash("Updated watch - unpaused!")
|
||||
else:
|
||||
flash("Updated watch.")
|
||||
# Recast it if need be to right data Watch handler
|
||||
watch_class = get_custom_watch_obj_for_processor(form.data.get('processor'))
|
||||
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore_o.datastore_path, default=datastore.data['watching'][uuid])
|
||||
|
||||
flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.")
|
||||
|
||||
# Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
|
||||
# But in the case something is added we should save straight away
|
||||
|
|
@ -753,6 +800,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||
jq_support = False
|
||||
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
|
||||
is_html_webdriver = False
|
||||
|
|
@ -761,23 +809,41 @@ def changedetection_app(config=None, datastore_o=None):
|
|||
|
||||
# Only works reliably with Playwright
|
||||
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver
|
||||
template_args = {
|
||||
'available_processors': processors.available_processors(),
|
||||
'browser_steps_config': browser_step_ui_config,
|
||||
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
'extra_title': f" - Edit - {watch.label}",
|
||||
'extra_processor_config': form.extra_tab_content(),
|
||||
'form': form,
|
||||
'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||
'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
|
||||
'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
|
||||
'is_html_webdriver': is_html_webdriver,
|
||||
'jq_support': jq_support,
|
||||
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
||||
'settings_application': datastore.data['settings']['application'],
|
||||
'using_global_webdriver_wait': not default['webdriver_delay'],
|
||||
'uuid': uuid,
|
||||
'visualselector_enabled': visualselector_enabled,
|
||||
'watch': watch
|
||||
}
|
||||
|
||||
included_content = None
|
||||
if form.extra_form_content():
|
||||
# So that the extra panels can access _helpers.html etc, we set the environment to load from templates/
|
||||
# And then render the code from the module
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
import importlib.resources
|
||||
templates_dir = str(importlib.resources.files("changedetectionio").joinpath('templates'))
|
||||
env = Environment(loader=FileSystemLoader(templates_dir))
|
||||
template = env.from_string(form.extra_form_content())
|
||||
included_content = template.render(**template_args)
|
||||
|
||||
output = render_template("edit.html",
|
||||
available_processors=processors.available_processors(),
|
||||
browser_steps_config=browser_step_ui_config,
|
||||
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
extra_title=f" - Edit - {watch.label}",
|
||||
form=form,
|
||||
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||
has_extra_headers_file=len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
|
||||
has_special_tag_options=_watch_has_tag_options_set(watch=watch),
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
jq_support=jq_support,
|
||||
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
using_global_webdriver_wait=not default['webdriver_delay'],
|
||||
uuid=uuid,
|
||||
visualselector_enabled=visualselector_enabled,
|
||||
watch=watch
|
||||
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
|
||||
extra_form_content=included_content,
|
||||
**template_args
|
||||
)
|
||||
|
||||
return output
|
||||
|
|
@ -887,7 +953,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||
if request.values.get('urls') and len(request.values.get('urls').strip()):
|
||||
# Import and push into the queue for immediate update check
|
||||
importer = import_url_list()
|
||||
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor'))
|
||||
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
||||
for uuid in importer.new_uuids:
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
|
||||
|
||||
|
|
@ -1388,7 +1454,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
|
||||
i += 1
|
||||
|
||||
flash("{} watches queued for rechecking.".format(i))
|
||||
flash(f"{i} watches queued for rechecking.")
|
||||
return redirect(url_for('index', tag=tag))
|
||||
|
||||
@app.route("/form/checkbox-operations", methods=['POST'])
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import os
|
||||
import re
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
|
||||
from wtforms import (
|
||||
|
|
@ -419,15 +420,18 @@ class quickWatchForm(Form):
|
|||
|
||||
# Common to a single watch and the global settings
|
||||
class commonSettingsForm(Form):
|
||||
from . import processors
|
||||
|
||||
notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
||||
notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False)
|
||||
fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_format = SelectField('Notification format', choices=valid_notification_formats.keys())
|
||||
fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False)
|
||||
notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
||||
processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff")
|
||||
webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1,
|
||||
message="Should contain one or more seconds")])
|
||||
|
||||
class importForm(Form):
|
||||
from . import processors
|
||||
processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
|
||||
|
|
@ -447,7 +451,7 @@ class SingleBrowserStep(Form):
|
|||
# remove_button = SubmitField('-', render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Remove'})
|
||||
# add_button = SubmitField('+', render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Add new step after'})
|
||||
|
||||
class watchForm(commonSettingsForm):
|
||||
class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
url = fields.URLField('URL', validators=[validateURL()])
|
||||
tags = StringTagUUID('Group tag', [validators.Optional()], default='')
|
||||
|
|
@ -475,9 +479,6 @@ class watchForm(commonSettingsForm):
|
|||
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
|
||||
filter_text_removed = BooleanField('Removed lines', default=True)
|
||||
|
||||
# @todo this class could be moved to its own text_json_diff_watchForm and this goes to restock_diff_Watchform perhaps
|
||||
in_stock_only = BooleanField('Only trigger when product goes BACK to in-stock', default=True)
|
||||
|
||||
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
||||
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
|
||||
|
|
@ -493,6 +494,12 @@ class watchForm(commonSettingsForm):
|
|||
notification_muted = BooleanField('Notifications Muted / Off', default=False)
|
||||
notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False)
|
||||
|
||||
def extra_tab_content(self):
|
||||
return None
|
||||
|
||||
def extra_form_content(self):
|
||||
return None
|
||||
|
||||
def validate(self, **kwargs):
|
||||
if not super().validate():
|
||||
return False
|
||||
|
|
@ -513,7 +520,6 @@ class watchForm(commonSettingsForm):
|
|||
result = False
|
||||
return result
|
||||
|
||||
|
||||
class SingleExtraProxy(Form):
|
||||
|
||||
# maybe better to set some <script>var..
|
||||
|
|
|
|||
|
|
@ -243,7 +243,7 @@ def _get_stripped_text_from_json_match(match):
|
|||
# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
|
||||
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
|
||||
stripped_text_from_html = False
|
||||
|
||||
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
|
||||
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
|
||||
try:
|
||||
stripped_text_from_html = _parse_json(json.loads(content), json_filter)
|
||||
|
|
@ -282,17 +282,19 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
|||
if isinstance(json_data, dict):
|
||||
# If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
|
||||
# (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
|
||||
# @type could also be a list (Product, SubType)
|
||||
# @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
|
||||
# LD_JSON auto-extract also requires some content PLUS the ldjson to be present
|
||||
# 1833 - could be either str or dict, should not be anything else
|
||||
if json_data.get('@type') and stripped_text_from_html:
|
||||
try:
|
||||
if json_data.get('@type') == str or json_data.get('@type') == dict:
|
||||
types = [json_data.get('@type')] if isinstance(json_data.get('@type'), str) else json_data.get('@type')
|
||||
if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in types]:
|
||||
break
|
||||
except:
|
||||
continue
|
||||
|
||||
t = json_data.get('@type')
|
||||
if t and stripped_text_from_html:
|
||||
|
||||
if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
|
||||
break
|
||||
# The non-standard part, some have a list
|
||||
elif isinstance(t, list):
|
||||
if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
|
||||
break
|
||||
|
||||
elif stripped_text_from_html:
|
||||
break
|
||||
|
|
|
|||
|
|
@ -1,19 +1,14 @@
|
|||
from .Watch import base_config
|
||||
import uuid
|
||||
|
||||
class model(dict):
|
||||
from changedetectionio.model import watch_base
|
||||
|
||||
class model(watch_base):
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
|
||||
self.update(base_config)
|
||||
|
||||
self['uuid'] = str(uuid.uuid4())
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
del kw['default']
|
||||
|
||||
|
||||
# Goes at the end so we update the default object with the initialiser
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,8 @@
|
|||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio.safe_jinja import render as jinja_render
|
||||
|
||||
from . import watch_base
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
|
|
@ -15,69 +13,6 @@ SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
|
|||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
from changedetectionio.notification import (
|
||||
default_notification_format_for_watch
|
||||
)
|
||||
|
||||
base_config = {
|
||||
'body': None,
|
||||
'browser_steps': [],
|
||||
'browser_steps_last_error_step': None,
|
||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||
'check_count': 0,
|
||||
'date_created': None,
|
||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
||||
'extract_text': [], # Extract text by regex after filters
|
||||
'extract_title_as_title': False,
|
||||
'fetch_backend': 'system', # plaintext, playwright etc
|
||||
'fetch_time': 0.0,
|
||||
'processor': 'text_json_diff', # could be restock_diff or others from .processors
|
||||
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
|
||||
'filter_text_added': True,
|
||||
'filter_text_replaced': True,
|
||||
'filter_text_removed': True,
|
||||
'has_ldjson_price_data': None,
|
||||
'track_ldjson_price_data': None,
|
||||
'headers': {}, # Extra headers to send
|
||||
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'in_stock' : None,
|
||||
'in_stock_only' : True, # Only trigger change on going to instock from out-of-stock
|
||||
'include_filters': [],
|
||||
'last_checked': 0,
|
||||
'last_error': False,
|
||||
'last_viewed': 0, # history key value of the last viewed via the [diff] link
|
||||
'method': 'GET',
|
||||
'notification_alert_count': 0,
|
||||
# Custom notification content
|
||||
'notification_body': None,
|
||||
'notification_format': default_notification_format_for_watch,
|
||||
'notification_muted': False,
|
||||
'notification_title': None,
|
||||
'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL
|
||||
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
|
||||
'paused': False,
|
||||
'previous_md5': False,
|
||||
'previous_md5_before_filters': False, # Used for skipping changedetection entirely
|
||||
'proxy': None, # Preferred proxy connection
|
||||
'remote_server_reply': None, # From 'server' reply header
|
||||
'sort_text_alphabetically': False,
|
||||
'subtractive_selectors': [],
|
||||
'tag': '', # Old system of text name for a tag, to be removed
|
||||
'tags': [], # list of UUIDs to App.Tags
|
||||
'text_should_not_be_present': [], # Text that should not present
|
||||
# Re #110, so then if this is set to None, we know to use the default value instead
|
||||
# Requires setting to None on submit if it's the same as the default
|
||||
# Should be all None by default, so we use the system default in this case.
|
||||
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
|
||||
'time_between_check_use_default': True,
|
||||
'title': None,
|
||||
'trigger_text': [], # List of text or regex to wait for until a change is detected
|
||||
'url': '',
|
||||
'uuid': str(uuid.uuid4()),
|
||||
'webdriver_delay': None,
|
||||
'webdriver_js_execute_code': None, # Run before change-detection
|
||||
}
|
||||
|
||||
|
||||
def is_safe_url(test_url):
|
||||
# See https://github.com/dgtlmoon/changedetection.io/issues/1358
|
||||
|
|
@ -94,30 +29,26 @@ def is_safe_url(test_url):
|
|||
|
||||
return True
|
||||
|
||||
class model(dict):
|
||||
|
||||
class model(watch_base):
|
||||
__newest_history_key = None
|
||||
__history_n = 0
|
||||
jitter_seconds = 0
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
|
||||
self.update(base_config)
|
||||
self.__datastore_path = kw['datastore_path']
|
||||
|
||||
self['uuid'] = str(uuid.uuid4())
|
||||
|
||||
del kw['datastore_path']
|
||||
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
del kw['default']
|
||||
|
||||
if self.get('default'):
|
||||
del self['default']
|
||||
|
||||
# Be sure the cached timestamp is ready
|
||||
bump = self.history
|
||||
|
||||
# Goes at the end so we update the default object with the initialiser
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
@property
|
||||
def viewed(self):
|
||||
# Don't return viewed when last_viewed is 0 and newest_key is 0
|
||||
|
|
@ -157,6 +88,33 @@ class model(dict):
|
|||
ready_url=ready_url.replace('source:', '')
|
||||
return ready_url
|
||||
|
||||
def clear_watch(self):
    """Delete the files stored for this watch and reset its check state.

    Every entry below ``self.watch_data_dir`` whose name matches ``*.*`` is
    removed, then the status fields listed below are reset to their initial
    values. Other settings on the watch are left untouched.
    """
    import pathlib

    # JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
    for item in pathlib.Path(str(self.watch_data_dir)).rglob("*.*"):
        # rglob() also yields directories whose name contains a dot; os.unlink()
        # raises on a real directory and would abort the reset half way through.
        if item.is_dir() and not item.is_symlink():
            continue
        os.unlink(item)

    # Force the attr to recalculate (presumably the history property re-reads the
    # now-empty data directory - confirm against the property implementation)
    _ = self.history

    # Do this last because it will trigger a recheck due to last_checked being zero
    self.update({
        'browser_steps_last_error_step': None,
        'check_count': 0,
        'fetch_time': 0.0,
        'has_ldjson_price_data': None,
        'last_checked': 0,
        'last_error': False,
        'last_notification_error': False,
        'last_viewed': 0,
        'previous_md5': False,
        'previous_md5_before_filters': False,
        'remote_server_reply': None,
        'track_ldjson_price_data': None
    })
|
||||
|
||||
@property
|
||||
def is_source_type_url(self):
|
||||
return self.get('url', '').startswith('source:')
|
||||
|
|
@ -258,6 +216,13 @@ class model(dict):
|
|||
|
||||
return has_browser_steps
|
||||
|
||||
@property
def has_restock_info(self):
    """True when the watch has a 'restock' entry whose 'in_stock' value is set.

    :return: False if 'restock' is missing/empty or its 'in_stock' is None, otherwise True.
    """
    restock = self.get('restock')
    # 'in_stock' may legitimately be False, so only None counts as "no information"
    return bool(restock) and restock.get('in_stock') is not None
|
||||
|
||||
# Returns the newest key, but if there's only 1 record, then it's counted as not being new, so return 0.
|
||||
@property
|
||||
def newest_history_key(self):
|
||||
|
|
|
|||
|
|
@ -0,0 +1,73 @@
|
|||
import os
|
||||
import uuid
|
||||
|
||||
from changedetectionio import strtobool
|
||||
from changedetectionio.notification import default_notification_format_for_watch
|
||||
|
||||
class watch_base(dict):
    """Dict-based base for the watch and tag models, pre-filled with the default settings."""

    def __init__(self, *arg, **kw):
        """Load the default settings, then apply whatever the caller passed to the constructor."""
        defaults = {
            'body': None,
            'browser_steps': [],
            'browser_steps_last_error_step': None,
            'check_count': 0,
            'check_unique_lines': False,  # On change-detected, compare against all history if its something new
            'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine.
            'date_created': None,
            'extract_text': [],  # Extract text by regex after filters
            'extract_title_as_title': False,
            'fetch_backend': 'system',  # plaintext, playwright etc
            'fetch_time': 0.0,
            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
            'filter_text_added': True,
            'filter_text_removed': True,
            'filter_text_replaced': True,
            'follow_price_changes': True,
            'has_ldjson_price_data': None,
            'headers': {},  # Extra headers to send
            'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
            'in_stock_only': True,  # Only trigger change on going to instock from out-of-stock
            'include_filters': [],
            'last_checked': 0,
            'last_error': False,
            'last_viewed': 0,  # history key value of the last viewed via the [diff] link
            'method': 'GET',
            'notification_alert_count': 0,
            # Custom notification content
            'notification_body': None,
            'notification_format': default_notification_format_for_watch,
            'notification_muted': False,
            'notification_screenshot': False,  # Include the latest screenshot if available and supported by the apprise URL
            'notification_title': None,
            'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
            'paused': False,
            'previous_md5': False,
            'previous_md5_before_filters': False,  # Used for skipping changedetection entirely
            'processor': 'text_json_diff',  # could be restock_diff or others from .processors
            'price_change_threshold_percent': None,
            'proxy': None,  # Preferred proxy connection
            'remote_server_reply': None,  # From 'server' reply header
            'sort_text_alphabetically': False,
            'subtractive_selectors': [],
            'tag': '',  # Old system of text name for a tag, to be removed
            'tags': [],  # list of UUIDs to App.Tags
            'text_should_not_be_present': [],  # Text that should not present
            # Re #110, so then if this is set to None, we know to use the default value instead
            # Requires setting to None on submit if it's the same as the default
            # Should be all None by default, so we use the system default in this case.
            'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
            'time_between_check_use_default': True,
            'title': None,
            'track_ldjson_price_data': None,
            'trigger_text': [],  # List of text or regex to wait for until a change is detected
            'url': '',
            'uuid': str(uuid.uuid4()),
            'webdriver_delay': None,
            'webdriver_js_execute_code': None,  # Run before change-detection
        }

        # Defaults go in first so that the constructor arguments applied next override them
        self.update(defaults)
        super().__init__(*arg, **kw)

        # A truthy 'default' keyword is stored as an ordinary key by dict.__init__(); remove it again
        if self.get('default'):
            self.pop('default')
|
||||
|
|
@ -8,4 +8,8 @@ The concept here is to be able to switch between different domain specific probl
|
|||
Some suggestions for the future
|
||||
|
||||
- `graphical`
|
||||
- `restock_and_price` - extract price AND stock text
|
||||
|
||||
## Todo
|
||||
|
||||
- Make each processor return an extra list of sub-processors (so you could configure a single processor in different ways)
|
||||
- move restock_diff to its own pip/github repo
|
||||
|
|
|
|||
|
|
@ -1,11 +1,14 @@
|
|||
from abc import abstractmethod
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio.model import Watch
|
||||
|
||||
from copy import deepcopy
|
||||
from loguru import logger
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import importlib
|
||||
import pkgutil
|
||||
import inspect
|
||||
|
||||
class difference_detection_processor():
|
||||
|
||||
|
|
@ -139,7 +142,7 @@ class difference_detection_processor():
|
|||
# After init, call run_changedetection() which will do the actual change-detection
|
||||
|
||||
@abstractmethod
|
||||
def run_changedetection(self, watch: Watch, skip_when_checksum_same=True):
|
||||
def run_changedetection(self, watch, skip_when_checksum_same=True):
|
||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||
some_data = 'xxxxx'
|
||||
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
|
||||
|
|
@ -147,8 +150,83 @@ class difference_detection_processor():
|
|||
return changed_detected, update_obj, ''.encode('utf-8')
|
||||
|
||||
|
||||
def find_sub_packages(package_name):
    """
    List the immediate sub-packages of a package.

    :param package_name: The dotted import name of the base package to scan.
    :return: A list of sub-package names (entries that are not packages are left out).
    """
    base_package = importlib.import_module(package_name)

    sub_package_names = []
    for _finder, module_name, is_package in pkgutil.iter_modules(base_package.__path__):
        if is_package:
            sub_package_names.append(module_name)

    return sub_package_names
|
||||
|
||||
|
||||
def find_processors():
    """
    Discover the processor plugins that live as sub-packages of ``changedetectionio.processors``.

    For every sub-package, the module ``<sub_package>.processor`` is imported and scanned for
    classes that derive from ``difference_detection_processor``. A sub-package whose
    ``processor`` module cannot be imported is skipped with a warning.

    :return: A list of ``(module, sub_package_name)`` tuples, at most one per processor module.
    """
    package_name = "changedetectionio.processors"  # Name of the current package/module

    processors = []

    for sub_package in find_sub_packages(package_name):
        module_name = f"{package_name}.{sub_package}.processor"
        try:
            module = importlib.import_module(module_name)

            # Register the module once, even if it exposes several processor classes
            has_processor_class = any(
                issubclass(obj, difference_detection_processor) and obj is not difference_detection_processor
                for _name, obj in inspect.getmembers(module, inspect.isclass)
            )
            if has_processor_class:
                processors.append((module, sub_package))
        except ImportError as e:
            # ModuleNotFoundError is a subclass of ImportError, so this covers both
            logger.warning(f"Failed to import module {module_name}: {e} (find_processors())")

    return processors
|
||||
|
||||
|
||||
def get_parent_module(module):
    """
    Import and return the package that contains ``module``.

    :param module: An imported module object (only its ``__name__`` is used).
    :return: The parent module, ``None`` when ``module`` is top-level (no dot in its name),
             or ``False`` when importing the parent failed for any reason.
    """
    parent_module_name, dot, _leaf = module.__name__.rpartition('.')
    if not dot:
        return None  # Top-level module has no parent

    try:
        parent = importlib.import_module(parent_module_name)
    except Exception:
        # Best effort only: any import problem is reported as "no usable parent"
        return False

    return parent
|
||||
|
||||
|
||||
|
||||
def get_custom_watch_obj_for_processor(processor_name):
    """
    Pick the Watch class to use for a given processor.

    :param processor_name: Processor sub-package name, compared against the second element
                           of the tuples returned by ``find_processors()``.
    :return: The ``Watch`` attribute of the processor's parent package when it has one,
             otherwise the default ``Watch.model``.
    """
    from changedetectionio.model import Watch

    watch_class = Watch.model

    for module, sub_package_name in find_processors():
        if sub_package_name != processor_name:
            continue

        # Parent of .processor.py COULD have its own Watch implementation
        parent_module = get_parent_module(module)
        if hasattr(parent_module, 'Watch'):
            watch_class = parent_module.Watch
        # Only the first matching processor is considered
        break

    return watch_class
|
||||
|
||||
|
||||
def available_processors():
    """
    Get a list of processors by name and description for the UI elements.

    The list is built by introspection via ``find_processors()`` instead of a hard-coded
    pair of processors, so the entries follow the discovery order of ``find_processors()``.

    :return: A list of ``(processor_name, description)`` tuples, where the description is the
             module-level ``name`` attribute of the processor module.
    """
    return [
        (sub_package_name, module.name)
        for module, sub_package_name in find_processors()
    ]
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,10 @@
|
|||
class ProcessorException(Exception):
    """
    Error raised by a processor, carrying details about the failed check.

    All constructor arguments are stored unchanged as attributes of the same name.
    """

    def __init__(self, message=None, status_code=None, url=None, screenshot=None, has_filters=False, html_content='', xpath_data=None):
        # Hand the message to Exception so str(exc), logging and tracebacks are not blank
        # when the exception is built with keyword arguments only.
        if message is None:
            super().__init__()
        else:
            super().__init__(message)

        self.message = message
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        self.has_filters = has_filters
        self.html_content = html_content
        self.xpath_data = xpath_data
|
||||
|
|
@ -1,62 +0,0 @@
|
|||
|
||||
from . import difference_detection_processor
|
||||
from loguru import logger
|
||||
import hashlib
|
||||
import urllib3
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
name = 'Re-stock detection for single product pages'
|
||||
description = 'Detects if the product goes back to in-stock'
|
||||
|
||||
class UnableToExtractRestockData(Exception):
    """Raised when no restock data could be extracted; carries the related status code."""

    def __init__(self, status_code):
        # Kept on the instance so other parts of the app can use it
        self.status_code = status_code
|
||||
|
||||
class perform_site_check(difference_detection_processor):
    """
    Restock processor: hashes the fetcher's ``instock_data`` text and compares the checksum
    with the one stored on the watch.
    """
    screenshot = None
    xpath_data = None

    def run_changedetection(self, watch, skip_when_checksum_same=True):
        """
        Run one restock check against the already-fetched page.

        :param watch: The watch being checked; read via ``.get()``.
        :param skip_when_checksum_same: Not used by this implementation.
        :return: ``(changed_detected, update_obj, snapshot_bytes)``.
        :raises UnableToExtractRestockData: When the fetcher has no ``instock_data``.
        """
        if not watch:
            raise Exception("Watch no longer exists.")

        # Unset any existing notification error
        update_obj = {'last_notification_error': False, 'last_error': False}

        self.screenshot = self.fetcher.screenshot
        self.xpath_data = self.fetcher.xpath_data

        # Track the content type
        update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
        update_obj["last_check_status"] = self.fetcher.get_last_status_code()

        # Main detection method
        if not self.fetcher.instock_data:
            raise UnableToExtractRestockData(status_code=self.fetcher.status_code)

        fetched_md5 = hashlib.md5(self.fetcher.instock_data.encode('utf-8')).hexdigest()
        # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
        update_obj["in_stock"] = self.fetcher.instock_data == 'Possibly in stock'
        logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{self.fetcher.instock_data}' from JS scraper.")

        # The main thing that all this at the moment comes down to :)
        logger.debug(f"Watch UUID {watch.get('uuid')} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")

        changed_detected = False
        checksum_differs = watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5
        if checksum_differs:
            if not watch.get('in_stock_only'):
                # All cases
                changed_detected = True
            elif update_obj["in_stock"]:
                # We only care about it going to instock, AND we are in stock
                changed_detected = True

        # Always record the new checksum
        update_obj["previous_md5"] = fetched_md5
        return changed_detected, update_obj, self.fetcher.instock_data.encode('utf-8').strip()
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
|
||||
from changedetectionio.model.Watch import model as BaseWatch
|
||||
import re
|
||||
from babel.numbers import parse_decimal
|
||||
|
||||
class Restock(dict):
    """
    Dictionary holding restock/price information with the default keys
    ``in_stock``, ``price``, ``currency`` and ``original_price`` (all ``None`` initially).

    Assigning ``price`` through item assignment parses string values to a float and
    records the first truthy price as ``original_price``.
    """

    @staticmethod
    def _standardize_number_string(raw_value: str) -> str:
        """
        Turn a human-formatted amount into a plain ``1234.56`` style string.

        When both ',' and '.' are present, the right-most one is taken as the decimal
        separator. A separator that appears more than once on its own can only be a
        grouping separator ("1,000,000"), so it is removed. Otherwise a single ',' is
        treated as the decimal separator.
        """
        standardized_value = raw_value

        if ',' in standardized_value and '.' in standardized_value:
            # Identify the correct decimal separator
            if standardized_value.rfind('.') > standardized_value.rfind(','):
                standardized_value = standardized_value.replace(',', '')
            else:
                standardized_value = standardized_value.replace('.', '').replace(',', '.')
        elif standardized_value.count(',') > 1:
            standardized_value = standardized_value.replace(',', '')
        elif standardized_value.count('.') > 1:
            standardized_value = standardized_value.replace('.', '')
        else:
            standardized_value = standardized_value.replace(',', '.')

        # Remove any non-numeric characters except for the decimal point and minus sign
        return re.sub(r'[^\d.-]', '', standardized_value)

    def parse_currency(self, raw_value: str) -> float:
        """
        Parse a price string to a float (ie "1,400.00" becomes 1400.0).

        Even better would be to store the whole thing as an integer.

        :param raw_value: The price text, possibly with currency symbols and separators.
        :return: The parsed amount as a float.
        """
        standardized_value = self._standardize_number_string(raw_value)

        # Convert to float
        return float(parse_decimal(standardized_value, locale='en'))

    def __init__(self, *args, **kwargs):
        """
        :param args: At most one positional ``dict`` whose items are merged over the defaults
                     with ``dict.update`` (so ``__setitem__`` is not applied to them).
        :param kwargs: Accepted but not used.
        :raises ValueError: If more than one positional argument, or a non-dict, is given.
        """
        # Define default values
        default_values = {
            'in_stock': None,
            'price': None,
            'currency': None,
            'original_price': None
        }

        # Initialize the dictionary with default values
        super().__init__(default_values)

        # Update with any provided positional arguments (dictionaries)
        if args:
            if len(args) == 1 and isinstance(args[0], dict):
                self.update(args[0])
            else:
                raise ValueError("Only one positional argument of type 'dict' is allowed")

    def __setitem__(self, key, value):
        """Set ``key``; a ``price`` string is parsed and the first truthy price becomes ``original_price``."""
        # Custom logic to handle setting price and original_price
        if key == 'price':
            if isinstance(value, str):
                value = self.parse_currency(raw_value=value)

            if value and not self.get('original_price'):
                self['original_price'] = value

        super().__setitem__(key, value)
|
||||
|
||||
class Watch(BaseWatch):
    """Watch model for the restock processor; always carries a ``Restock`` under ``'restock'``."""

    def __init__(self, *arg, **kw):
        super().__init__(*arg, **kw)

        # Re-wrap stored restock data (if any) from the 'default' keyword argument
        defaults = kw.get('default')
        stored_restock = defaults.get('restock') if defaults else None
        if stored_restock:
            self['restock'] = Restock(stored_restock)
        else:
            self['restock'] = Restock()

    def clear_watch(self):
        """Clear the watch via the base class, then reset the restock data to its defaults."""
        super().clear_watch()
        self.update({'restock': Restock()})
|
||||
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
|
||||
from wtforms import (
|
||||
BooleanField,
|
||||
validators,
|
||||
FloatField
|
||||
)
|
||||
|
||||
from changedetectionio.forms import processor_text_json_diff_form
|
||||
|
||||
class processor_settings_form(processor_text_json_diff_form):
    """Settings form for the restock & price processor: extra fields plus their tab markup."""

    in_stock_only = BooleanField(
        label='Only trigger when product goes BACK to in-stock',
        default=True,
    )
    price_change_min = FloatField(
        label='Minimum amount to trigger notification',
        validators=[validators.Optional()],
        render_kw={"placeholder": "No limit", "size": "10"},
    )
    price_change_max = FloatField(
        label='Maximum amount to trigger notification',
        validators=[validators.Optional()],
        render_kw={"placeholder": "No limit", "size": "10"},
    )
    price_change_threshold_percent = FloatField(
        label='Threshold in % for price changes',
        validators=[
            validators.Optional(),
            validators.NumberRange(min=0, max=100, message="Should be between 0 and 100"),
        ],
        render_kw={"placeholder": "0%", "size": "5"},
    )

    follow_price_changes = BooleanField(label='Follow price changes', default=False)

    def extra_tab_content(self):
        """Return the title of the extra tab."""
        return 'Restock & Price Detection'

    def extra_form_content(self):
        """Return the template source that renders the extra fields."""
        # NOTE(review): whitespace inside this template was reconstructed; confirm against the real file.
        return """
        {% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
        <script>
            $(document).ready(function () {
                toggleOpacity('#follow_price_changes', '.price-change-minmax', true);
            });
        </script>


        <fieldset>
            <div class="pure-control-group">
                <fieldset class="pure-group">
                    {{ render_checkbox_field(form.in_stock_only) }}
                    <span class="pure-form-message-inline">Only trigger notifications when page changes from <strong>out of stock</strong> to <strong>back in stock</strong></span>
                </fieldset>
                <fieldset class="pure-group">
                    {{ render_checkbox_field(form.follow_price_changes) }}
                    <span class="pure-form-message-inline">Changes in price should trigger a notification</span>
                    <span class="pure-form-message-inline">When OFF - only care about restock detection</span>
                </fieldset>
                <fieldset class="pure-group price-change-minmax">
                    {{ render_field(form.price_change_min, placeholder=watch['restock']['price']) }}
                    <span class="pure-form-message-inline">Minimum amount, only trigger a change when the price is less than this amount.</span>
                </fieldset>
                <fieldset class="pure-group price-change-minmax">
                    {{ render_field(form.price_change_max, placeholder=watch['restock']['price']) }}
                    <span class="pure-form-message-inline">Maximum amount, only trigger a change when the price is more than this amount.</span>
                </fieldset>
                <fieldset class="pure-group price-change-minmax">
                    {{ render_field(form.price_change_threshold_percent) }}
                    <span class="pure-form-message-inline">Price must change more than this % to trigger a change.</span><br>
                    <span class="pure-form-message-inline">For example, If the product is $1,000 USD, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
                </fieldset>
            </div>
        </fieldset>"""
|
||||
|
|
@ -0,0 +1,247 @@
|
|||
from .. import difference_detection_processor
|
||||
from ..exceptions import ProcessorException
|
||||
from . import Restock
|
||||
from loguru import logger
|
||||
import hashlib
|
||||
import re
|
||||
import urllib3
|
||||
import time
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
name = 'Re-stock & Price detection for single product pages'
|
||||
description = 'Detects if the product goes back to in-stock'
|
||||
|
||||
class UnableToExtractRestockData(Exception):
    """Raised when restock data could not be extracted; carries the related status code."""

    def __init__(self, status_code):
        # Stored so it can be used in other parts of the app
        self.status_code = status_code
|
||||
|
||||
class MoreThanOnePriceFound(Exception):
    """Raised when the page metadata contains more than one distinct price."""

    def __init__(self):
        # Takes no arguments and carries no message
        super().__init__()
|
||||
|
||||
def _search_prop_by_value(matches, value):
|
||||
for properties in matches:
|
||||
for prop in properties:
|
||||
if value in prop[0]:
|
||||
return prop[1] # Yield the desired value and exit the function
|
||||
|
||||
# should return Restock()
|
||||
# add casting?
|
||||
def get_itemprop_availability(html_content) -> Restock:
    """
    Kind of funny/cool way to find price/availability in one of many different possibilities.
    Use 'extruct' to find any possible json-ld/microdata/OpenGraph/etc data, then search it.

    :param html_content: The HTML of the page to scan.
    :return: A Restock with 'price', 'currency' and 'availability' set where found.
    :raises MoreThanOnePriceFound: When the metadata contains more than one distinct price.
    """
    from jsonpath_ng import parse

    now = time.time()
    import extruct
    logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")

    now = time.time()
    # Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7), 'rdfa' seems to be the heaviest.
    syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']

    data = extruct.extract(html_content, syntaxes=syntaxes)
    logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s")

    # First phase, dead simple scanning of anything that looks useful
    value = Restock()
    if data:
        logger.debug(f"Using jsonpath to find price/availability/etc")
        price_parse = parse('$..(price|Price)')
        pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )')
        availability_parse = parse('$..(availability|Availability)')

        price_result = price_parse.find(data)
        if price_result:
            # Right now, we just support single product items, maybe we will store the whole actual metadata
            # separately in the future and parse that for the UI?
            prices_found = {str(item.value).replace('$', '') for item in price_result}
            if len(prices_found) > 1:
                # See if all prices are different, in the case that one product has many embedded data types with the same price
                # One might have $121.95 and another 121.95 etc
                logger.warning(f"More than one price found {prices_found}, throwing exception, cant use this plugin.")
                raise MoreThanOnePriceFound()

            value['price'] = price_result[0].value

        pricecurrency_result = pricecurrency_parse.find(data)
        if pricecurrency_result:
            value['currency'] = pricecurrency_result[0].value

        availability_result = availability_parse.find(data)
        if availability_result:
            value['availability'] = availability_result[0].value

        if value.get('availability'):
            # Drop the schema.org URL prefix and surrounding quotes, compare in lower-case
            value['availability'] = re.sub(r'(?i)^(https|http)://schema.org/', '',
                                           value.get('availability').strip(' "\'').lower())

        # Second, go dig OpenGraph which is something that jsonpath_ng cant do because of the tuples and double-dots (:)
        # Run whenever ANY of the three values is still missing (previously a page with a price but
        # no availability skipped this fallback).
        if not (value.get('price') and value.get('availability') and value.get('currency')):
            logger.debug(f"Alternatively digging through OpenGraph properties for restock/price info..")
            jsonpath_expr = parse('$..properties')

            for match in jsonpath_expr.find(data):
                if not value.get('price'):
                    value['price'] = _search_prop_by_value([match.value], "price:amount")
                if not value.get('availability'):
                    value['availability'] = _search_prop_by_value([match.value], "product:availability")
                if not value.get('currency'):
                    value['currency'] = _search_prop_by_value([match.value], "price:currency")
        logger.trace(f"Processed with Extruct in {time.time()-now:.3f}s")

    return value
|
||||
|
||||
|
||||
def is_between(number, lower=None, upper=None):
    """
    Tell whether ``number`` lies within an optional, inclusive range.

    Parameters:
    number (float): The number to check.
    lower (float or None): Inclusive lower bound; None means unbounded below.
    upper (float or None): Inclusive upper bound; None means unbounded above.

    Returns:
    bool: True when every bound that was given is satisfied, False otherwise.
    """
    if lower is not None and not lower <= number:
        return False
    if upper is not None and not number <= upper:
        return False
    return True
|
||||
|
||||
|
||||
class perform_site_check(difference_detection_processor):
    """
    Restock & price processor: combines embedded page metadata with the fetcher's
    ``instock_data`` text to decide whether stock state or price changed.
    """
    screenshot = None
    xpath_data = None

    def run_changedetection(self, watch, skip_when_checksum_same=True):
        """
        Run one restock/price check against the already-fetched page.

        :param watch: The watch being checked; read via ``.get()`` and ``['restock']``.
        :param skip_when_checksum_same: Not used by this implementation.
        :return: ``(changed_detected, update_obj, snapshot_bytes)``.
        :raises ProcessorException: When more than one price is found, or when no stock
                                    information is available at all.
        """
        if not watch:
            raise Exception("Watch no longer exists.")

        # Unset any existing notification error
        update_obj = {'last_notification_error': False, 'last_error': False, 'restock': Restock()}

        self.screenshot = self.fetcher.screenshot
        self.xpath_data = self.fetcher.xpath_data

        # Track the content type
        update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
        update_obj["last_check_status"] = self.fetcher.get_last_status_code()

        try:
            itemprop_availability = get_itemprop_availability(html_content=self.fetcher.content)
        except MoreThanOnePriceFound as e:
            # Add the real data
            raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
                                     url=watch.get('url'),
                                     status_code=self.fetcher.get_last_status_code(),
                                     screenshot=self.fetcher.screenshot,
                                     xpath_data=self.fetcher.xpath_data
                                     ) from e

        # Something valid in get_itemprop_availability() by scraping metadata ?
        if itemprop_availability.get('price') or itemprop_availability.get('availability'):
            # Store for other usage
            update_obj['restock'] = itemprop_availability

            if itemprop_availability.get('availability'):
                # @todo: Configurable?
                in_stock_markers = ('instock', 'instoreonly', 'limitedavailability', 'onlineonly', 'presale')
                availability_text = itemprop_availability['availability'].lower()
                update_obj['restock']['in_stock'] = any(marker in availability_text for marker in in_stock_markers)

        if not self.fetcher.instock_data and not itemprop_availability.get('availability'):
            raise ProcessorException(
                message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
                url=watch.get('url'),
                status_code=self.fetcher.get_last_status_code(),
                screenshot=self.fetcher.screenshot,
                xpath_data=self.fetcher.xpath_data
            )

        # Nothing automatic in microdata found, revert to scraping the page
        if self.fetcher.instock_data and itemprop_availability.get('availability') is None:
            # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
            # Careful! this does not really come from chrome/js when the watch is set to plaintext
            update_obj['restock']["in_stock"] = self.fetcher.instock_data == 'Possibly in stock'
            logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{self.fetcher.instock_data}' from JS scraper.")

        # What we store in the snapshot
        snapshot_price = update_obj.get('restock').get('price') or ""
        snapshot_content = f"{update_obj.get('restock').get('in_stock')} - {snapshot_price}"

        # Main detection method
        fetched_md5 = hashlib.md5(snapshot_content.encode('utf-8')).hexdigest()

        # The main thing that all this at the moment comes down to :)
        changed_detected = False
        logger.debug(f"Watch UUID {watch.get('uuid')} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")

        # out of stock -> back in stock only?
        if watch.get('restock') and watch['restock'].get('in_stock') != update_obj['restock'].get('in_stock'):
            # Yes if we only care about it going to instock, AND we are in stock
            if watch.get('in_stock_only') and update_obj['restock']['in_stock']:
                changed_detected = True

            if not watch.get('in_stock_only'):
                # All cases
                changed_detected = True

        if watch.get('follow_price_changes') and watch.get('restock') and update_obj.get('restock') and update_obj['restock'].get('price'):
            price = float(update_obj['restock'].get('price'))
            original_price = watch['restock'].get('original_price')

            # Only compare when a previous price exists; it may be negated further down
            if original_price and price != float(original_price):
                changed_detected = True

            # Minimum/maximum price limit
            logger.debug(
                f"{watch.get('uuid')} - Change was detected, 'price_change_max' is '{watch.get('price_change_max', '')}' 'price_change_min' is '{watch.get('price_change_min', '')}', price from website is '{update_obj['restock'].get('price', '')}'.")
            min_limit = float(watch.get('price_change_min')) if watch.get('price_change_min') else None
            max_limit = float(watch.get('price_change_max')) if watch.get('price_change_max') else None

            logger.debug(f"{watch.get('uuid')} after float conversion - Min limit: '{min_limit}' Max limit: '{max_limit}' Price: '{price}'")
            if min_limit or max_limit:
                if is_between(number=price, lower=min_limit, upper=max_limit):
                    logger.trace(f"{watch.get('uuid')} {price} is between {min_limit} and {max_limit}")
                    if changed_detected:
                        logger.debug(f"{watch.get('uuid')} Override change-detected to FALSE because price was inside threshold")
                        changed_detected = False
                else:
                    logger.trace(f"{watch.get('uuid')} {price} is NOT between {min_limit} and {max_limit}")

            # Price comparison by %
            if original_price and changed_detected and watch.get('price_change_threshold_percent'):
                previous_price = float(original_price)
                pc = float(watch.get('price_change_threshold_percent'))
                change = abs((price - previous_price) / previous_price * 100)
                if change and change <= pc:
                    logger.debug(f"{watch.get('uuid')} Override change-detected to FALSE because % threshold ({pc}%) was {change:.3f}%")
                    changed_detected = False
                else:
                    logger.debug(f"{watch.get('uuid')} Price change was {change:.3f}% , (threshold {pc}%)")

        # Always record the new checksum
        update_obj["previous_md5"] = fetched_md5

        return changed_detected, update_obj, snapshot_content.encode('utf-8').strip()
|
||||
|
|
@ -6,8 +6,8 @@ import os
|
|||
import re
|
||||
import urllib3
|
||||
|
||||
from . import difference_detection_processor
|
||||
from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
|
||||
from changedetectionio.processors import difference_detection_processor
|
||||
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
|
||||
from changedetectionio import html_tools, content_fetchers
|
||||
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
|
||||
from loguru import logger
|
||||
|
|
@ -16,6 +16,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|||
|
||||
name = 'Webpage Text/HTML, JSON and PDF changes'
|
||||
description = 'Detects all text changes where possible'
|
||||
|
||||
json_filter_prefixes = ['json:', 'jq:', 'jqraw:']
|
||||
|
||||
class FilterNotFoundInResponse(ValueError):
|
||||
|
|
@ -217,7 +218,7 @@ class perform_site_check(difference_detection_processor):
|
|||
# Rewrite's the processing text based on only what diff result they want to see
|
||||
if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
|
||||
# Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
|
||||
from .. import diff
|
||||
from changedetectionio import diff
|
||||
# needs to not include (added) etc or it may get used twice
|
||||
# Replace the processed text with the preferred result
|
||||
rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
function toggleOpacity(checkboxSelector, fieldSelector) {
|
||||
function toggleOpacity(checkboxSelector, fieldSelector, inverted) {
|
||||
const checkbox = document.querySelector(checkboxSelector);
|
||||
const fields = document.querySelectorAll(fieldSelector);
|
||||
function updateOpacity() {
|
||||
const opacityValue = checkbox.checked ? 0.6 : 1;
|
||||
const opacityValue = !checkbox.checked ? (inverted ? 0.6 : 1) : (inverted ? 1 : 0.6);
|
||||
fields.forEach(field => {
|
||||
field.style.opacity = opacityValue;
|
||||
});
|
||||
|
|
@ -25,6 +25,8 @@ $(document).ready(function () {
|
|||
$('#notification-tokens-info').toggle();
|
||||
});
|
||||
|
||||
toggleOpacity('#time_between_check_use_default', '#time_between_check');
|
||||
toggleOpacity('#time_between_check_use_default', '#time_between_check', false);
|
||||
|
||||
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -186,12 +186,17 @@ code {
|
|||
}
|
||||
}
|
||||
|
||||
.watch-tag-list {
|
||||
color: var(--color-white);
|
||||
.inline-tag {
|
||||
white-space: nowrap;
|
||||
background: var(--color-text-watch-tag-list);
|
||||
border-radius: 5px;
|
||||
padding: 2px 5px;
|
||||
margin-right: 4px;
|
||||
}
|
||||
|
||||
.watch-tag-list {
|
||||
color: var(--color-white);
|
||||
background: var(--color-text-watch-tag-list);
|
||||
@extend .inline-tag;
|
||||
}
|
||||
|
||||
.box {
|
||||
|
|
@ -1061,9 +1066,8 @@ ul {
|
|||
.tracking-ldjson-price-data {
|
||||
background-color: var(--color-background-button-green);
|
||||
color: #000;
|
||||
padding: 3px;
|
||||
border-radius: 3px;
|
||||
white-space: nowrap;
|
||||
opacity: 0.6;
|
||||
@extend .inline-tag;
|
||||
}
|
||||
|
||||
.ldjson-price-track-offer {
|
||||
|
|
@ -1109,9 +1113,12 @@ ul {
|
|||
background-color: var(--color-background-button-cancel);
|
||||
color: #777;
|
||||
}
|
||||
padding: 3px;
|
||||
border-radius: 3px;
|
||||
white-space: nowrap;
|
||||
&.error {
|
||||
background-color: var(--color-background-button-error);
|
||||
color: #fff;
|
||||
opacity: 0.7;
|
||||
}
|
||||
@extend .inline-tag;
|
||||
}
|
||||
|
||||
#chrome-extension-link {
|
||||
|
|
|
|||
|
|
@ -531,12 +531,15 @@ code {
|
|||
content: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAQElEQVR42qXKwQkAIAxDUUdxtO6/RBQkQZvSi8I/pL4BoGw/XPkh4XigPmsUgh0626AjRsgxHTkUThsG2T/sIlzdTsp52kSS1wAAAABJRU5ErkJggg==);
|
||||
margin: 0 3px 0 5px; }
|
||||
|
||||
.inline-tag, .watch-tag-list, .tracking-ldjson-price-data, .restock-label {
|
||||
white-space: nowrap;
|
||||
border-radius: 5px;
|
||||
padding: 2px 5px;
|
||||
margin-right: 4px; }
|
||||
|
||||
.watch-tag-list {
|
||||
color: var(--color-white);
|
||||
white-space: nowrap;
|
||||
background: var(--color-text-watch-tag-list);
|
||||
border-radius: 5px;
|
||||
padding: 2px 5px; }
|
||||
background: var(--color-text-watch-tag-list); }
|
||||
|
||||
.box {
|
||||
max-width: 80%;
|
||||
|
|
@ -1153,9 +1156,7 @@ ul {
|
|||
.tracking-ldjson-price-data {
|
||||
background-color: var(--color-background-button-green);
|
||||
color: #000;
|
||||
padding: 3px;
|
||||
border-radius: 3px;
|
||||
white-space: nowrap; }
|
||||
opacity: 0.6; }
|
||||
|
||||
.ldjson-price-track-offer {
|
||||
font-weight: bold;
|
||||
|
|
@ -1180,16 +1181,18 @@ ul {
|
|||
#quick-watch-processor-type ul li > * {
|
||||
display: inline-block; }
|
||||
|
||||
.restock-label {
|
||||
padding: 3px;
|
||||
border-radius: 3px;
|
||||
white-space: nowrap; }
|
||||
.restock-label.in-stock {
|
||||
background-color: var(--color-background-button-green);
|
||||
color: #fff; }
|
||||
.restock-label.not-in-stock {
|
||||
background-color: var(--color-background-button-cancel);
|
||||
color: #777; }
|
||||
.restock-label.in-stock {
|
||||
background-color: var(--color-background-button-green);
|
||||
color: #fff; }
|
||||
|
||||
.restock-label.not-in-stock {
|
||||
background-color: var(--color-background-button-cancel);
|
||||
color: #777; }
|
||||
|
||||
.restock-label.error {
|
||||
background-color: var(--color-background-button-error);
|
||||
color: #fff;
|
||||
opacity: 0.7; }
|
||||
|
||||
#chrome-extension-link {
|
||||
padding: 9px;
|
||||
|
|
|
|||
|
|
@ -18,6 +18,9 @@ import time
|
|||
import uuid as uuid_builder
|
||||
from loguru import logger
|
||||
|
||||
from .processors import get_custom_watch_obj_for_processor
|
||||
from .processors.restock_diff import Restock
|
||||
|
||||
# Because the server will run as a daemon and wont know the URL for notification links when firing off a notification
|
||||
BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
|
||||
|
||||
|
|
@ -80,9 +83,15 @@ class ChangeDetectionStore:
|
|||
self.__data['settings']['application'].update(from_disk['settings']['application'])
|
||||
|
||||
# Convert each existing watch back to the Watch.model object
|
||||
|
||||
for uuid, watch in self.__data['watching'].items():
|
||||
watch['uuid']=uuid
|
||||
self.__data['watching'][uuid] = Watch.model(datastore_path=self.datastore_path, default=watch)
|
||||
watch['uuid'] = uuid
|
||||
watch_class = get_custom_watch_obj_for_processor(watch.get('processor'))
|
||||
# Only trace-log watches handled by a non-default processor. The comparison has to be made
# against the watch's 'processor' value - a UUID never equals a processor name, so testing
# 'uuid' here made the condition always true.
if watch.get('processor') != 'text_json_diff':
    logger.trace(f"Loading Watch object '{watch_class.__module__}.{watch_class.__name__}' for UUID {uuid}")
|
||||
|
||||
self.__data['watching'][uuid] = watch_class(datastore_path=self.datastore_path, default=watch)
|
||||
|
||||
logger.info(f"Watching: {uuid} {self.__data['watching'][uuid]['url']}")
|
||||
|
||||
# First time ran, Create the datastore.
|
||||
|
|
@ -240,32 +249,7 @@ class ChangeDetectionStore:
|
|||
|
||||
# Remove a watch's data but keep the entry (URL etc)
|
||||
def clear_watch_history(self, uuid):
|
||||
import pathlib
|
||||
|
||||
# JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
|
||||
for item in pathlib.Path(os.path.join(self.datastore_path, uuid)).rglob("*.*"):
|
||||
unlink(item)
|
||||
|
||||
# Force the attr to recalculate
|
||||
bump = self.__data['watching'][uuid].history
|
||||
|
||||
# Do this last because it will trigger a recheck due to last_checked being zero
|
||||
self.__data['watching'][uuid].update({
|
||||
'browser_steps_last_error_step' : None,
|
||||
'check_count': 0,
|
||||
'fetch_time' : 0.0,
|
||||
'has_ldjson_price_data': None,
|
||||
'in_stock': None,
|
||||
'last_checked': 0,
|
||||
'last_error': False,
|
||||
'last_notification_error': False,
|
||||
'last_viewed': 0,
|
||||
'previous_md5': False,
|
||||
'previous_md5_before_filters': False,
|
||||
'remote_server_reply': None,
|
||||
'track_ldjson_price_data': None,
|
||||
})
|
||||
|
||||
self.__data['watching'][uuid].clear_watch()
|
||||
self.needs_write_urgent = True
|
||||
|
||||
def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True):
|
||||
|
|
@ -342,11 +326,13 @@ class ChangeDetectionStore:
|
|||
if apply_extras.get('tags'):
|
||||
apply_extras['tags'] = list(set(apply_extras.get('tags')))
|
||||
|
||||
new_watch = Watch.model(datastore_path=self.datastore_path, url=url)
|
||||
# If the processor also has its own Watch implementation
|
||||
watch_class = get_custom_watch_obj_for_processor(apply_extras.get('processor'))
|
||||
new_watch = watch_class(datastore_path=self.datastore_path, url=url)
|
||||
|
||||
new_uuid = new_watch.get('uuid')
|
||||
|
||||
logger.debug(f"Adding URL {url} - {new_uuid}")
|
||||
logger.debug(f"Adding URL '{url}' - {new_uuid}")
|
||||
|
||||
for k in ['uuid', 'history', 'last_checked', 'last_changed', 'newest_history_key', 'previous_md5', 'viewed']:
|
||||
if k in apply_extras:
|
||||
|
|
@ -582,7 +568,8 @@ class ChangeDetectionStore:
|
|||
# Eventually almost everything todo with a watch will apply as a Tag
|
||||
# So we use the same model as a Watch
|
||||
with self.lock:
|
||||
new_tag = Watch.model(datastore_path=self.datastore_path, default={
|
||||
from .model import Tag
|
||||
new_tag = Tag.model(datastore_path=self.datastore_path, default={
|
||||
'title': name.strip(),
|
||||
'date_created': int(time.time())
|
||||
})
|
||||
|
|
@ -621,6 +608,12 @@ class ChangeDetectionStore:
|
|||
return next((v for v in tags if v.get('title', '').lower() == tag_name.lower()),
|
||||
None)
|
||||
|
||||
def any_watches_have_processor_by_name(self, processor_name):
    """Return True when at least one stored watch has its 'processor' set to ``processor_name``."""
    return any(
        entry.get('processor') == processor_name
        for entry in self.data['watching'].values()
    )
|
||||
|
||||
def get_updates_available(self):
|
||||
import inspect
|
||||
updates_available = []
|
||||
|
|
@ -849,3 +842,12 @@ class ChangeDetectionStore:
|
|||
for uuid, watch in self.data['watching'].items():
|
||||
if isinstance(watch.get('tags'), str):
|
||||
self.data['watching'][uuid]['tags'] = []
|
||||
|
||||
# Migrate old 'in_stock' values to the new Restock
|
||||
def update_17(self):
    """Move each watch's legacy 'in_stock' value into a Restock object stored under 'restock'."""
    for entry in self.data['watching'].values():
        # Watches that never had the old key need no migration
        if 'in_stock' not in entry:
            continue
        entry['restock'] = Restock({'in_stock': entry.get('in_stock')})
        del entry['in_stock']
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@
|
|||
const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');
|
||||
{% endif %}
|
||||
const notification_base_url="{{url_for('ajax_callback_send_notification_test', watch_uuid=uuid)}}";
|
||||
const playwright_enabled={% if playwright_enabled %} true {% else %} false {% endif %};
|
||||
const playwright_enabled={% if playwright_enabled %}true{% else %}false{% endif %};
|
||||
const recheck_proxy_start_url="{{url_for('check_proxies.start_check', uuid=uuid)}}";
|
||||
const proxy_recheck_status_url="{{url_for('check_proxies.get_recheck_status', uuid=uuid)}}";
|
||||
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
|
||||
|
|
@ -41,18 +41,16 @@
|
|||
<ul>
|
||||
<li class="tab" id=""><a href="#general">General</a></li>
|
||||
<li class="tab"><a href="#request">Request</a></li>
|
||||
{% if extra_tab_content %}
|
||||
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
||||
{% endif %}
|
||||
{% if playwright_enabled %}
|
||||
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li>
|
||||
{% endif %}
|
||||
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
|
||||
<li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
|
||||
{% endif %}
|
||||
|
||||
{% if watch['processor'] == 'restock_diff' %}
|
||||
<li class="tab"><a href="#restock">Restock Detection</a></li>
|
||||
{% endif %}
|
||||
<li class="tab"><a href="#notifications">Notifications</a></li>
|
||||
<li class="tab"><a href="#stats">Stats</a></li>
|
||||
</ul>
|
||||
|
|
@ -69,16 +67,9 @@
|
|||
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
|
||||
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br>
|
||||
<span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br>
|
||||
<span class="pure-form-message-inline">
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
Current mode: <strong>Webpage Text/HTML, JSON and PDF changes.</strong><br>
|
||||
<a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=restock_diff" class="pure-button button-xsmall">Switch to re-stock detection mode.</a>
|
||||
{% else %}
|
||||
Current mode: <strong>Re-stock detection.</strong><br>
|
||||
<a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=text_json_diff" class="pure-button button-xsmall">Switch to Webpage Text/HTML, JSON and PDF changes mode.</a>
|
||||
{% endif %}
|
||||
</span>
|
||||
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.processor) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.title, class="m-d") }}
|
||||
|
|
@ -413,18 +404,12 @@ Unavailable") }}
|
|||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if watch['processor'] == 'restock_diff' %}
|
||||
<div class="tab-pane-inner" id="restock">
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.in_stock_only) }}
|
||||
<span class="pure-form-message-inline">Only trigger notifications when page changes from <strong>out of stock</strong> to <strong>back in stock</strong></span>
|
||||
</div>
|
||||
</fieldset>
|
||||
{# rendered sub Template #}
|
||||
{% if extra_form_content %}
|
||||
<div class="tab-pane-inner" id="extras_tab">
|
||||
{{ extra_form_content|safe }}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% endif %}
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||
|
|
|
|||
|
|
@ -59,6 +59,11 @@
|
|||
{% set sort_order = sort_order or 'asc' %}
|
||||
{% set sort_attribute = sort_attribute or 'last_changed' %}
|
||||
{% set pagination_page = request.args.get('page', 0) %}
|
||||
{% set cols_required = 6 %}
|
||||
{% set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") %}
|
||||
{% if any_has_restock_price_processor %}
|
||||
{% set cols_required = cols_required + 1 %}
|
||||
{% endif %}
|
||||
|
||||
<div id="watch-table-wrapper">
|
||||
|
||||
|
|
@ -70,6 +75,9 @@
|
|||
<th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th class="empty-cell"></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
|
||||
{% if any_has_restock_price_processor %}
|
||||
<th>Restock & Price</th>
|
||||
{% endif %}
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th class="empty-cell"></th>
|
||||
|
|
@ -78,7 +86,7 @@
|
|||
<tbody>
|
||||
{% if not watches|length %}
|
||||
<tr>
|
||||
<td colspan="6" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('import_page')}}" >import a list</a>.</td>
|
||||
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('import_page')}}" >import a list</a>.</td>
|
||||
</tr>
|
||||
{% endif %}
|
||||
{% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %}
|
||||
|
|
@ -91,6 +99,7 @@
|
|||
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %}
|
||||
{% if watch.paused is defined and watch.paused != False %}paused{% endif %}
|
||||
{% if is_unviewed %}unviewed{% endif %}
|
||||
{% if watch.has_restock_info %} has-restock-info {% if watch['restock']['in_stock'] %}in-stock{% else %}not-in-stock{% endif %} {% else %}no-restock-info{% endif %}
|
||||
{% if watch.uuid in queued_uuids %}queued{% endif %}">
|
||||
<td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td>
|
||||
<td class="inline watch-controls">
|
||||
|
|
@ -135,30 +144,39 @@
|
|||
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
{% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %}
|
||||
<div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
{% endif %}
|
||||
{% if watch['track_ldjson_price_data'] == 'accepted' %}
|
||||
{% endif %}
|
||||
{% if watch['processor'] == 'restock_diff' %}
|
||||
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
{% if watch['processor'] == 'restock_diff' %}
|
||||
<span class="restock-label {{'in-stock' if watch['in_stock'] else 'not-in-stock' }}" title="detecting restock conditions">
|
||||
<!-- maybe some object watch['processor'][restock_diff] or.. -->
|
||||
{% if watch['last_checked'] and watch['in_stock'] != None %}
|
||||
{% if watch['in_stock'] %} In stock {% else %} Not in stock {% endif %}
|
||||
{% else %}
|
||||
Not yet checked
|
||||
{% endif %}
|
||||
</span>
|
||||
{% endif %}
|
||||
|
||||
|
||||
{% for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() %}
|
||||
<span class="watch-tag-list">{{ watch_tag.title }}</span>
|
||||
{% endfor %}
|
||||
|
||||
</td>
|
||||
<!-- @todo make it so any watch handler obj can expose this --->
|
||||
{% if any_has_restock_price_processor %}
|
||||
<td class="restock-and-price">
|
||||
{% if watch['processor'] == 'restock_diff' %}
|
||||
{% if watch.has_restock_info %}
|
||||
<span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price">
|
||||
<!-- maybe some object watch['processor'][restock_diff] or.. -->
|
||||
{% if watch['restock']['in_stock'] %} In stock {% else %} Not in stock {% endif %}
|
||||
</span>
|
||||
{% endif %}
|
||||
|
||||
{% if watch.get('restock') and watch['restock']['price'] != None %}
|
||||
{% if watch['restock']['price'] != None %}
|
||||
<span class="restock-label price" title="Price">
|
||||
{{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
|
||||
</span>
|
||||
{% endif %}
|
||||
{% elif not watch.has_restock_info %}
|
||||
<span class="restock-label error">No information</span>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</td>
|
||||
{% endif %}
|
||||
<td class="last-checked" data-timestamp="{{ watch.last_checked }}">{{watch|format_last_checked_time|safe}}</td>
|
||||
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{% if watch.history_n >=2 and watch.last_changed >0 %}
|
||||
{{watch.last_changed|format_timestamp_timeago}}
|
||||
|
|
|
|||
|
|
@ -140,6 +140,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
|
|||
url_for("edit_page", uuid="first"),
|
||||
data={"trigger_text": 'Oh yes please',
|
||||
"url": test_url,
|
||||
'processor': 'text_json_diff',
|
||||
'fetch_backend': "html_requests",
|
||||
'filter_text_removed': '',
|
||||
'filter_text_added': 'y'},
|
||||
|
|
|
|||
|
|
@ -100,12 +100,8 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
|
|||
|
||||
# Accept it
|
||||
uuid = extract_UUID_from_client(client)
|
||||
time.sleep(1)
|
||||
#time.sleep(1)
|
||||
client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True))
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Trigger a check
|
||||
time.sleep(1)
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
# Offer should be gone
|
||||
|
|
@ -120,8 +116,8 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
|
|||
headers={'x-api-key': api_key},
|
||||
)
|
||||
|
||||
# Should see this (dont know where the whitespace came from)
|
||||
assert b'"highPrice": 8099900' in res.data
|
||||
assert b'8097000' in res.data
|
||||
|
||||
# And not this cause its not the ld-json
|
||||
assert b"So let's see what happens" not in res.data
|
||||
|
||||
|
|
@ -235,4 +231,3 @@ def test_bad_ldjson_is_correctly_ignored(client, live_server, measure_memory_usa
|
|||
# f.write(test_return_data)
|
||||
#
|
||||
# _test_runner_check_bad_format_ignored(live_server=live_server, client=client, has_ldjson_price_data=False)
|
||||
|
||||
|
|
|
|||
|
|
@ -74,3 +74,8 @@ def test_consistent_history(client, live_server, measure_memory_usage):
|
|||
|
||||
|
||||
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
|
||||
|
||||
|
||||
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
|
||||
with open(json_db_file, 'r') as f:
|
||||
assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"
|
||||
|
|
|
|||
|
|
@ -9,8 +9,6 @@ def test_setup(live_server):
|
|||
# Unit test of the stripper
|
||||
# Always we are dealing in utf-8
|
||||
def test_strip_regex_text_func():
|
||||
from ..processors import text_json_diff as fetch_site_status
|
||||
|
||||
test_content = """
|
||||
but sometimes we want to remove the lines.
|
||||
|
||||
|
|
|
|||
|
|
@ -11,9 +11,6 @@ def test_setup(live_server):
|
|||
# Unit test of the stripper
|
||||
# Always we are dealing in utf-8
|
||||
def test_strip_text_func():
|
||||
from ..processors import text_json_diff as fetch_site_status
|
||||
|
||||
|
||||
test_content = """
|
||||
Some content
|
||||
is listed here
|
||||
|
|
|
|||
|
|
@ -378,11 +378,17 @@ def test_headers_textfile_in_request(client, live_server, measure_memory_usage):
|
|||
with open('test-datastore/' + extract_UUID_from_client(client) + '/headers.txt', 'w') as f:
|
||||
f.write("watch-header: nice")
|
||||
|
||||
wait_for_all_checks(client)
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
# Give the thread time to pick it up, this actually is not super reliable and pytest can terminate before the check is ran
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# WARNING - pytest and 'wait_for_all_checks' shuts down before it has actually stopped processing when using pyppeteer fetcher
|
||||
# so adding more time here
|
||||
if os.getenv('FAST_PUPPETEER_CHROME_FETCHER'):
|
||||
time.sleep(6)
|
||||
|
||||
res = client.get(url_for("edit_page", uuid="first"))
|
||||
assert b"Extra headers file found and will be added to this watch" in res.data
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,312 @@
|
|||
#!/usr/bin/python3
|
||||
import time
|
||||
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||
|
||||
# Markup snippets that each describe an in-stock product; '$$PRICE$$' is substituted by
# set_original_response(). Every entry must end with a comma - without it Python silently
# concatenates adjacent string literals and two test cases collapse into one.
instock_props = [
    # LD+JSON with non-standard list of 'type' https://github.com/dgtlmoon/changedetection.io/issues/1833
    '<script type=\'application/ld+json\'>{"@context": "http://schema.org","@type": ["Product", "SubType"],"name": "My test product","description":"","Offers": { "@type": "Offer", "offeredBy": { "@type": "Organization", "name":"Person", "telephone":"+1 999 999 999" }, "price": $$PRICE$$, "priceCurrency": "EUR", "url": "/some/url", "availability": "http://schema.org/InStock"} }</script>',
    # LD JSON
    '<script id="product-jsonld" type="application/ld+json">{"@context":"https://schema.org","@type":"Product","brand":{"@type":"Brand","name":"Ubiquiti"},"name":"UniFi Express","sku":"UX","description":"Impressively compact UniFi Cloud Gateway and WiFi 6 access point that runs UniFi Network. Powers an entire network or simply meshes as an access point.","url":"https://store.ui.com/us/en/products/ux","image":{"@type":"ImageObject","url":"https://cdn.ecomm.ui.com/products/4ed25b4c-db92-4b98-bbf3-b0989f007c0e/123417a2-895e-49c7-ba04-b6cd8f6acc03.png","width":"1500","height":"1500"},"offers":{"@type":"Offer","availability":"https://schema.org/InStock","priceSpecification":{"@type":"PriceSpecification","price":$$PRICE$$,"priceCurrency":"USD","valueAddedTaxIncluded":false}}}</script>',
    '<script id="product-schema" type="application/ld+json">{"@context": "https://schema.org","@type": "Product","itemCondition": "https://schema.org/NewCondition","image": "//1.com/hmgo","name": "Polo MuscleFit","color": "Beige","description": "Polo","sku": "0957102010","brand": {"@type": "Brand","name": "H&M"},"category": {"@type": "Thing","name": "Polo"},"offers": [{"@type": "Offer","url": "https:/www2.xxxxxx.com/fr_fr/productpage.0957102010.html","priceCurrency": "EUR","price": $$PRICE$$,"availability": "http://schema.org/InStock","seller": { "@type": "Organization", "name": "H&M"}}]}</script>',
    # Microdata
    '<div itemscope itemtype="https://schema.org/Product"><h1 itemprop="name">Example Product</h1><p itemprop="description">This is a sample product description.</p><div itemprop="offers" itemscope itemtype="https://schema.org/Offer"><p>Price: <span itemprop="price">$$$PRICE$$</span></p><link itemprop="availability" href="https://schema.org/InStock" /></div></div>',
]
|
||||
|
||||
out_of_stock_props = [
|
||||
# out of stock AND contains multiples
|
||||
'<script type="application/ld+json">{"@context":"http://schema.org","@type":"WebSite","url":"https://www.medimops.de/","potentialAction":{"@type":"SearchAction","target":"https://www.medimops.de/produkte-C0/?fcIsSearch=1&searchparam={searchparam}","query-input":"required name=searchparam"}}</script><script type="application/ld+json">{"@context":"http://schema.org","@type":"Product","name":"Horsetrader: Robert Sangster and the Rise and Fall of the Sport of Kings","image":"https://images2.medimops.eu/product/43a982/M00002551322-large.jpg","productID":"isbn:9780002551328","gtin13":"9780002551328","category":"Livres en langue étrangère","offers":{"@type":"Offer","priceCurrency":"EUR","price":$$PRICE$$,"itemCondition":"UsedCondition","availability":"OutOfStock"},"brand":{"@type":"Thing","name":"Patrick Robinson","url":"https://www.momox-shop.fr/,patrick-robinson/"}}</script>'
|
||||
]
|
||||
|
||||
def set_original_response(props_markup='', price="121.95"):
|
||||
|
||||
props_markup=props_markup.replace('$$PRICE$$', price)
|
||||
test_return_data = f"""<html>
|
||||
<body>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
<br>
|
||||
So let's see what happens. <br>
|
||||
<div>price: ${price}</div>
|
||||
{props_markup}
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(test_return_data)
|
||||
time.sleep(1)
|
||||
return None
|
||||
|
||||
|
||||
|
||||
|
||||
def test_setup(client, live_server):
|
||||
|
||||
live_server_setup(live_server)
|
||||
|
||||
def test_restock_itemprop_basic(client, live_server):
|
||||
|
||||
#live_server_setup(live_server)
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
for p in instock_props:
|
||||
set_original_response(props_markup=p)
|
||||
client.post(
|
||||
url_for("form_quick_watch_add"),
|
||||
data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
|
||||
follow_redirects=True
|
||||
)
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'more than one price detected' not in res.data
|
||||
assert b'has-restock-info' in res.data
|
||||
assert b' in-stock' in res.data
|
||||
assert b' not-in-stock' not in res.data
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
|
||||
for p in out_of_stock_props:
|
||||
set_original_response(props_markup=p)
|
||||
client.post(
|
||||
url_for("form_quick_watch_add"),
|
||||
data={"url": test_url, "tags": '', 'processor': 'restock_diff'},
|
||||
follow_redirects=True
|
||||
)
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("index"))
|
||||
|
||||
assert b'has-restock-info not-in-stock' in res.data
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
def test_itemprop_price_change(client, live_server):
|
||||
#live_server_setup(live_server)
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
set_original_response(props_markup=instock_props[0], price="190.95")
|
||||
client.post(
|
||||
url_for("form_quick_watch_add"),
|
||||
data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
# A change in price, should trigger a change by default
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'190.95' in res.data
|
||||
|
||||
# basic price change, look for notification
|
||||
set_original_response(props_markup=instock_props[0], price='180.45')
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'180.45' in res.data
|
||||
assert b'unviewed' in res.data
|
||||
client.get(url_for("mark_all_viewed"), follow_redirects=True)
|
||||
|
||||
# turning off price change trigger, but it should show the new price, with no change notification
|
||||
set_original_response(props_markup=instock_props[0], price='120.45')
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"follow_price_changes": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'120.45' in res.data
|
||||
assert b'unviewed' not in res.data
|
||||
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
def test_itemprop_price_minmax_limit(client, live_server):
    """Check the lower/upper price limits of a restock watch.

    A price that stays inside [price_change_min, price_change_max] must be shown in the
    overview without flagging the watch as 'unviewed'; a price below the minimum or above
    the maximum must flag it.
    """
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

    test_url = url_for('test_endpoint', _external=True)

    set_original_response(props_markup=instock_props[0], price="950.95")
    client.post(
        url_for("form_quick_watch_add"),
        data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
        follow_redirects=True
    )

    # A change in price, should trigger a change by default
    wait_for_all_checks(client)

    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"follow_price_changes": "y",
              "price_change_min": 900.0,
              "price_change_max": 1100.10,
              "url": test_url,
              "tags": "",
              "headers": "",
              'fetch_backend': "html_requests"
              },
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    client.get(url_for("mark_all_viewed"))

    # price changed to something greater than min (900), and less than max (1100).. should be no change
    set_original_response(props_markup=instock_props[0], price='1000.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
    res = client.get(url_for("index"))

    assert b'more than one price detected' not in res.data
    # BUT the new price should show, even though it is within limits.
    # Each alternative needs its own 'in' test: "b'..' or b'..' in data" is always true
    # because a non-empty bytes literal is truthy. Formatting depends on locale.
    assert b'1,000.45' in res.data or b'1000.45' in res.data
    assert b'unviewed' not in res.data

    # price changed to something LESS than min (900), SHOULD be a change
    set_original_response(props_markup=instock_props[0], price='890.45')
    # let previous runs wait
    time.sleep(1)
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
    assert b'1 watches queued for rechecking.' in res.data
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'890.45' in res.data
    assert b'unviewed' in res.data

    client.get(url_for("mark_all_viewed"))

    # price changed to something MORE than max (1100.10), SHOULD be a change
    set_original_response(props_markup=instock_props[0], price='1890.45')
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    # depending on locale
    assert b'1,890.45' in res.data or b'1890.45' in res.data
    assert b'unviewed' in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
|
||||
|
||||
|
||||
def test_itemprop_percent_threshold(client, live_server):
    """Check the percentage threshold of a restock watch.

    With price_change_threshold_percent set to 5, a small price move must update the shown
    price without flagging the watch as 'unviewed', while a large increase must flag it.
    """
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

    test_url = url_for('test_endpoint', _external=True)

    set_original_response(props_markup=instock_props[0], price="950.95")
    client.post(
        url_for("form_quick_watch_add"),
        data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
        follow_redirects=True
    )

    # A change in price, should trigger a change by default
    wait_for_all_checks(client)

    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"follow_price_changes": "y",
              "price_change_threshold_percent": 5.0,
              "url": test_url,
              "tags": "",
              "headers": "",
              'fetch_backend': "html_requests"
              },
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    # Basic change should not trigger
    set_original_response(props_markup=instock_props[0], price='960.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'960.45' in res.data
    assert b'unviewed' not in res.data

    # Bigger INCREASE change than the threshold should trigger
    set_original_response(props_markup=instock_props[0], price='1960.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    # Each alternative needs its own 'in' test: "b'..' or b'..' in data" is always true
    # because a non-empty bytes literal is truthy. Formatting depends on locale.
    assert b'1,960.45' in res.data or b'1960.45' in res.data
    assert b'unviewed' in res.data

    # Small decrease should NOT trigger
    client.get(url_for("mark_all_viewed"))
    set_original_response(props_markup=instock_props[0], price='1950.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    # depending on locale
    assert b'1,950.45' in res.data or b'1950.45' in res.data
    assert b'unviewed' not in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
|
||||
|
||||
def test_data_sanity(client, live_server):
    """Check that a restock price stays with its own watch and that the edit page loads.

    Flow:
      1. Clear all watches, serve a page priced 950.95, add a restock watch for
         'test_endpoint' and confirm the overview shows that price.
      2. Add a second restock watch for 'test_endpoint2' and confirm the text
         "950.95" still occurs exactly once in the overview.
      3. Clear all watches again, add only the 'test_endpoint2' watch and confirm
         the edit page requested with uuid="first" contains that URL.
    """
    #live_server_setup(live_server)

    def _purge_watches():
        # Delete every watch and require the confirmation text in the reply
        reply = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
        assert b'Deleted' in reply.data

    def _quick_add_restock_watch(watch_url):
        # Submit the quick-add form with the 'restock_diff' processor, then wait for the checks to settle
        client.post(
            url_for("form_quick_watch_add"),
            data={"url": watch_url, "tags": 'restock tests', 'processor': 'restock_diff'},
            follow_redirects=True
        )
        wait_for_all_checks(client)

    _purge_watches()

    test_url = url_for('test_endpoint', _external=True)
    test_url2 = url_for('test_endpoint2', _external=True)
    set_original_response(props_markup=instock_props[0], price="950.95")

    # First watch: its price must be listed in the overview
    _quick_add_restock_watch(test_url)
    overview = client.get(url_for("index"))
    assert b'950.95' in overview.data

    # Second watch: the price stored for the first watch must not be reused for the new one
    _quick_add_restock_watch(test_url2)
    overview_text = client.get(url_for("index")).data.decode()
    price_occurrences = overview_text.count("950.95")
    assert price_occurrences == 1, "Price should only show once (for the watch added, no other watches yet)"

    ## different test, check the edit page works on an empty request result
    _purge_watches()
    _quick_add_restock_watch(test_url2)

    edit_page_reply = client.get(url_for("edit_page", uuid="first"))
    assert test_url2.encode('utf-8') in edit_page_reply.data
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
# run from dir above changedetectionio/ dir
|
||||
# python3 -m unittest changedetectionio.tests.unit.test_restock_logic
|
||||
|
||||
import unittest
|
||||
import os
|
||||
|
||||
from changedetectionio.processors import restock_diff
|
||||
|
||||
# mostly
|
||||
class TestDiffBuilder(unittest.TestCase):
    """Unit tests for the ``restock_diff.is_between`` range check."""

    def test_logic(self):
        """Exercise ``is_between`` with a full range, a zero lower bound and a ``None`` lower bound.

        unittest assertion methods are used instead of bare ``assert`` so the
        checks still run under ``python -O`` and failures report the value seen.
        """
        self.assertTrue(
            restock_diff.is_between(number=10, lower=9, upper=11),
            "10 should be between 9 and 11",
        )
        self.assertTrue(
            restock_diff.is_between(number=10, lower=0, upper=11),
            "10 should be between 0 and 11",
        )
        # These two cases expect a lower bound of None not to exclude the number on its own
        self.assertTrue(
            restock_diff.is_between(number=10, lower=None, upper=11),
            "10 should be between None and 11",
        )
        self.assertFalse(
            restock_diff.is_between(number=12, lower=None, upper=11),
            "12 is not between None and 11",
        )
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
@ -121,18 +121,21 @@ def extract_UUID_from_client(client):
|
|||
return uuid.strip()
|
||||
|
||||
def wait_for_all_checks(client):
    """Wait until the watch overview page no longer contains 'Checking now'.

    Polls the ``index`` page through ``client`` up to 60 times, sleeping
    around each poll, and stops polling as soon as the text is gone. If the
    text never disappears, a warning is logged and the function still returns
    normally.

    This is a best-effort wait: a watch can still be 'processing' without being
    in the queue, which is why fixed delays pad the start and the end.

    NOTE(review): this block appears to come from a rendered diff without +/-
    markers, so some of the back-to-back sleeps may be an old and a new
    revision shown together - confirm against the real file.

    :param client: test client used to request the overview page.
    """
    max_attempts = 60
    log = logging.getLogger()

    time.sleep(0.1)
    # because sub-second rechecks are problematic in testing, use lots of delays
    time.sleep(1)

    for attempt in range(max_attempts):
        time.sleep(1)
        res = client.get(url_for("index"))
        if b'Checking now' not in res.data:
            break
        log.info("Waiting for watch-list to not say 'Checking now'.. %s", attempt)
        time.sleep(1)
    else:
        # Every poll still showed 'Checking now'; make the timeout visible instead of returning silently
        log.warning("Gave up waiting for watch-list to stop saying 'Checking now' after %s attempts", max_attempts)

    time.sleep(1)
|
||||
|
||||
def live_server_setup(live_server):
|
||||
|
||||
@live_server.app.route('/test-random-content-endpoint')
|
||||
|
|
@ -140,6 +143,9 @@ def live_server_setup(live_server):
|
|||
import secrets
|
||||
return "Random content - {}\n".format(secrets.token_hex(64))
|
||||
|
||||
@live_server.app.route('/test-endpoint2')
|
||||
def test_endpoint2():
|
||||
return "<html><body>some basic content</body></html>"
|
||||
|
||||
@live_server.app.route('/test-endpoint')
|
||||
def test_endpoint():
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
from .processors.exceptions import ProcessorException
|
||||
from . import content_fetchers
|
||||
from .processors.restock_diff import UnableToExtractRestockData
|
||||
from .processors.text_json_diff import FilterNotFoundInResponse
|
||||
|
||||
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
|
||||
from changedetectionio import html_tools
|
||||
from copy import deepcopy
|
||||
|
||||
import importlib
|
||||
import os
|
||||
import queue
|
||||
import threading
|
||||
|
|
@ -13,7 +15,6 @@ import time
|
|||
# Requests for checking on a single site(watch) from a queue of watches
|
||||
# (another process inserts watches into the queue that are time-ready for checking)
|
||||
|
||||
import sys
|
||||
from loguru import logger
|
||||
|
||||
class update_worker(threading.Thread):
|
||||
|
|
@ -27,7 +28,6 @@ class update_worker(threading.Thread):
|
|||
super().__init__(*args, **kwargs)
|
||||
|
||||
def queue_notification_for_watch(self, notification_q, n_object, watch):
|
||||
|
||||
from changedetectionio import diff
|
||||
dates = []
|
||||
trigger_text = ''
|
||||
|
|
@ -226,8 +226,6 @@ class update_worker(threading.Thread):
|
|||
os.unlink(full_path)
|
||||
|
||||
def run(self):
|
||||
|
||||
from .processors import text_json_diff, restock_diff
|
||||
now = time.time()
|
||||
|
||||
while not self.app.config.exit.is_set():
|
||||
|
|
@ -258,24 +256,21 @@ class update_worker(threading.Thread):
|
|||
try:
|
||||
# Processor is what we are using for detecting the "Change"
|
||||
processor = watch.get('processor', 'text_json_diff')
|
||||
# if system...
|
||||
|
||||
# Abort processing when the content was the same as the last fetch
|
||||
skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
|
||||
|
||||
|
||||
# @todo some way to switch by name
|
||||
# Init a new 'difference_detection_processor'
|
||||
# Init a new 'difference_detection_processor', first look in processors
|
||||
processor_module_name = f"changedetectionio.processors.{processor}.processor"
|
||||
try:
|
||||
processor_module = importlib.import_module(processor_module_name)
|
||||
except ModuleNotFoundError as e:
|
||||
print(f"Processor module '{processor}' not found.")
|
||||
raise e
|
||||
|
||||
if processor == 'restock_diff':
|
||||
update_handler = restock_diff.perform_site_check(datastore=self.datastore,
|
||||
update_handler = processor_module.perform_site_check(datastore=self.datastore,
|
||||
watch_uuid=uuid
|
||||
)
|
||||
else:
|
||||
# Used as a default and also by some tests
|
||||
update_handler = text_json_diff.perform_site_check(datastore=self.datastore,
|
||||
watch_uuid=uuid
|
||||
)
|
||||
|
||||
update_handler.call_browser()
|
||||
|
||||
|
|
@ -293,6 +288,16 @@ class update_worker(threading.Thread):
|
|||
logger.critical(f"File permission error updating file, watch: {uuid}")
|
||||
logger.critical(str(e))
|
||||
process_changedetection_results = False
|
||||
|
||||
# A generic other-exception thrown by processors
|
||||
except ProcessorException as e:
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot)
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
|
||||
process_changedetection_results = False
|
||||
|
||||
except content_fetchers.exceptions.ReplyWithContentButNoText as e:
|
||||
# Totally fine, it's by choice - just continue on, nothing more to care about
|
||||
# Page had elements/content but no renderable text
|
||||
|
|
@ -466,12 +471,6 @@ class update_worker(threading.Thread):
|
|||
process_changedetection_results = False
|
||||
logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}")
|
||||
|
||||
except UnableToExtractRestockData as e:
|
||||
# Usually when fetcher.instock_data returns empty
|
||||
logger.error(f"Exception (UnableToExtractRestockData) reached processing watch UUID: {uuid}")
|
||||
logger.error(str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Unable to extract restock data for this page unfortunately. (Got code {e.status_code} from server)"})
|
||||
process_changedetection_results = False
|
||||
except Exception as e:
|
||||
logger.error(f"Exception reached processing watch UUID: {uuid}")
|
||||
logger.error(str(e))
|
||||
|
|
|
|||
|
|
@ -82,5 +82,12 @@ pytest-flask ~=1.2
|
|||
jsonschema==4.17.3
|
||||
|
||||
loguru
|
||||
|
||||
# For scraping all possible metadata relating to products so we can do better restock detection
|
||||
extruct
|
||||
|
||||
# For cleaning up unknown currency formats
|
||||
babel
|
||||
|
||||
# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
|
||||
greenlet >= 3.0.3
|
||||
|
|
|
|||
Ładowanie…
Reference in New Issue