kopia lustrzana https://github.com/dgtlmoon/changedetection.io
Remove same checksum skip check - saved a little CPU but added a lot of complexity (#2700)
rodzic
03151da68e
commit
5bb47e47db
|
@ -58,7 +58,7 @@ class Watch(Resource):
|
||||||
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
||||||
|
|
||||||
if request.args.get('recheck'):
|
if request.args.get('recheck'):
|
||||||
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
|
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
return "OK", 200
|
return "OK", 200
|
||||||
if request.args.get('paused', '') == 'paused':
|
if request.args.get('paused', '') == 'paused':
|
||||||
self.datastore.data['watching'].get(uuid).pause()
|
self.datastore.data['watching'].get(uuid).pause()
|
||||||
|
@ -246,7 +246,7 @@ class CreateWatch(Resource):
|
||||||
|
|
||||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
|
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
|
||||||
if new_uuid:
|
if new_uuid:
|
||||||
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
|
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||||
return {'uuid': new_uuid}, 201
|
return {'uuid': new_uuid}, 201
|
||||||
else:
|
else:
|
||||||
return "Invalid or unsupported URL", 400
|
return "Invalid or unsupported URL", 400
|
||||||
|
@ -303,7 +303,7 @@ class CreateWatch(Resource):
|
||||||
|
|
||||||
if request.args.get('recheck_all'):
|
if request.args.get('recheck_all'):
|
||||||
for uuid in self.datastore.data['watching'].keys():
|
for uuid in self.datastore.data['watching'].keys():
|
||||||
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
|
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
return {'status': "OK"}, 200
|
return {'status': "OK"}, 200
|
||||||
|
|
||||||
return list, 200
|
return list, 200
|
||||||
|
|
|
@ -19,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
||||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
||||||
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
||||||
datastore.data['watching'][uuid].clear_watch()
|
datastore.data['watching'][uuid].clear_watch()
|
||||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
return redirect(url_for("index"))
|
return redirect(url_for("index"))
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
|
|
|
@ -795,7 +795,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||||
datastore.needs_write_urgent = True
|
datastore.needs_write_urgent = True
|
||||||
|
|
||||||
# Queue the watch for immediate recheck, with a higher priority
|
# Queue the watch for immediate recheck, with a higher priority
|
||||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
|
||||||
# Diff page [edit] link should go back to diff page
|
# Diff page [edit] link should go back to diff page
|
||||||
if request.args.get("next") and request.args.get("next") == 'diff':
|
if request.args.get("next") and request.args.get("next") == 'diff':
|
||||||
|
@ -976,7 +976,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||||
importer = import_url_list()
|
importer = import_url_list()
|
||||||
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
||||||
for uuid in importer.new_uuids:
|
for uuid in importer.new_uuids:
|
||||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
|
||||||
if len(importer.remaining_data) == 0:
|
if len(importer.remaining_data) == 0:
|
||||||
return redirect(url_for('index'))
|
return redirect(url_for('index'))
|
||||||
|
@ -989,7 +989,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||||
d_importer = import_distill_io_json()
|
d_importer = import_distill_io_json()
|
||||||
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
||||||
for uuid in d_importer.new_uuids:
|
for uuid in d_importer.new_uuids:
|
||||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
|
||||||
# XLSX importer
|
# XLSX importer
|
||||||
if request.files and request.files.get('xlsx_file'):
|
if request.files and request.files.get('xlsx_file'):
|
||||||
|
@ -1013,7 +1013,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||||
w_importer.run(data=file, flash=flash, datastore=datastore)
|
w_importer.run(data=file, flash=flash, datastore=datastore)
|
||||||
|
|
||||||
for uuid in w_importer.new_uuids:
|
for uuid in w_importer.new_uuids:
|
||||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
|
||||||
# Could be some remaining, or we could be on GET
|
# Could be some remaining, or we could be on GET
|
||||||
form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
|
form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
|
||||||
|
@ -1442,7 +1442,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||||
new_uuid = datastore.clone(uuid)
|
new_uuid = datastore.clone(uuid)
|
||||||
if new_uuid:
|
if new_uuid:
|
||||||
if not datastore.data['watching'].get(uuid).get('paused'):
|
if not datastore.data['watching'].get(uuid).get('paused'):
|
||||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||||
flash('Cloned.')
|
flash('Cloned.')
|
||||||
|
|
||||||
return redirect(url_for('index'))
|
return redirect(url_for('index'))
|
||||||
|
@ -1463,7 +1463,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||||
|
|
||||||
if uuid:
|
if uuid:
|
||||||
if uuid not in running_uuids:
|
if uuid not in running_uuids:
|
||||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
i = 1
|
i = 1
|
||||||
|
|
||||||
elif tag:
|
elif tag:
|
||||||
|
@ -1474,7 +1474,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||||
continue
|
continue
|
||||||
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
|
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
|
||||||
update_q.put(
|
update_q.put(
|
||||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})
|
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||||
)
|
)
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
|
@ -1484,7 +1484,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||||
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
|
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
|
||||||
if with_errors and not watch.get('last_error'):
|
if with_errors and not watch.get('last_error'):
|
||||||
continue
|
continue
|
||||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||||
i += 1
|
i += 1
|
||||||
flash(f"{i} watches queued for rechecking.")
|
flash(f"{i} watches queued for rechecking.")
|
||||||
return redirect(url_for('index', tag=tag))
|
return redirect(url_for('index', tag=tag))
|
||||||
|
@ -1542,7 +1542,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||||
uuid = uuid.strip()
|
uuid = uuid.strip()
|
||||||
if datastore.data['watching'].get(uuid):
|
if datastore.data['watching'].get(uuid):
|
||||||
# Recheck and require a full reprocessing
|
# Recheck and require a full reprocessing
|
||||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
flash("{} watches queued for rechecking".format(len(uuids)))
|
flash("{} watches queued for rechecking".format(len(uuids)))
|
||||||
|
|
||||||
elif (op == 'clear-errors'):
|
elif (op == 'clear-errors'):
|
||||||
|
@ -1866,7 +1866,7 @@ def ticker_thread_check_time_launch_checks():
|
||||||
f"{now - watch['last_checked']:0.2f}s since last checked")
|
f"{now - watch['last_checked']:0.2f}s since last checked")
|
||||||
|
|
||||||
# Into the queue with you
|
# Into the queue with you
|
||||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
|
||||||
|
|
||||||
# Reset for next time
|
# Reset for next time
|
||||||
watch.jitter_seconds = 0
|
watch.jitter_seconds = 0
|
||||||
|
|
|
@ -157,7 +157,7 @@ class difference_detection_processor():
|
||||||
# After init, call run_changedetection() which will do the actual change-detection
|
# After init, call run_changedetection() which will do the actual change-detection
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def run_changedetection(self, watch, skip_when_checksum_same: bool = True):
|
def run_changedetection(self, watch):
|
||||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||||
some_data = 'xxxxx'
|
some_data = 'xxxxx'
|
||||||
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
|
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
|
||||||
|
|
|
@ -144,7 +144,7 @@ class perform_site_check(difference_detection_processor):
|
||||||
screenshot = None
|
screenshot = None
|
||||||
xpath_data = None
|
xpath_data = None
|
||||||
|
|
||||||
def run_changedetection(self, watch, skip_when_checksum_same=True):
|
def run_changedetection(self, watch):
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
if not watch:
|
if not watch:
|
||||||
|
|
|
@ -11,10 +11,7 @@ def _task(watch, update_handler):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# The slow process (we run 2 of these in parallel)
|
# The slow process (we run 2 of these in parallel)
|
||||||
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(
|
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch)
|
||||||
watch=watch,
|
|
||||||
skip_when_checksum_same=False,
|
|
||||||
)
|
|
||||||
except FilterNotFoundInResponse as e:
|
except FilterNotFoundInResponse as e:
|
||||||
text_after_filter = f"Filter not found in HTML: {str(e)}"
|
text_after_filter = f"Filter not found in HTML: {str(e)}"
|
||||||
except ReplyWithContentButNoText as e:
|
except ReplyWithContentButNoText as e:
|
||||||
|
|
|
@ -35,7 +35,7 @@ class PDFToHTMLToolNotFound(ValueError):
|
||||||
# (set_proxy_from_list)
|
# (set_proxy_from_list)
|
||||||
class perform_site_check(difference_detection_processor):
|
class perform_site_check(difference_detection_processor):
|
||||||
|
|
||||||
def run_changedetection(self, watch, skip_when_checksum_same=True):
|
def run_changedetection(self, watch):
|
||||||
changed_detected = False
|
changed_detected = False
|
||||||
html_content = ""
|
html_content = ""
|
||||||
screenshot = False # as bytes
|
screenshot = False # as bytes
|
||||||
|
@ -58,9 +58,6 @@ class perform_site_check(difference_detection_processor):
|
||||||
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
|
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
|
||||||
# Saves a lot of CPU
|
# Saves a lot of CPU
|
||||||
update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
|
update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
|
||||||
if skip_when_checksum_same:
|
|
||||||
if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
|
|
||||||
raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame()
|
|
||||||
|
|
||||||
# Fetching complete, now filters
|
# Fetching complete, now filters
|
||||||
|
|
||||||
|
|
|
@ -147,6 +147,7 @@ body.spinner-active {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
.tabs ul li a {
|
.tabs ul li a {
|
||||||
// .tab-pane-inner will have the #id that the tab button jumps/anchors to
|
// .tab-pane-inner will have the #id that the tab button jumps/anchors to
|
||||||
scroll-margin-top: 200px;
|
scroll-margin-top: 200px;
|
||||||
|
|
|
@ -605,6 +605,7 @@ body.spinner-active #pure-menu-horizontal-spinner {
|
||||||
background-color: var(--color-background-menu-link-hover);
|
background-color: var(--color-background-menu-link-hover);
|
||||||
color: var(--color-text-menu-link-hover); }
|
color: var(--color-text-menu-link-hover); }
|
||||||
|
|
||||||
|
|
||||||
.tabs ul li a {
|
.tabs ul li a {
|
||||||
scroll-margin-top: 200px; }
|
scroll-margin-top: 200px; }
|
||||||
|
|
||||||
|
|
|
@ -260,9 +260,6 @@ class update_worker(threading.Thread):
|
||||||
try:
|
try:
|
||||||
# Processor is what we are using for detecting the "Change"
|
# Processor is what we are using for detecting the "Change"
|
||||||
processor = watch.get('processor', 'text_json_diff')
|
processor = watch.get('processor', 'text_json_diff')
|
||||||
# Abort processing when the content was the same as the last fetch
|
|
||||||
skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
|
|
||||||
|
|
||||||
|
|
||||||
# Init a new 'difference_detection_processor', first look in processors
|
# Init a new 'difference_detection_processor', first look in processors
|
||||||
processor_module_name = f"changedetectionio.processors.{processor}.processor"
|
processor_module_name = f"changedetectionio.processors.{processor}.processor"
|
||||||
|
@ -278,10 +275,7 @@ class update_worker(threading.Thread):
|
||||||
|
|
||||||
update_handler.call_browser()
|
update_handler.call_browser()
|
||||||
|
|
||||||
changed_detected, update_obj, contents = update_handler.run_changedetection(
|
changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)
|
||||||
watch=watch,
|
|
||||||
skip_when_checksum_same=skip_when_same_checksum,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Re #342
|
# Re #342
|
||||||
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
|
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
|
||||||
|
|
Ładowanie…
Reference in New Issue