kopia lustrzana https://github.com/dgtlmoon/changedetection.io
UI Edit/Stats - Add Levenshtein distance info, explains how "different" the last two snapshots are (#3109)
rodzic
8067d5170b
commit
cfb4decf67
|
@ -19,6 +19,20 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||
if tag_uuid in watch.get('tags', []) and (tag.get('include_filters') or tag.get('subtractive_selectors')):
|
||||
return True
|
||||
|
||||
def levenshtein_ratio_recent_history(watch):
    """Return the Levenshtein distance between the two most recent snapshots.

    Despite the "ratio" in the name, the value returned is the edit
    *distance* (number of insertions, deletions and substitutions needed to
    turn one snapshot text into the other), which is what the edit page
    shows next to "Last 2 snapshots".

    Args:
        watch: object exposing ``history`` (a mapping keyed by snapshot
            timestamp) and ``get_history_snapshot(timestamp=...)``.

    Returns:
        The distance as returned by ``Levenshtein.distance`` when at least
        two snapshots exist, ``''`` when there are fewer than two, or the
        string ``"Unable to calc similarity"`` if anything fails (including
        the ``Levenshtein`` package not being installed).
    """
    try:
        # Imported lazily so a missing optional package only degrades this
        # one statistic instead of breaking the whole module.
        from Levenshtein import distance as levenshtein_distance

        timestamps = list(watch.history.keys())
        if len(timestamps) >= 2:
            # Compare the *last* two entries, matching the "Last 2 snapshots"
            # label. NOTE(review): assumes history keys are ordered oldest
            # first - confirm against the watch model.
            previous_snapshot = watch.get_history_snapshot(timestamp=timestamps[-2])
            latest_snapshot = watch.get_history_snapshot(timestamp=timestamps[-1])
            return levenshtein_distance(previous_snapshot, latest_snapshot)
    except Exception as e:
        # Best-effort statistic: never let it break the edit page. The
        # exception is interpolated into the message so it actually shows up
        # in the log rather than being passed as a stray format argument.
        logger.warning(f"Unable to calc similarity: {e}")
        return "Unable to calc similarity"

    return ''
|
||||
|
||||
@edit_blueprint.route("/edit/<string:uuid>", methods=['GET', 'POST'])
|
||||
@login_optionally_required
|
||||
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
|
||||
|
@ -247,14 +261,15 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||
'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||
'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
|
||||
'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
|
||||
'watch_uses_webdriver': watch_uses_webdriver,
|
||||
'jq_support': jq_support,
|
||||
'lev_info': levenshtein_ratio_recent_history(watch),
|
||||
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
||||
'settings_application': datastore.data['settings']['application'],
|
||||
'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
|
||||
'using_global_webdriver_wait': not default['webdriver_delay'],
|
||||
'uuid': uuid,
|
||||
'watch': watch
|
||||
'watch': watch,
|
||||
'watch_uses_webdriver': watch_uses_webdriver,
|
||||
}
|
||||
|
||||
included_content = None
|
||||
|
|
|
@ -443,6 +443,10 @@ Math: {{ 1 + 1 }}") }}
|
|||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<h4>Text similarity</h4>
|
||||
<p><strong>Levenshtein Distance</strong> - Last 2 snapshots: {{ lev_info }}</p>
|
||||
<p style="max-width: 80%; font-size: 80%"><strong>Levenshtein Distance</strong> Calculates the minimum number of insertions, deletions, and substitutions required to change one text into the other.</p>
|
||||
{% if watch.history_n %}
|
||||
<p>
|
||||
<a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a>
|
||||
|
|
|
@ -74,6 +74,11 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
|||
res = client.get(url_for("ui.ui_edit.watch_get_latest_html", uuid=uuid))
|
||||
assert b'which has this one new line' in res.data
|
||||
|
||||
# Check the 'levenshtein' distance calc showed something useful
|
||||
res = client.get(url_for("ui.ui_edit.edit_page", uuid=uuid))
|
||||
assert b'Last 2 snapshots: 17' in res.data
|
||||
|
||||
|
||||
# Now something should be ready, indicated by having a 'unviewed' class
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'unviewed' in res.data
|
||||
|
|
|
@ -68,6 +68,8 @@ openpyxl
|
|||
jq~=1.3; python_version >= "3.8" and sys_platform == "darwin"
|
||||
jq~=1.3; python_version >= "3.8" and sys_platform == "linux"
|
||||
|
||||
levenshtein
|
||||
|
||||
# playwright is installed at Dockerfile build time because it's not available on all platforms
|
||||
|
||||
pyppeteer-ng==2.0.0rc9
|
||||
|
|
Ładowanie…
Reference in New Issue