UI Edit/Stats - Add levenshtein distance info, explains how "different" the last two snapshot are (#3109)

pull/3113/head
dgtlmoon 2025-04-11 17:36:29 +02:00 zatwierdzone przez GitHub
rodzic 8067d5170b
commit cfb4decf67
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: B5690EEEBB952194
4 zmienionych plików z 28 dodań i 2 usunięć

Wyświetl plik

@ -19,6 +19,20 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
if tag_uuid in watch.get('tags', []) and (tag.get('include_filters') or tag.get('subtractive_selectors')):
return True
def levenshtein_ratio_recent_history(watch):
try:
from Levenshtein import ratio, distance
k = list(watch.history.keys())
if len(k) >= 2:
a = watch.get_history_snapshot(timestamp=k[0])
b = watch.get_history_snapshot(timestamp=k[1])
distance = distance(a, b)
return distance
except Exception as e:
logger.warning("Unable to calc similarity", e)
return "Unable to calc similarity"
return ''
@edit_blueprint.route("/edit/<string:uuid>", methods=['GET', 'POST'])
@login_optionally_required
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
@ -247,14 +261,15 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
'watch_uses_webdriver': watch_uses_webdriver,
'jq_support': jq_support,
'lev_info': levenshtein_ratio_recent_history(watch),
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
'settings_application': datastore.data['settings']['application'],
'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
'using_global_webdriver_wait': not default['webdriver_delay'],
'uuid': uuid,
'watch': watch
'watch': watch,
'watch_uses_webdriver': watch_uses_webdriver,
}
included_content = None

Wyświetl plik

@ -443,6 +443,10 @@ Math: {{ 1 + 1 }}") }}
</tr>
</tbody>
</table>
<h4>Text similarity</h4>
<p><strong>Levenshtein Distance</strong> - Last 2 snapshots: {{ lev_info }}</p>
<p style="max-width: 80%; font-size: 80%"><strong>Levenshtein Distance</strong> Calculates the minimum number of insertions, deletions, and substitutions required to change one text into the other.</p>
{% if watch.history_n %}
<p>
<a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a>

Wyświetl plik

@ -74,6 +74,11 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
res = client.get(url_for("ui.ui_edit.watch_get_latest_html", uuid=uuid))
assert b'which has this one new line' in res.data
# Check the 'levenshtein' distance calc showed something useful
res = client.get(url_for("ui.ui_edit.edit_page", uuid=uuid))
assert b'Last 2 snapshots: 17' in res.data
# Now something should be ready, indicated by having a 'unviewed' class
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data

Wyświetl plik

@ -68,6 +68,8 @@ openpyxl
jq~=1.3; python_version >= "3.8" and sys_platform == "darwin"
jq~=1.3; python_version >= "3.8" and sys_platform == "linux"
levenshtein
# playwright is installed at Dockerfile build time because it's not available on all platforms
pyppeteer-ng==2.0.0rc9