From 06b0c2f413c243c7539c7d9fb5ab7af2ed2376a4 Mon Sep 17 00:00:00 2001 From: Sage Abdullah <sage.abdullah@torchbox.com> Date: Mon, 31 Jul 2023 11:44:37 +0100 Subject: [PATCH] Guard against TypeError in 0088_fix_log_entry_json_timestamps migration Old logs may have the data set to JSON `null` instead of an empty JSON object `{}`. See https://github.com/wagtail/wagtail/pull/8021#issuecomment-1658082683 and https://github.com/wagtail/wagtail/pull/9590#discussion_r1279096075 for more details. --- CHANGELOG.txt | 1 + docs/releases/5.1.md | 1 + .../0088_fix_log_entry_json_timestamps.py | 47 ++++++++++++------- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 0643b66b8b..afbc5ec411 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -66,6 +66,7 @@ Changelog * Fix: Ensure userbar dialog can sit above other website content (LB (Ben) Johnston) * Fix: Fix preview panel loading issues (Sage Abdullah) * Fix: Fix `search_promotions` `0004_copy_queries` migration for long-lived Wagtail instances (Sage Abdullah) + * Fix: Guard against `TypeError` in `0088_fix_log_entry_json_timestamps` migration (Sage Abdullah) * Docs: Document how to add non-ModelAdmin views to a `ModelAdminGroup` (Onno Timmerman) * Docs: Document how to add StructBlock data to a StreamField (Ramon Wenger) * Docs: Update ReadTheDocs settings to v2 to resolve urllib3 issue in linkcheck extension (Thibaud Colas) diff --git a/docs/releases/5.1.md b/docs/releases/5.1.md index 3b19a00ef1..0803c49001 100644 --- a/docs/releases/5.1.md +++ b/docs/releases/5.1.md @@ -123,6 +123,7 @@ This feature was developed by Aman Pandey as part of the Google Summer of Code p * Ensure userbar dialog can sit above other website content (LB (Ben) Johnston) * Fix preview panel loading issues (Sage Abdullah) * Fix `search_promotions` `0004_copy_queries` migration for long-lived Wagtail instances (Sage Abdullah) + * Guard against `TypeError` in `0088_fix_log_entry_json_timestamps` migration (Sage Abdullah) ### Documentation diff --git a/wagtail/migrations/0088_fix_log_entry_json_timestamps.py b/wagtail/migrations/0088_fix_log_entry_json_timestamps.py index db0c79feb7..f6308277bd 100644 --- a/wagtail/migrations/0088_fix_log_entry_json_timestamps.py +++ b/wagtail/migrations/0088_fix_log_entry_json_timestamps.py @@ -54,7 +54,14 @@ def migrate_logs_with_created_only(model, converter): converter.__name__, ) continue - except KeyError: + except (KeyError, TypeError): + # Empty `data` in logs created since Wagtail 3.0 normalises to the + # empty JSON object `{}`, so accessing unavailable data will raise + # a `KeyError`. + # However, old logs (e.g. "wagtail.publish" ones) may store the + # `data` as a JSON `null` instead of an empty JSON object `{}` in + # the database, which is deserialized to `None` by Python. So, + # it raises a `TypeError` instead of a `KeyError`. continue else: item.save(update_fields=["data"]) @@ -68,11 +75,9 @@ def migrate_schedule_logs(model, converter): .only("data") .iterator() ): - created = item.data["revision"]["created"] - # May be unset for "wagtail.schedule.cancel"-logs. - go_live_at = item.data["revision"].get("go_live_at") changed = False try: + created = item.data["revision"]["created"] # "created" is set to timezone.now() for new revisions ("wagtail.publish.schedule") # and to self.created_at for "wagtail.schedule.cancel", which is set to UTC # by django. @@ -86,25 +91,31 @@ def migrate_schedule_logs(model, converter): item.pk, converter.__name__, ) + except (KeyError, TypeError): + pass - if go_live_at: - # The go_live_at date is set to the revision object's "go_live_at". - # The revision's object is created by deserializing the json data (see wagtail.models.Revision.as_object()), - # and this process converts all datetime objects to the local timestamp (see https://github.com/wagtail/django-modelcluster/blob/8666f16eaf23ca98afc160b0a4729864411c0563/modelcluster/models.py#L109-L115). - # That's the reason, why this date is the only date, which is not stored in the log's JSON as UTC, but in the default timezone. - try: + # The go_live_at date is set to the revision object's "go_live_at". + # The revision's object is created by deserializing the json data (see wagtail.models.Revision.as_object()), + # and this process converts all datetime objects to the local timestamp (see https://github.com/wagtail/django-modelcluster/blob/8666f16eaf23ca98afc160b0a4729864411c0563/modelcluster/models.py#L109-L115). + # That's the reason, why this date is the only date, which is not stored in the log's JSON as UTC, but in the default timezone. + try: + # May be unset for "wagtail.schedule.cancel"-logs. + go_live_at = item.data["revision"].get("go_live_at") + if go_live_at: item.data["revision"]["go_live_at"] = converter( go_live_at, tz=timezone.get_default_timezone() ) changed = True - except ValueError: - logger.warning( - "Failed to migrate 'go_live_at' timestamp '%s' of %s %s (%s)", - go_live_at, - model.__name__, - item.pk, - converter.__name__, - ) + except ValueError: + logger.warning( + "Failed to migrate 'go_live_at' timestamp '%s' of %s %s (%s)", + go_live_at, + model.__name__, + item.pk, + converter.__name__, + ) + except (KeyError, TypeError): + pass if changed: item.save(update_fields=["data"])