From 06b0c2f413c243c7539c7d9fb5ab7af2ed2376a4 Mon Sep 17 00:00:00 2001
From: Sage Abdullah <sage.abdullah@torchbox.com>
Date: Mon, 31 Jul 2023 11:44:37 +0100
Subject: [PATCH] Guard against TypeError in 0088_fix_log_entry_json_timestamps
 migration

Old logs may have the data set to JSON `null` instead of an empty JSON
object `{}`.

See https://github.com/wagtail/wagtail/pull/8021#issuecomment-1658082683
and https://github.com/wagtail/wagtail/pull/9590#discussion_r1279096075
for more details.
---
 CHANGELOG.txt                                 |  1 +
 docs/releases/5.1.md                          |  1 +
 .../0088_fix_log_entry_json_timestamps.py     | 47 ++++++++++++-------
 3 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index 0643b66b8b..afbc5ec411 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -66,6 +66,7 @@ Changelog
  * Fix: Ensure userbar dialog can sit above other website content (LB (Ben) Johnston)
  * Fix: Fix preview panel loading issues (Sage Abdullah)
  * Fix: Fix `search_promotions` `0004_copy_queries` migration for long-lived Wagtail instances (Sage Abdullah)
+ * Fix: Guard against `TypeError` in `0088_fix_log_entry_json_timestamps` migration (Sage Abdullah)
  * Docs: Document how to add non-ModelAdmin views to a `ModelAdminGroup` (Onno Timmerman)
  * Docs: Document how to add StructBlock data to a StreamField (Ramon Wenger)
  * Docs: Update ReadTheDocs settings to v2 to resolve urllib3 issue in linkcheck extension (Thibaud Colas)
diff --git a/docs/releases/5.1.md b/docs/releases/5.1.md
index 3b19a00ef1..0803c49001 100644
--- a/docs/releases/5.1.md
+++ b/docs/releases/5.1.md
@@ -123,6 +123,7 @@ This feature was developed by Aman Pandey as part of the Google Summer of Code p
  * Ensure userbar dialog can sit above other website content (LB (Ben) Johnston)
  * Fix preview panel loading issues (Sage Abdullah)
  * Fix `search_promotions` `0004_copy_queries` migration for long-lived Wagtail instances (Sage Abdullah)
+ * Guard against `TypeError` in `0088_fix_log_entry_json_timestamps` migration (Sage Abdullah)
 
 ### Documentation
 
diff --git a/wagtail/migrations/0088_fix_log_entry_json_timestamps.py b/wagtail/migrations/0088_fix_log_entry_json_timestamps.py
index db0c79feb7..f6308277bd 100644
--- a/wagtail/migrations/0088_fix_log_entry_json_timestamps.py
+++ b/wagtail/migrations/0088_fix_log_entry_json_timestamps.py
@@ -54,7 +54,14 @@ def migrate_logs_with_created_only(model, converter):
                 converter.__name__,
             )
             continue
-        except KeyError:
+        except (KeyError, TypeError):
+            # Empty `data` in logs created since Wagtail 3.0 normalises to the
+            # empty JSON object `{}`, so accessing unavailable data will raise
+            # a `KeyError`.
+            # However, old logs (e.g. "wagtail.publish" ones) may store the
+            # `data` as a JSON `null` instead of an empty JSON object `{}` in
+            # the database, which is deserialized to `None` by Python. So,
+            # it raises a `TypeError` instead of a `KeyError`.
             continue
         else:
             item.save(update_fields=["data"])
@@ -68,11 +75,9 @@ def migrate_schedule_logs(model, converter):
         .only("data")
         .iterator()
     ):
-        created = item.data["revision"]["created"]
-        # May be unset for "wagtail.schedule.cancel"-logs.
-        go_live_at = item.data["revision"].get("go_live_at")
         changed = False
         try:
+            created = item.data["revision"]["created"]
             # "created" is set to timezone.now() for new revisions ("wagtail.publish.schedule")
             # and to self.created_at for "wagtail.schedule.cancel", which is set to UTC
             # by django.
@@ -86,25 +91,31 @@ def migrate_schedule_logs(model, converter):
                 item.pk,
                 converter.__name__,
             )
+        except (KeyError, TypeError):
+            pass
 
-        if go_live_at:
-            # The go_live_at date is set to the revision object's "go_live_at".
-            # The revision's object is created by deserializing the json data (see wagtail.models.Revision.as_object()),
-            # and this process converts all datetime objects to the local timestamp (see https://github.com/wagtail/django-modelcluster/blob/8666f16eaf23ca98afc160b0a4729864411c0563/modelcluster/models.py#L109-L115).
-            # That's the reason, why this date is the only date, which is not stored in the log's JSON as UTC, but in the default timezone.
-            try:
+        # The go_live_at date is set to the revision object's "go_live_at".
+        # The revision's object is created by deserializing the json data (see wagtail.models.Revision.as_object()),
+        # and this process converts all datetime objects to the local timestamp (see https://github.com/wagtail/django-modelcluster/blob/8666f16eaf23ca98afc160b0a4729864411c0563/modelcluster/models.py#L109-L115).
+        # That's the reason, why this date is the only date, which is not stored in the log's JSON as UTC, but in the default timezone.
+        try:
+            # May be unset for "wagtail.schedule.cancel"-logs.
+            go_live_at = item.data["revision"].get("go_live_at")
+            if go_live_at:
                 item.data["revision"]["go_live_at"] = converter(
                     go_live_at, tz=timezone.get_default_timezone()
                 )
                 changed = True
-            except ValueError:
-                logger.warning(
-                    "Failed to migrate 'go_live_at' timestamp '%s' of %s %s (%s)",
-                    go_live_at,
-                    model.__name__,
-                    item.pk,
-                    converter.__name__,
-                )
+        except ValueError:
+            logger.warning(
+                "Failed to migrate 'go_live_at' timestamp '%s' of %s %s (%s)",
+                go_live_at,
+                model.__name__,
+                item.pk,
+                converter.__name__,
+            )
+        except (KeyError, TypeError):
+            pass
 
         if changed:
             item.save(update_fields=["data"])