diff --git a/activities/models/post.py b/activities/models/post.py index 7b7e3e9..df85277 100644 --- a/activities/models/post.py +++ b/activities/models/post.py @@ -12,9 +12,11 @@ from asgiref.sync import async_to_sync from django.contrib.postgres.indexes import GinIndex from django.contrib.postgres.search import SearchVector from django.db import models, transaction +from django.db.utils import IntegrityError from django.template import loader from django.template.defaultfilters import linebreaks_filter from django.utils import timezone +from pyld.jsonld import JsonLdError from activities.models.emoji import Emoji from activities.models.fan_out import FanOut @@ -842,6 +844,9 @@ class Post(StatorModel): if author.domain is None: if fetch_author: async_to_sync(author.fetch_actor)() + # perhaps the entire "try again" logic below + # could be replaced with TryAgainLater for + # _all_ fetches, to let it handle pinned posts? if author.domain is None: raise TryAgainLater() else: @@ -849,22 +854,46 @@ class Post(StatorModel): # If the post is from a blocked domain, stop and drop if author.domain.blocked: raise cls.DoesNotExist("Post is from a blocked domain") - post = cls.objects.create( - object_uri=data["id"], - author=author, - content="", - local=False, - type=data["type"], - ) - created = True + try: + # try again, because fetch_actor() also fetches pinned posts + post = cls.objects.select_related("author__domain").get( + object_uri=data["id"] + ) + except cls.DoesNotExist: + # finally, create a stub + try: + post = cls.objects.create( + object_uri=data["id"], + author=author, + content="", + local=False, + type=data["type"], + ) + created = True + except IntegrityError as dupe: + # there's still some kind of race condition here + # it's far more rare, but sometimes we fire an + # IntegrityError on activities_post_object_uri_key + # this transaction is now aborted and anything following + # in the caller function will fail in the database. + raise TryAgainLater() from dupe else: raise cls.DoesNotExist(f"No post with ID {data['id']}", data) if update or created: post.type = data["type"] if post.type in (cls.Types.article, cls.Types.question): post.type_data = PostTypeData(__root__=data).__root__ - post.content = get_value_or_map(data, "content", "contentMap") - post.summary = data.get("summary") + try: + # apparently sometimes posts (Pages?) in the fediverse + # don't have content?! + post.content = get_value_or_map(data, "content", "contentMap") + except KeyError: + post.content = None + # Document types have names, not summaries + post.summary = data.get("summary") or data.get("name") + if not post.content and post.summary: + post.content = post.summary + post.summary = None post.sensitive = data.get("sensitive", False) post.url = data.get("url", data["id"]) post.published = parse_ld_date(data.get("published")) @@ -878,10 +907,13 @@ class Post(StatorModel): mention_identity = Identity.by_actor_uri(tag["href"], create=True) post.mentions.add(mention_identity) elif tag_type in ["_:hashtag", "hashtag"]: + # kbin produces tags with 'tag' instead of 'name' + if "tag" in tag and "name" not in tag: + name = get_value_or_map(tag, "tag", "tagMap") + else: + name = get_value_or_map(tag, "name", "nameMap") post.hashtags.append( - get_value_or_map(tag, "name", "nameMap") - .lower() - .lstrip("#")[: Hashtag.MAXIMUM_LENGTH] + name.lower().lstrip("#")[: Hashtag.MAXIMUM_LENGTH] ) elif tag_type in ["toot:emoji", "emoji"]: emoji = Emoji.by_ap_tag(post.author.domain, tag, create=True) @@ -907,6 +939,10 @@ class Post(StatorModel): # These have no IDs, so we have to wipe them each time post.attachments.all().delete() for attachment in get_list(data, "attachment"): + if "url" not in attachment.keys(): + # sometimes attachments don't have URLs. Skip them. + print(f"no URL for {attachment} in {post}") + continue if "focalPoint" in attachment: try: focal_x, focal_y = attachment["focalPoint"] @@ -982,8 +1018,10 @@ class Post(StatorModel): update=True, fetch_author=True, ) - except (json.JSONDecodeError, ValueError): - raise cls.DoesNotExist(f"Invalid ld+json response for {object_uri}") + except (json.JSONDecodeError, ValueError, JsonLdError) as err: + raise cls.DoesNotExist( + f"Invalid ld+json response for {object_uri}" + ) from err # We may need to fetch the author too if post.author.state == IdentityStates.outdated: async_to_sync(post.author.fetch_actor)() diff --git a/activities/models/post_interaction.py b/activities/models/post_interaction.py index 90d00ec..b3ba3a6 100644 --- a/activities/models/post_interaction.py +++ b/activities/models/post_interaction.py @@ -471,8 +471,9 @@ class PostInteraction(StatorModel): # TODO: Limited retry state? return - interaction.post.calculate_stats() - interaction.post.calculate_type_data() + if interaction and interaction.post: + interaction.post.calculate_stats() + interaction.post.calculate_type_data() @classmethod def handle_undo_ap(cls, data): diff --git a/users/services/identity.py b/users/services/identity.py index 71b0a4f..66c45f2 100644 --- a/users/services/identity.py +++ b/users/services/identity.py @@ -4,6 +4,7 @@ from django.template.defaultfilters import linebreaks_filter from activities.models import FanOut, Post, PostInteraction, PostInteractionStates from core.files import resize_image from core.html import FediverseHtmlParser +from stator.exceptions import TryAgainLater from users.models import ( Block, BlockStates, @@ -201,6 +202,10 @@ class IdentityService: except Post.DoesNotExist: # ignore 404s... pass + except TryAgainLater: + # when fetching a post -> author -> post we can + # get into a state. Ignore this round. + pass for removed in PostInteraction.objects.filter( type=PostInteraction.Types.pin, identity=self.identity,