From 992cd55d7b2811e045e90f22600141a1e6b6377c Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Mon, 26 Sep 2022 21:41:34 +0200 Subject: [PATCH] Tweak processing --- app/activitypub.py | 29 ++++++++++++++++++++++------- app/actor.py | 2 +- app/boxes.py | 14 +++++++------- app/utils/opengraph.py | 8 +++++++- 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/app/activitypub.py b/app/activitypub.py index e3ddac9..febab1c 100644 --- a/app/activitypub.py +++ b/app/activitypub.py @@ -53,15 +53,26 @@ AS_EXTENDED_CTX = [ ] -class ObjectIsGoneError(Exception): +class FetchError(Exception): + def __init__(self, url: str, resp: httpx.Response | None = None) -> None: + resp_part = "" + if resp: + resp_part = f", got HTTP {resp.status_code}: {resp.text}" + message = f"Failed to fetch {url}{resp_part}" + super().__init__(message) + self.resp = resp + self.url = url + + +class ObjectIsGoneError(FetchError): pass -class ObjectNotFoundError(Exception): +class ObjectNotFoundError(FetchError): pass -class ObjectUnavailableError(Exception): +class ObjectUnavailableError(FetchError): pass @@ -170,13 +181,17 @@ async def fetch( # Special handling for deleted object if resp.status_code == 410: - raise ObjectIsGoneError(f"{url} is gone") + raise ObjectIsGoneError(url, resp) elif resp.status_code in [401, 403]: - raise ObjectUnavailableError(f"not allowed to fetch {url}") + raise ObjectUnavailableError(url, resp) elif resp.status_code == 404: - raise ObjectNotFoundError(f"{url} not found") + raise ObjectNotFoundError(url, resp) + + try: + resp.raise_for_status() + except httpx.HTTPError as http_error: + raise FetchError(url, resp) from http_error - resp.raise_for_status() try: return resp.json() except json.JSONDecodeError: diff --git a/app/actor.py b/app/actor.py index 7d5d3ea..d0b1c52 100644 --- a/app/actor.py +++ b/app/actor.py @@ -208,7 +208,7 @@ async def fetch_actor( return await save_actor(db_session, ap_actor) else: - raise ap.ObjectNotFoundError + raise ap.ObjectNotFoundError(actor_id) @dataclass diff --git a/app/boxes.py b/app/boxes.py index d22c411..0ab42af 100644 --- a/app/boxes.py +++ b/app/boxes.py @@ -371,10 +371,8 @@ async def fetch_conversation_root( ) in_reply_to_object = RemoteObject(raw_reply, actor=raw_reply_actor) except ( - ap.ObjectNotFoundError, - ap.ObjectIsGoneError, + ap.FetchError, ap.NotAnObjectError, - ap.ObjectUnavailableError, ): return await fetch_conversation_root(db_session, obj, is_root=True) except httpx.HTTPStatusError as http_status_error: @@ -1529,9 +1527,11 @@ async def _handle_create_activity( logger.info("Processing Create activity") # Some PeerTube activities make no sense to process - if (ap_object_type := ap.as_list(create_activity.ap_object["type"])[0]) in [ - "CacheFile" - ]: + if ( + ap_object_type := ap.as_list( + (await ap.get_object(create_activity.ap_object))["type"] + )[0] + ) in ["CacheFile"]: logger.info(f"Dropping Create activity for {ap_object_type} object") await db_session.delete(create_activity) return None @@ -1981,7 +1981,7 @@ async def save_to_inbox( except ap.ObjectNotFoundError: logger.warning("Actor not found") return - except httpx.HTTPStatusError: + except ap.FetchError: logger.exception("Failed to fetch actor") return diff --git a/app/utils/opengraph.py b/app/utils/opengraph.py index 02fdf87..2557944 100644 --- a/app/utils/opengraph.py +++ b/app/utils/opengraph.py @@ -9,6 +9,7 @@ from bs4 import BeautifulSoup # type: ignore from loguru import logger from pydantic import BaseModel +from app import activitypub as ap from app import ap_object from app import config from app.actor import LOCAL_ACTOR @@ -69,7 +70,12 @@ async def external_urls( tags_hrefs.add(tag_href) if tag.get("type") == "Mention": if tag["href"] != LOCAL_ACTOR.ap_id: - mentioned_actor = await fetch_actor(db_session, tag["href"]) + try: + mentioned_actor = await fetch_actor(db_session, tag["href"]) + except (ap.FetchError, ap.NotAnObjectError): + tags_hrefs.add(tag["href"]) + continue + tags_hrefs.add(mentioned_actor.url) tags_hrefs.add(mentioned_actor.ap_id) else: