diff --git a/little_boxes/activitypub.py b/little_boxes/activitypub.py index 744fce2..ec79c18 100644 --- a/little_boxes/activitypub.py +++ b/little_boxes/activitypub.py @@ -12,6 +12,8 @@ from typing import Type from typing import Union from .backend import Backend +from .errors import ActivityNotFoundError +from .errors import ActivityUnavailableError from .errors import BadActivityError from .errors import DropActivityPreProcessError from .errors import Error @@ -555,6 +557,10 @@ class BaseActivity(object, metaclass=_ActivityMeta): except RemoteActivityGoneError: logger.info(f"{recipient} is gone") continue + except ActivityUnavailableError: + # TODO(tsileo): retry separately? + logger.info(f"failed {recipient} to fetch recipient") + continue if actor.ACTIVITY_TYPE in ACTOR_TYPES: if actor.endpoints: @@ -575,13 +581,12 @@ class BaseActivity(object, metaclass=_ActivityMeta): continue try: - col_actor = fetch_remote_activity( - item, expected=ActivityType.PERSON - ) - except UnexpectedActivityTypeError: - logger.exception(f"failed to fetch actor {item!r}") + col_actor = fetch_remote_activity(item) + except ActivityUnavailableError: + # TODO(tsileo): retry separately? 
+ logger.info(f"failed {recipient} to fetch recipient") continue - except RemoteActivityGoneError: + except (RemoteActivityGoneError, ActivityNotFoundError): logger.info(f"{item} is gone") continue diff --git a/little_boxes/backend.py b/little_boxes/backend.py index 94f6fef..378e0d1 100644 --- a/little_boxes/backend.py +++ b/little_boxes/backend.py @@ -1,5 +1,6 @@ import abc import binascii +import json import os import typing from typing import Any @@ -12,7 +13,9 @@ import requests from .__version__ import __version__ from .collection import parse_collection from .errors import ActivityNotFoundError +from .errors import ActivityUnavailableError from .errors import RemoteActivityGoneError +from .urlutils import URLLookupFailedError from .urlutils import check_url as check_url if typing.TYPE_CHECKING: @@ -70,23 +73,45 @@ class Backend(abc.ABC): pass # pragma: no cover def fetch_iri(self, iri: str, **kwargs) -> "ap.ObjectType": # pragma: no cover - self.check_url(iri) - resp = requests.get( - iri, - headers={ - "User-Agent": self.user_agent(), - "Accept": "application/activity+json", - }, - **kwargs, - ) + try: + self.check_url(iri) + except URLLookupFailedError: + raise ActivityUnavailableError(f"unable to fetch {iri}, url lookup failed") + + try: + resp = requests.get( + iri, + headers={ + "User-Agent": self.user_agent(), + "Accept": "application/activity+json", + }, + timeout=15, + **kwargs, + ) + except ( + requests.exceptions.ConnectTimeout, + requests.exceptions.ReadTimeout, + requests.exceptions.ConnectionError, + ): + raise ActivityUnavailableError(f"unable to fetch {iri}, connection error") if resp.status_code == 404: raise ActivityNotFoundError(f"{iri} is not found") elif resp.status_code == 410: raise RemoteActivityGoneError(f"{iri} is gone") + elif resp.status_code in [500, 502, 503]: + raise ActivityUnavailableError( + f"unable to fetch {iri}, server error ({resp.status_code})" + ) resp.raise_for_status() - return resp.json() + try: + out = resp.json() + 
except json.JSONDecodeError: + # TODO(tsileo): a special error type? + raise ActivityUnavailableError(f"{iri} is not JSON") + + return out @abc.abstractmethod def inbox_check_duplicate(self, as_actor: "ap.Person", iri: str) -> bool: diff --git a/little_boxes/errors.py b/little_boxes/errors.py index 4c1cabc..45377cd 100644 --- a/little_boxes/errors.py +++ b/little_boxes/errors.py @@ -75,3 +75,9 @@ class RecursionLimitExceededError(BadActivityError): class UnexpectedActivityTypeError(BadActivityError): """Raised when an another activty was expected.""" + + +class ActivityUnavailableError(ServerError): + """Raised when a remote activity cannot be fetched (URL lookup failure, timeout, connection error, 5xx response or non-JSON body).""" + + status_code = 503 diff --git a/little_boxes/urlutils.py b/little_boxes/urlutils.py index 59ba1ab..391b63e 100644 --- a/little_boxes/urlutils.py +++ b/little_boxes/urlutils.py @@ -4,6 +4,7 @@ import socket from typing import Dict from urllib.parse import urlparse +from .errors import Error from .errors import ServerError logger = logging.getLogger(__name__) @@ -16,6 +17,10 @@ class InvalidURLError(ServerError): pass +class URLLookupFailedError(Error): + pass + + def is_url_valid(url: str, debug: bool = False) -> bool: parsed = urlparse(url) if parsed.scheme not in ["http", "https"]: @@ -40,7 +45,7 @@ def is_url_valid(url: str, debug: bool = False) -> bool: except socket.gaierror: logger.exception(f"failed to lookup url {url}") _CACHE[parsed.hostname] = False - return False + raise URLLookupFailedError(f"failed to lookup url {url}") logger.debug(f"{ip_address}")