kopia lustrzana https://github.com/tsileo/little-boxes
Improve collection parsing
rodzic
b7d63f2302
commit
65c983e588
|
@ -12,6 +12,8 @@ from typing import Type
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
from .backend import Backend
|
from .backend import Backend
|
||||||
|
from .errors import ActivityNotFoundError
|
||||||
|
from .errors import ActivityUnavailableError
|
||||||
from .errors import BadActivityError
|
from .errors import BadActivityError
|
||||||
from .errors import DropActivityPreProcessError
|
from .errors import DropActivityPreProcessError
|
||||||
from .errors import Error
|
from .errors import Error
|
||||||
|
@ -555,6 +557,10 @@ class BaseActivity(object, metaclass=_ActivityMeta):
|
||||||
except RemoteActivityGoneError:
|
except RemoteActivityGoneError:
|
||||||
logger.info(f"{recipient} is gone")
|
logger.info(f"{recipient} is gone")
|
||||||
continue
|
continue
|
||||||
|
except ActivityUnavailableError:
|
||||||
|
# TODO(tsileo): retry separately?
|
||||||
|
logger.info(f"failed {recipient} to fetch recipient")
|
||||||
|
continue
|
||||||
|
|
||||||
if actor.ACTIVITY_TYPE in ACTOR_TYPES:
|
if actor.ACTIVITY_TYPE in ACTOR_TYPES:
|
||||||
if actor.endpoints:
|
if actor.endpoints:
|
||||||
|
@ -575,13 +581,12 @@ class BaseActivity(object, metaclass=_ActivityMeta):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
col_actor = fetch_remote_activity(
|
col_actor = fetch_remote_activity(item)
|
||||||
item, expected=ActivityType.PERSON
|
except ActivityUnavailableError:
|
||||||
)
|
# TODO(tsileo): retry separately?
|
||||||
except UnexpectedActivityTypeError:
|
logger.info(f"failed {recipient} to fetch recipient")
|
||||||
logger.exception(f"failed to fetch actor {item!r}")
|
|
||||||
continue
|
continue
|
||||||
except RemoteActivityGoneError:
|
except (RemoteActivityGoneError, ActivityNotFoundError):
|
||||||
logger.info(f"{item} is gone")
|
logger.info(f"{item} is gone")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import abc
|
import abc
|
||||||
import binascii
|
import binascii
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import typing
|
import typing
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
@ -12,7 +13,9 @@ import requests
|
||||||
from .__version__ import __version__
|
from .__version__ import __version__
|
||||||
from .collection import parse_collection
|
from .collection import parse_collection
|
||||||
from .errors import ActivityNotFoundError
|
from .errors import ActivityNotFoundError
|
||||||
|
from .errors import ActivityUnavailableError
|
||||||
from .errors import RemoteActivityGoneError
|
from .errors import RemoteActivityGoneError
|
||||||
|
from .urlutils import URLLookupFailedError
|
||||||
from .urlutils import check_url as check_url
|
from .urlutils import check_url as check_url
|
||||||
|
|
||||||
if typing.TYPE_CHECKING:
|
if typing.TYPE_CHECKING:
|
||||||
|
@ -70,23 +73,45 @@ class Backend(abc.ABC):
|
||||||
pass # pragma: no cover
|
pass # pragma: no cover
|
||||||
|
|
||||||
def fetch_iri(self, iri: str, **kwargs) -> "ap.ObjectType": # pragma: no cover
|
def fetch_iri(self, iri: str, **kwargs) -> "ap.ObjectType": # pragma: no cover
|
||||||
self.check_url(iri)
|
try:
|
||||||
resp = requests.get(
|
self.check_url(iri)
|
||||||
iri,
|
except URLLookupFailedError:
|
||||||
headers={
|
raise ActivityUnavailableError(f"unable to fetch {iri}, url lookup failed")
|
||||||
"User-Agent": self.user_agent(),
|
|
||||||
"Accept": "application/activity+json",
|
try:
|
||||||
},
|
resp = requests.get(
|
||||||
**kwargs,
|
iri,
|
||||||
)
|
headers={
|
||||||
|
"User-Agent": self.user_agent(),
|
||||||
|
"Accept": "application/activity+json",
|
||||||
|
},
|
||||||
|
timeout=15,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
except (
|
||||||
|
requests.exceptions.ConnectTimeout,
|
||||||
|
requests.exceptions.ReadTimeout,
|
||||||
|
requests.exceptions.ConnectionError,
|
||||||
|
):
|
||||||
|
raise ActivityUnavailableError(f"unable to fetch {iri}, connection error")
|
||||||
if resp.status_code == 404:
|
if resp.status_code == 404:
|
||||||
raise ActivityNotFoundError(f"{iri} is not found")
|
raise ActivityNotFoundError(f"{iri} is not found")
|
||||||
elif resp.status_code == 410:
|
elif resp.status_code == 410:
|
||||||
raise RemoteActivityGoneError(f"{iri} is gone")
|
raise RemoteActivityGoneError(f"{iri} is gone")
|
||||||
|
elif resp.status_code in [500, 502, 503]:
|
||||||
|
raise ActivityUnavailableError(
|
||||||
|
f"unable to fetch {iri}, server error ({resp.status_code})"
|
||||||
|
)
|
||||||
|
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
|
||||||
return resp.json()
|
try:
|
||||||
|
out = resp.json()
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
# TODO(tsileo): a special error type?
|
||||||
|
raise ActivityUnavailableError(f"{iri} is not JSON")
|
||||||
|
|
||||||
|
return out
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def inbox_check_duplicate(self, as_actor: "ap.Person", iri: str) -> bool:
|
def inbox_check_duplicate(self, as_actor: "ap.Person", iri: str) -> bool:
|
||||||
|
|
|
@ -75,3 +75,9 @@ class RecursionLimitExceededError(BadActivityError):
|
||||||
|
|
||||||
class UnexpectedActivityTypeError(BadActivityError):
|
class UnexpectedActivityTypeError(BadActivityError):
|
||||||
"""Raised when another activity was expected."""
|
"""Raised when another activity was expected."""
|
||||||
|
|
||||||
|
|
||||||
|
class ActivityUnavailableError(ServerError):
|
||||||
|
"""Raised when a remote activity cannot be fetched (timeout, connection or server error)."""
|
||||||
|
|
||||||
|
status_code = 503
|
||||||
|
|
|
@ -4,6 +4,7 @@ import socket
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from .errors import Error
|
||||||
from .errors import ServerError
|
from .errors import ServerError
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -16,6 +17,10 @@ class InvalidURLError(ServerError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class URLLookupFailedError(Error):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def is_url_valid(url: str, debug: bool = False) -> bool:
|
def is_url_valid(url: str, debug: bool = False) -> bool:
|
||||||
parsed = urlparse(url)
|
parsed = urlparse(url)
|
||||||
if parsed.scheme not in ["http", "https"]:
|
if parsed.scheme not in ["http", "https"]:
|
||||||
|
@ -40,7 +45,7 @@ def is_url_valid(url: str, debug: bool = False) -> bool:
|
||||||
except socket.gaierror:
|
except socket.gaierror:
|
||||||
logger.exception(f"failed to lookup url {url}")
|
logger.exception(f"failed to lookup url {url}")
|
||||||
_CACHE[parsed.hostname] = False
|
_CACHE[parsed.hostname] = False
|
||||||
return False
|
raise URLLookupFailedError(f"failed to lookup url {url}")
|
||||||
|
|
||||||
logger.debug(f"{ip_address}")
|
logger.debug(f"{ip_address}")
|
||||||
|
|
||||||
|
|
Ładowanie…
Reference in New Issue