Improve collection parsing

pull/9/head
Thomas Sileo 2018-07-08 23:02:15 +02:00
rodzic b7d63f2302
commit 65c983e588
4 zmienionych plików z 58 dodań i 17 usunięć

Wyświetl plik

@ -12,6 +12,8 @@ from typing import Type
from typing import Union from typing import Union
from .backend import Backend from .backend import Backend
from .errors import ActivityNotFoundError
from .errors import ActivityUnavailableError
from .errors import BadActivityError from .errors import BadActivityError
from .errors import DropActivityPreProcessError from .errors import DropActivityPreProcessError
from .errors import Error from .errors import Error
@ -555,6 +557,10 @@ class BaseActivity(object, metaclass=_ActivityMeta):
except RemoteActivityGoneError: except RemoteActivityGoneError:
logger.info(f"{recipient} is gone") logger.info(f"{recipient} is gone")
continue continue
except ActivityUnavailableError:
# TODO(tsileo): retry separately?
logger.info(f"failed {recipient} to fetch recipient")
continue
if actor.ACTIVITY_TYPE in ACTOR_TYPES: if actor.ACTIVITY_TYPE in ACTOR_TYPES:
if actor.endpoints: if actor.endpoints:
@ -575,13 +581,12 @@ class BaseActivity(object, metaclass=_ActivityMeta):
continue continue
try: try:
col_actor = fetch_remote_activity( col_actor = fetch_remote_activity(item)
item, expected=ActivityType.PERSON except ActivityUnavailableError:
) # TODO(tsileo): retry separately?
except UnexpectedActivityTypeError: logger.info(f"failed {recipient} to fetch recipient")
logger.exception(f"failed to fetch actor {item!r}")
continue continue
except RemoteActivityGoneError: except (RemoteActivityGoneError, ActivityNotFoundError):
logger.info(f"{item} is gone") logger.info(f"{item} is gone")
continue continue

Wyświetl plik

@ -1,5 +1,6 @@
import abc import abc
import binascii import binascii
import json
import os import os
import typing import typing
from typing import Any from typing import Any
@ -12,7 +13,9 @@ import requests
from .__version__ import __version__ from .__version__ import __version__
from .collection import parse_collection from .collection import parse_collection
from .errors import ActivityNotFoundError from .errors import ActivityNotFoundError
from .errors import ActivityUnavailableError
from .errors import RemoteActivityGoneError from .errors import RemoteActivityGoneError
from .urlutils import URLLookupFailedError
from .urlutils import check_url as check_url from .urlutils import check_url as check_url
if typing.TYPE_CHECKING: if typing.TYPE_CHECKING:
@ -70,23 +73,45 @@ class Backend(abc.ABC):
pass # pragma: no cover pass # pragma: no cover
def fetch_iri(self, iri: str, **kwargs) -> "ap.ObjectType":  # pragma: no cover
    """Fetch the remote document at `iri` and return its decoded JSON body.

    Extra keyword arguments are forwarded to `requests.get`. A 15 second
    timeout is applied unless the caller supplies its own `timeout`.

    Raises:
        ActivityNotFoundError: the remote server answered 404.
        RemoteActivityGoneError: the remote server answered 410.
        ActivityUnavailableError: the URL lookup failed, the connection
            failed or timed out, the server answered 500/502/503, or the
            response body is not valid JSON.
        requests.exceptions.HTTPError: any other error status, via
            `raise_for_status()`.
    """
    try:
        self.check_url(iri)
    except URLLookupFailedError as err:
        raise ActivityUnavailableError(
            f"unable to fetch {iri}, url lookup failed"
        ) from err

    # Use setdefault so a caller-provided `timeout` overrides the default
    # instead of triggering "got multiple values for keyword argument".
    kwargs.setdefault("timeout", 15)

    try:
        resp = requests.get(
            iri,
            headers={
                "User-Agent": self.user_agent(),
                "Accept": "application/activity+json",
            },
            **kwargs,
        )
    except (
        requests.exceptions.ConnectTimeout,
        requests.exceptions.ReadTimeout,
        requests.exceptions.ConnectionError,
    ) as err:
        raise ActivityUnavailableError(
            f"unable to fetch {iri}, connection error"
        ) from err

    if resp.status_code == 404:
        raise ActivityNotFoundError(f"{iri} is not found")
    if resp.status_code == 410:
        raise RemoteActivityGoneError(f"{iri} is gone")
    if resp.status_code in (500, 502, 503):
        raise ActivityUnavailableError(
            f"unable to fetch {iri}, server error ({resp.status_code})"
        )

    resp.raise_for_status()

    try:
        return resp.json()
    except ValueError as err:
        # ValueError is the common base of json.JSONDecodeError and the
        # simplejson variant that requests may raise when it is installed.
        # TODO(tsileo): a special error type?
        raise ActivityUnavailableError(f"{iri} is not JSON") from err
@abc.abstractmethod @abc.abstractmethod
def inbox_check_duplicate(self, as_actor: "ap.Person", iri: str) -> bool: def inbox_check_duplicate(self, as_actor: "ap.Person", iri: str) -> bool:

Wyświetl plik

@ -75,3 +75,9 @@ class RecursionLimitExceededError(BadActivityError):
class UnexpectedActivityTypeError(BadActivityError):
    """Raised when another activity type was expected."""
class ActivityUnavailableError(ServerError):
    """Raised when a remote activity cannot be fetched.

    Covers failures such as a failed URL lookup, a connection error or
    timeout, a 5xx answer from the remote server, or a non-JSON body.
    """

    # NOTE(review): presumably the HTTP status reported when this error is
    # surfaced to a client — confirm against ServerError's handling.
    status_code = 503

Wyświetl plik

@ -4,6 +4,7 @@ import socket
from typing import Dict from typing import Dict
from urllib.parse import urlparse from urllib.parse import urlparse
from .errors import Error
from .errors import ServerError from .errors import ServerError
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -16,6 +17,10 @@ class InvalidURLError(ServerError):
pass pass
class URLLookupFailedError(Error):
    """Raised when the hostname of a URL cannot be resolved."""
def is_url_valid(url: str, debug: bool = False) -> bool: def is_url_valid(url: str, debug: bool = False) -> bool:
parsed = urlparse(url) parsed = urlparse(url)
if parsed.scheme not in ["http", "https"]: if parsed.scheme not in ["http", "https"]:
@ -40,7 +45,7 @@ def is_url_valid(url: str, debug: bool = False) -> bool:
except socket.gaierror: except socket.gaierror:
logger.exception(f"failed to lookup url {url}") logger.exception(f"failed to lookup url {url}")
_CACHE[parsed.hostname] = False _CACHE[parsed.hostname] = False
return False raise URLLookupFailedError(f"failed to lookup url {url}")
logger.debug(f"{ip_address}") logger.debug(f"{ip_address}")