Merge branch 'activitypub-fetcher' into 'master'

ActivityPub profile fetcher and inbound protocol routing improvements

See merge request jaywink/federation!140
merge-requests/141/head
Jason Robinson 2019-03-02 21:55:54 +00:00
commit fe7cd7c7d1
19 zmienionych plików z 187 dodań i 74 usunięć

Wyświetl plik

@ -18,6 +18,10 @@
* Added network utility `network.fetch_host_ip` to fetch IP by hostname.
* Inbound helper utility `handle_receive` now also supports ActivityPub payloads. Protocol will be identified by looking at the payload contents.
* Fetcher helper utility `retrieve_remote_profile` now also supports fetching ActivityPub profiles. Response will be a serialized protocol specific profile entity.
### Changed
* **Backwards incompatible.** Lowest compatible Python version is now 3.6.
@ -35,6 +39,10 @@
* **Backwards incompatible.** Generator `RFC3033Webfinger` and the related `rfc3033_webfinger_view` have been renamed to `RFC7033Webfinger` and `rfc7033_webfinger_view` to reflect the right RFC number.
* Network helper utility `fetch_document` can now also take a dictionary of `headers`. They will be passed to the underlying `requests` method call as is.
* **Backwards incompatible.** Fetcher helper utility `retrieve_remote_profile` parameter `handle` has been removed. Pass in the Diaspora protocol handle as the first parameter to fetch a Diaspora remote profile.
### Removed
* **Backwards incompatible.** Support for Legacy Diaspora payloads has been removed to reduce the amount of code that needs to be maintained while refactoring for ActivityPub.

Wyświetl plik

@ -1 +1,30 @@
import importlib
from federation.exceptions import NoSuitableProtocolFoundError
__version__ = "0.18.0-dev"
PROTOCOLS = (
"activitypub",
"diaspora",
)
def identify_protocol(method: str, value: str):
    """
    Find the first protocol module that recognises the given value.

    Every name in ``PROTOCOLS`` is imported as ``federation.protocols.<name>.protocol``
    and its ``identify_<method>`` function is called with ``value``.

    :arg method: Suffix of the identify function to call, for example ``id`` or ``payload``
    :arg value: The ID or payload to identify
    :returns: The first protocol module whose identify function returned a truthy result
    :raises NoSuitableProtocolFoundError: When no protocol identified the value
    """
    for name in PROTOCOLS:
        module = importlib.import_module(f"federation.protocols.{name}.protocol")
        identify = getattr(module, f"identify_{method}")
        if identify(value):
            return module
    # The loop only ends without returning when every protocol declined the value.
    raise NoSuitableProtocolFoundError()
def identify_protocol_by_id(id: str):
    """
    Identify the protocol module for an ID.

    Shortcut for ``identify_protocol`` using the ``id`` identify method.

    :arg id: The ID to identify
    :returns: Whatever ``identify_protocol`` returns for this ID
    """
    return identify_protocol(method='id', value=id)
def identify_protocol_by_payload(payload: str):
    """
    Identify the protocol module for a payload.

    Shortcut for ``identify_protocol`` using the ``payload`` identify method.

    :arg payload: The payload to identify
    :returns: Whatever ``identify_protocol`` returns for this payload
    """
    return identify_protocol(method='payload', value=payload)

Wyświetl plik

@ -1,16 +1,22 @@
from enum import Enum
class ActivityType(Enum):
class EnumBase(Enum):
@classmethod
def values(cls):
return [value.value for value in cls.__members__.values()]
class ActivityType(EnumBase):
CREATE = "Create"
DELETE = "Delete"
FOLLOW = "Follow"
UPDATE = "Update"
class ActorType(Enum):
class ActorType(EnumBase):
PERSON = "Person"
class ObjectType(Enum):
class ObjectType(EnumBase):
NOTE = "Note"

Wyświetl plik

@ -5,6 +5,7 @@ from federation.types import UserType
MAPPINGS = {
"Follow": ActivitypubFollow,
"Person": ActivitypubProfile,
}
@ -45,11 +46,27 @@ def transform_attribute(key, value, cls):
return {"activity_id": value}
elif key == "actor":
return {"actor_id": value}
elif key == "icon":
# TODO maybe we should ditch these size constants and instead have a more flexible dict for images
# so based on protocol there would either be one url or many by size name
return {"image_urls": {
"small": value,
"medium": value,
"large": value,
}}
elif key == "name":
return {"name": value}
elif key == "object":
if isinstance(value, dict):
return transform_attributes(value, cls)
else:
return {"target_id": value}
elif key == "preferredUsername":
return {"username": value}
elif key == "publicKey":
return {"public_key": value.get('publicKeyPem', '')}
elif key == "url":
return {"url": value}
return {}

Wyświetl plik

@ -1,11 +1,15 @@
from federation.entities.activitypub.enums import ActorType
from federation.entities.mixins import BaseEntity
from federation.entities.utils import get_base_attributes
class ActivitypubEntityMixin(BaseEntity):
_type = None
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._required.append('activity_id')
if self._type not in ActorType.values():
self._required.append('activity_id')
@classmethod
def from_base(cls, entity):

Wyświetl plik

@ -99,6 +99,7 @@ class Profile(CreatedAtMixin, OptionalRawContentMixin, PublicMixin):
public_key = ""
tag_list = None
url = ""
username = ""
_allowed_children = (Image,)

Wyświetl plik

@ -1,7 +1,6 @@
import datetime
import importlib
import warnings
from typing import Optional
from federation.entities.activitypub.enums import ActivityType
@ -116,14 +115,6 @@ class BaseEntity:
"""Implement in subclasses if needed."""
pass
@property
def username(self) -> Optional[str]:
if self.handle:
username_part = self.handle.rsplit('@', 1)
if username_part:
# Strip any remaining '@' if this is a Mastodon style handle
return username_part[0].strip('@')
class PublicMixin(BaseEntity):
public = False

Wyświetl plik

@ -2,6 +2,7 @@ import importlib
import logging
from typing import Optional, Callable
from federation import identify_protocol_by_id
from federation.entities.base import Profile
logger = logging.getLogger("federation")
@ -28,17 +29,11 @@ def retrieve_remote_content(
)
def retrieve_remote_profile(id: str, handle: str=None) -> Optional[Profile]:
def retrieve_remote_profile(id: str) -> Optional[Profile]:
"""High level retrieve profile method.
Retrieve the profile from a remote location, using either the given protocol or by checking each
protocol until a user can be constructed from the remote documents.
Currently, due to no other protocols supported, always use the Diaspora protocol.
Retrieve the profile from a remote location, using protocol based on the given ID.
"""
# TODO add support for AP
protocol_name = "diaspora"
if not handle:
handle = id
utils = importlib.import_module("federation.utils.%s" % protocol_name)
return utils.retrieve_and_parse_profile(handle.lower())
protocol = identify_protocol_by_id(id)
utils = importlib.import_module(f"federation.utils.{protocol.PROTOCOL_NAME}")
return utils.retrieve_and_parse_profile(id)

Wyświetl plik

@ -2,16 +2,11 @@ import importlib
import logging
from typing import Tuple, List, Callable
from federation.exceptions import NoSuitableProtocolFoundError
from federation import identify_protocol_by_payload
from federation.types import UserType
logger = logging.getLogger("federation")
PROTOCOLS = (
"activitypub",
"diaspora",
)
def handle_receive(
payload: str,
@ -36,24 +31,15 @@ def handle_receive(
:arg sender_key_fetcher: Function that accepts sender handle and returns public key (optional)
:arg skip_author_verification: Don't verify sender (test purposes, false default)
:returns: Tuple of sender id, protocol name and list of entity objects
:raises NoSuitableProtocolFound: When no protocol was identified to pass message to
"""
logger.debug("handle_receive: processing payload: %s", payload)
found_protocol = None
for protocol_name in PROTOCOLS:
protocol = importlib.import_module("federation.protocols.%s.protocol" % protocol_name)
if protocol.identify_payload(payload):
found_protocol = protocol
break
found_protocol = identify_protocol_by_payload(payload)
if found_protocol:
logger.debug("handle_receive: using protocol %s", found_protocol.PROTOCOL_NAME)
protocol = found_protocol.Protocol()
sender, message = protocol.receive(
payload, user, sender_key_fetcher, skip_author_verification=skip_author_verification)
logger.debug("handle_receive: sender %s, message %s", sender, message)
else:
raise NoSuitableProtocolFoundError()
logger.debug("handle_receive: using protocol %s", found_protocol.PROTOCOL_NAME)
protocol = found_protocol.Protocol()
sender, message = protocol.receive(
payload, user, sender_key_fetcher, skip_author_verification=skip_author_verification)
logger.debug("handle_receive: sender %s, message %s", sender, message)
mappers = importlib.import_module("federation.entities.%s.mappers" % found_protocol.PROTOCOL_NAME)
entities = mappers.message_to_objects(message, sender, sender_key_fetcher, user)

Wyświetl plik

@ -1,12 +1,24 @@
import json
import logging
import re
from typing import Union, Callable, Tuple
from federation.entities.activitypub.enums import ActorType
from federation.types import UserType
from federation.utils.text import decode_if_bytes
logger = logging.getLogger('federation')
PROTOCOL_NAME = "activitypub"
def identify_id(id: str) -> bool:
    """
    Check whether the given ID looks like an ActivityPub ID.

    An ID qualifies when it starts with ``http://`` or ``https://``; the scheme
    is matched case-insensitively.

    :arg id: The ID to check
    :returns: ``True`` when the ID starts with an HTTP(S) scheme, otherwise ``False``
    """
    scheme_match = re.match(r'^https?://', id, flags=re.IGNORECASE)
    return scheme_match is not None
def identify_payload(payload: Union[str, bytes]) -> bool:
"""
Try to identify whether this is an ActivityPub payload.
@ -21,8 +33,14 @@ def identify_payload(payload: Union[str, bytes]) -> bool:
class Protocol:
def extract_actor(self):
    """
    Set ``self.actor`` from ``self.payload``.

    When the payload ``type`` is one of the ``ActorType`` values, the actor is
    the payload's own ``id``. Otherwise the actor is read from the payload's
    ``actor`` key. A missing key results in ``None``.
    """
    payload = self.payload
    payload_is_actor = payload.get('type') in ActorType.values()
    actor_key = 'id' if payload_is_actor else 'actor'
    self.actor = payload.get(actor_key)
def receive(self, payload: str, user: UserType=None, sender_key_fetcher: Callable[[str], str]=None,
skip_author_verification: bool=False) -> Tuple[str, str]:
skip_author_verification: bool=False) -> Tuple[str, dict]:
"""
Receive a payload.
@ -31,10 +49,11 @@ class Protocol:
self.user = user
self.get_contact_key = sender_key_fetcher
self.payload = json.loads(decode_if_bytes(payload))
self.extract_actor()
# Verify the message is from who it claims to be
if not skip_author_verification:
self.verify_signature()
return self.payload["actor"], self.payload
return self.actor, self.payload
def verify_signature(self):
# TODO implement

Wyświetl plik

@ -13,7 +13,7 @@ from federation.protocols.diaspora.encrypted import EncryptedPayload
from federation.protocols.diaspora.magic_envelope import MagicEnvelope
from federation.types import UserType
from federation.utils.diaspora import fetch_public_key
from federation.utils.text import decode_if_bytes, encode_if_text
from federation.utils.text import decode_if_bytes, encode_if_text, validate_handle
logger = logging.getLogger("federation")
@ -22,6 +22,13 @@ PROTOCOL_NS = "https://joindiaspora.com/protocol"
MAGIC_ENV_TAG = "{http://salmon-protocol.org/ns/magic-env}env"
def identify_id(id: str) -> bool:
    """
    Check whether the given ID is a Diaspora ID.

    The decision is delegated entirely to ``validate_handle``.

    :arg id: The ID to check
    :returns: The result of ``validate_handle`` for the ID
    """
    is_diaspora_handle = validate_handle(id)
    return is_diaspora_handle
def identify_payload(payload):
"""Try to identify whether this is a Diaspora payload.

Wyświetl plik

@ -24,9 +24,10 @@ class TestGetBaseAttributes:
assert set(attrs) == {
"created_at", "name", "email", "gender", "raw_content", "location", "public",
"nsfw", "public_key", "image_urls", "tag_list", "signature", "url", "atom_url",
"base_url", "id", "actor_id", "handle", "handle", "guid", "activity", "activity_id",
"base_url", "id", "actor_id", "handle", "handle", "guid", "activity", "activity_id", "username",
}
class TestGetFullXMLRepresentation:
def test_returns_xml_document(self):
entity = Post()

Wyświetl plik

@ -18,16 +18,6 @@ class TestPostEntityTags:
assert post.tags == set()
class TestBaseEntity:
def test_username(self):
entity = Profile(handle='foobar@localhost.local')
assert entity.username == 'foobar'
entity = Profile(handle='@foobar@localhost.local')
assert entity.username == 'foobar'
entity = Profile()
assert entity.username is None
class TestBaseEntityCallsValidateMethods:
def test_entity_calls_attribute_validate_method(self):
post = PostFactory()

Wyświetl plik

@ -1,6 +1,14 @@
import json
from federation.protocols.activitypub.protocol import identify_payload
from federation.protocols.activitypub.protocol import identify_payload, identify_id
def test_identify_id():
    """Only IDs starting with an HTTP(S) scheme are identified as ActivityPub IDs."""
    expectations = (
        ('foobar', False),
        ('foobar@example.com', False),
        ('foobar@example.com:8000', False),
        ('http://foobar@example.com', True),
        ('https://foobar@example.com', True),
    )
    for candidate, expected in expectations:
        assert identify_id(candidate) is expected
class TestIdentifyPayload:

Wyświetl plik

@ -15,16 +15,20 @@ class TestRetrieveRemoteContent:
class TestRetrieveRemoteProfile:
@patch("federation.fetchers.importlib.import_module")
def test_calls_diaspora_retrieve_and_parse_profile(self, mock_import):
mock_retrieve = Mock()
mock_import.return_value = mock_retrieve
retrieve_remote_profile("user@example.com")
mock_retrieve.retrieve_and_parse_profile.assert_called_once_with("user@example.com")
@patch("federation.fetchers.importlib.import_module", autospec=True)
@patch("federation.fetchers.identify_protocol_by_id", autospec=True, return_value=Mock(PROTOCOL_NAME='activitypub'))
def test_calls_activitypub_retrieve_and_parse_profile(self, mock_identify, mock_import):
mock_utils = Mock()
mock_import.return_value = mock_utils
retrieve_remote_profile("https://example.com/foo")
mock_import.assert_called_once_with("federation.utils.activitypub")
mock_utils.retrieve_and_parse_profile.assert_called_once_with("https://example.com/foo")
@patch("federation.fetchers.importlib.import_module")
def test_calls_diaspora_retrieve_and_parse_profile__lower_cases_handle_when_needed(self, mock_import):
mock_retrieve = Mock()
mock_import.return_value = mock_retrieve
retrieve_remote_profile("uSer@ExamPle.com")
mock_retrieve.retrieve_and_parse_profile.assert_called_once_with("user@example.com")
@patch("federation.fetchers.importlib.import_module", autospec=True)
@patch("federation.fetchers.identify_protocol_by_id", autospec=True, return_value=Mock(PROTOCOL_NAME='diaspora'))
def test_calls_diaspora_retrieve_and_parse_profile(self, mock_identify, mock_import):
mock_utils = Mock()
mock_import.return_value = mock_utils
retrieve_remote_profile("user@example.com")
mock_import.assert_called_once_with("federation.utils.diaspora")
mock_utils.retrieve_and_parse_profile.assert_called_once_with("user@example.com")

Wyświetl plik

@ -8,7 +8,7 @@ from federation.protocols.diaspora.protocol import Protocol
from federation.tests.fixtures.payloads import DIASPORA_PUBLIC_PAYLOAD
class TestHandleReceiveProtocolIdentification():
class TestHandleReceiveProtocolIdentification:
def test_handle_receive_routes_to_identified_protocol(self):
payload = DIASPORA_PUBLIC_PAYLOAD
with patch.object(

Wyświetl plik

@ -21,6 +21,13 @@ class TestFetchCountryByIp:
class TestFetchDocument:
call_args = {"timeout": 10, "headers": {'user-agent': USER_AGENT}}
@patch("federation.utils.network.requests.get", autospec=True, return_value=Mock(status_code=200, text="foo"))
def test_extra_headers(self, mock_get):
fetch_document("https://example.com/foo", extra_headers={'accept': 'application/activity+json'})
mock_get.assert_called_once_with('https://example.com/foo', headers={
'user-agent': USER_AGENT, 'accept': 'application/activity+json',
})
def test_raises_without_url_and_host(self):
with pytest.raises(ValueError):
fetch_document()

Wyświetl plik

@ -0,0 +1,38 @@
import logging
from typing import Optional, Any
from federation.entities.activitypub.entities import ActivitypubProfile
from federation.entities.activitypub.mappers import message_to_objects
from federation.utils.network import fetch_document
logger = logging.getLogger('federation')
def retrieve_and_parse_document(id: str) -> Optional[Any]:
    """
    Fetch a remote document by ID and return the first entity mapped from it.

    The document is requested with an ``application/activity+json`` accept header,
    passed through the ActivityPub ``Protocol.receive`` and then mapped to entities
    with ``message_to_objects``.

    :arg id: ID of the document, passed to ``fetch_document`` as the URL
    :returns: The first mapped entity, or ``None`` when nothing was fetched or mapped
    """
    # Only the document itself is used; the status code and exception are ignored.
    document, _status_code, _exception = fetch_document(
        id, extra_headers={'accept': 'application/activity+json'},
    )
    if not document:
        return None
    # NOTE(review): imported locally, presumably to avoid a circular import - confirm
    from federation.protocols.activitypub.protocol import Protocol
    sender, payload = Protocol().receive(document)
    entities = message_to_objects(payload, sender)
    return entities[0] if entities else None
def retrieve_and_parse_profile(id: str) -> Optional[ActivitypubProfile]:
    """
    Fetch the remote ID and return the resulting entity once it validates.

    The entity comes from ``retrieve_and_parse_document``. Its ``validate`` method
    is called; a ``ValueError`` from validation is logged as a warning and treated
    as "no profile".

    :arg id: ID of the remote profile to fetch
    :returns: The validated entity, or ``None`` when nothing was retrieved or validation failed
    """
    # NOTE(review): the retrieved entity's type is not checked here - confirm that
    # callers can only end up with a profile entity.
    profile = retrieve_and_parse_document(id)
    if profile:
        try:
            profile.validate()
        except ValueError as ex:
            logger.warning("retrieve_and_parse_profile - found profile %s but it didn't validate: %s",
                           profile, ex)
        else:
            return profile
    return None

Wyświetl plik

@ -35,7 +35,7 @@ def fetch_country_by_ip(ip):
return data.get('response', {}).get('country_code', '')
def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=True):
def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=True, extra_headers=None):
"""Helper method to fetch remote document.
Must be given either the ``url`` or ``host``.
@ -56,6 +56,8 @@ def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=T
logger.debug("fetch_document: url=%s, host=%s, path=%s, timeout=%s, raise_ssl_errors=%s",
url, host, path, timeout, raise_ssl_errors)
headers = {'user-agent': USER_AGENT}
if extra_headers:
headers.update(extra_headers)
if url:
# Use url since it was given
logger.debug("fetch_document: trying %s", url)