Add pyfed:inlineImage property to ActivityPub attachments

If we rip out embedded images from raw_content, mark them as
pyfed:inlineImage. The receiving side will then know that those
attachments are included as inline images, should they wish to
exclude those, if they support inline images via markdown or HTML.

Also rip out all embedded images, not just the ones from the
sender's domain.
merge-requests/156/head
Jason Robinson 2019-08-18 21:43:27 +03:00
rodzic 88bf593e2e
commit b0c6be6cd7
7 zmienionych plików z 48 dodań i 14 usunięć

Wyświetl plik

@ -50,3 +50,17 @@ Namespace
.........
All payloads sent over ActivityPub can be identified by checking ``@context``, which will include the ``pyfed: https://docs.jasonrobinson.me/ns/python-federation`` namespace.
Content media type
..................
When receiving, all ``object.content`` keys are expected to be in ``text/html``.
For outbound entities, ``raw_content`` is expected to be in ``text/markdown``, specifically CommonMark. When sending payloads, ``raw_content`` will be rendered via the ``commonmark`` library into ``object.content``. The original ``raw_content`` will be added to the ``object.source`` property.
Images
......
Any images referenced in the ``raw_content`` of outbound entities will be extracted into ``object.attachment`` objects, for receivers that don't support inline images. These attachments will have a ``pyfed:inlineImage`` property set to ``true`` to indicate the image has been extracted from the content. Receivers should ignore the inline image attachments if they support showing ``<img>`` HTML tags or the markdown content in ``object.source``.
For inbound entities we do this automatically by not including received attachments marked with ``pyfed:inlineImage`` in the entity ``_children`` attribute.

Wyświetl plik

@ -70,12 +70,15 @@ class ActivitypubNoteMixin(AttachImagesMixin, CleanContentMixin, ActivitypubEnti
for child in self._children:
image = ImageObject(url=child.url)
if image.mediaType:
as2["object"]["attachment"].append({
attachment = {
"type": "Document",
"mediaType": image.mediaType,
"name": child.name,
"url": child.url,
})
}
if child.inline:
attachment["pyfed:inlineImage"] = True
as2["object"]["attachment"].append(attachment)
return as2

Wyświetl plik

@ -97,8 +97,10 @@ def extract_attachments(payload: Dict) -> List[Image]:
"""
attachments = []
for item in payload.get('attachment', []):
# noinspection PyProtectedMember
if item.get("type") == "Document" and item.get("mediaType") in IMAGE_TYPES:
if item.get('pyfed:inlineImage', False):
# Skip this image as it's indicated to be inline in content and source already
continue
attachments.append(
Image(
url=item.get('url'),

Wyświetl plik

@ -8,11 +8,9 @@ from federation.entities.utils import get_base_attributes
class AttachImagesMixin(RawContentMixin):
def pre_send(self) -> None:
"""
Attach any embedded images from the sender server.
Attach any embedded images from raw_content.
"""
actor_domain = re.match(r"https?://([\w.\-]+)", self.actor_id).groups()[0]
actor_domain = actor_domain.replace(".", "\\.")
regex = r"!\[([\w ]*)\]\((https?://%s[\w\/\-.]+\.[jpg|gif|jpeg|png]*)\)" % actor_domain
regex = r"!\[([\w ]*)\]\((https?://[\w\d\-\./]+\.[\w]*((?<=jpg)|(?<=gif)|(?<=png)|(?<=jpeg)))\)"
matches = re.finditer(regex, self.raw_content, re.MULTILINE | re.IGNORECASE)
for match in matches:
groups = match.groups()
@ -20,10 +18,9 @@ class AttachImagesMixin(RawContentMixin):
Image(
url=groups[1],
name=groups[0] or "",
inline=True,
)
)
self.raw_content = re.sub(regex, "", self.raw_content, re.MULTILINE | re.IGNORECASE)
self.raw_content = self.raw_content.strip()
class ActivitypubEntityMixin(BaseEntity):

Wyświetl plik

@ -23,6 +23,7 @@ class Image(OptionalRawContentMixin, CreatedAtMixin, BaseEntity):
name = ""
height = 0
width = 0
inline = False
_default_activity = ActivityType.CREATE

Wyświetl plik

@ -271,10 +271,22 @@ class TestEntitiesPostReceive:
class TestEntitiesPreSend:
def test_post_local_images_are_attached(self, activitypubpost_embedded_images):
def test_post_inline_images_are_attached(self, activitypubpost_embedded_images):
activitypubpost_embedded_images.pre_send()
assert activitypubpost_embedded_images.raw_content == "#Cycling #lauttasaari #sea #sun"
assert len(activitypubpost_embedded_images._children) == 4
image = activitypubpost_embedded_images._children[0]
assert image.url == "https://example.com/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902541476.jpeg"
assert image.name == ""
assert image.inline
image = activitypubpost_embedded_images._children[1]
assert image.url == "https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902541477.png"
assert image.name == ""
assert image.inline
image = activitypubpost_embedded_images._children[2]
assert image.url == "https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902541478.gif"
assert image.name == "foobar"
assert image.inline
image = activitypubpost_embedded_images._children[3]
assert image.url == "https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902541479.jpg"
assert image.name == "foobar barfoo"
assert image.inline

Wyświetl plik

@ -95,10 +95,15 @@ def activitypubpost_embedded_images():
#Cycling #lauttasaari #sea #sun
![](https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902541476.jpg)![](https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902541477.jpg)
![](https://example.com/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902541476.jpeg)![](https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902541477.png)
![foobar](https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902541478.jpg)
![foobar barfoo](https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902541479.jpg)
![foobar](https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902541478.gif)
![foobar barfoo](https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902541479.jpg)
#only a link, not embedded
[foo](https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a9025414710.jpg)
#only a link, not embedded
https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a9025414711.jpg
""",
public=True,
provider_display_name="Socialhome",