From e92792fa35413c9c0666a51568888838e0f9e6bc Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Sun, 25 Aug 2019 22:55:00 +0300 Subject: [PATCH 1/2] Markdownify incoming HTML content from ActivityPub layer --- federation/entities/activitypub/mappers.py | 16 ++++++++++++---- .../tests/entities/activitypub/test_mappers.py | 11 +++++------ setup.py | 1 + 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/federation/entities/activitypub/mappers.py b/federation/entities/activitypub/mappers.py index bc3262a..ae3857f 100644 --- a/federation/entities/activitypub/mappers.py +++ b/federation/entities/activitypub/mappers.py @@ -1,6 +1,8 @@ import logging from typing import List, Callable, Dict, Union, Optional +from markdownify import markdownify + from federation.entities.activitypub.constants import NAMESPACE_PUBLIC from federation.entities.activitypub.entities import ( ActivitypubFollow, ActivitypubProfile, ActivitypubAccept, ActivitypubPost, ActivitypubComment, @@ -240,12 +242,18 @@ def transform_attribute( transformed["actor_id"] = value elif key in ("content", "source"): if payload.get('source') and isinstance(payload.get("source"), dict): - transformed["raw_content"] = payload.get('source').get('content') - transformed["_media_type"] = payload.get('source').get('mediaType') - transformed["_rendered_content"] = payload.get('content') + if payload.get('source').get('mediaType') == "text/html": + transformed["_rendered_content"] = payload.get('content') + transformed["_media_type"] = "text/html" + transformed["raw_content"] = markdownify(payload.get('source').get('content')).strip() + else: + transformed["_media_type"] = payload.get('source').get('mediaType') + transformed["_rendered_content"] = payload.get("content").strip() + transformed["raw_content"] = payload.get('source').get('content').strip() else: - transformed["raw_content"] = value + transformed["raw_content"] = markdownify(value).strip() # Assume HTML by convention + 
transformed["_rendered_content"] = value.strip() transformed["_media_type"] = "text/html" elif key == "inboxes" and isinstance(value, dict): if "inboxes" not in transformed: diff --git a/federation/tests/entities/activitypub/test_mappers.py b/federation/tests/entities/activitypub/test_mappers.py index a386afd..ec96d56 100644 --- a/federation/tests/entities/activitypub/test_mappers.py +++ b/federation/tests/entities/activitypub/test_mappers.py @@ -65,9 +65,9 @@ class TestActivitypubEntityMappersReceive: post = entities[0] assert isinstance(post, ActivitypubPost) assert isinstance(post, Post) - assert post.raw_content == '

@jaywink boom

' - assert post.rendered_content == post.raw_content + assert post.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom' + assert post.rendered_content == '

@jaywink boom

' assert post.id == "https://diaspodon.fr/users/jaywink/statuses/102356911717767237" assert post.actor_id == "https://diaspodon.fr/users/jaywink" assert post.public is True @@ -80,7 +80,7 @@ class TestActivitypubEntityMappersReceive: post = entities[0] assert isinstance(post, ActivitypubPost) assert isinstance(post, Post) - assert post.raw_content == "

boom #test

" + assert post.raw_content == 'boom [#test](https://mastodon.social/tags/test)' def test_message_to_objects_simple_post__with_source(self): entities = message_to_objects(ACTIVITYPUB_POST_WITH_SOURCE, "https://diaspodon.fr/users/jaywink") @@ -120,8 +120,7 @@ class TestActivitypubEntityMappersReceive: comment = entities[0] assert isinstance(comment, ActivitypubComment) assert isinstance(comment, Comment) - assert comment.raw_content == '

@jaywink boom

' + assert comment.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom' assert comment.id == "https://diaspodon.fr/users/jaywink/statuses/102356911717767237" assert comment.actor_id == "https://diaspodon.fr/users/jaywink" assert comment.target_id == "https://dev.jasonrobinson.me/content/653bad70-41b3-42c9-89cb-c4ee587e68e4/" diff --git a/setup.py b/setup.py index e721c38..3146781 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ setup( "lxml>=3.4.0", "ipdata>=3.0", "iteration_utilities", + "markdownify", "jsonschema>=2.0.0", "pycryptodome>=3.4.10", "python-dateutil>=2.4.0", From ed0b6c21e92ce3dbb481cc485b41b97e13af0d7d Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Mon, 26 Aug 2019 22:24:09 +0300 Subject: [PATCH 2/2] Add docs for the content/source HTML/Markdown rules --- docs/protocols.rst | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/protocols.rst b/docs/protocols.rst index db40608..d0e4060 100644 --- a/docs/protocols.rst +++ b/docs/protocols.rst @@ -54,7 +54,16 @@ All payloads over ActivityPub sent can be identified with by checking ``@context Content media type .................. -When receiving, all ``object.content`` keys are expected to be in ``text/html``. +The following keys will be set on the entity depending on whether the ``source`` property exists: + +* if the object has an ``object.source`` property: + * ``_media_type`` will be the source media type + * ``_rendered_content`` will be the object ``content`` + * ``raw_content`` will be the source ``content`` (converted to Markdown when the source media type is ``text/html``) +* if the object has no ``object.source`` property: + * ``_media_type`` will be ``text/html`` + * ``_rendered_content`` will be the object ``content`` + * ``raw_content`` will be the object ``content`` run through an HTML-to-Markdown renderer For outbound entities, ``raw_content`` is expected to be in ``text/markdown``, specifically CommonMark. When sending payloads, ``raw_content`` will be rendered via the ``commonmark`` library into ``object.content``. 
The original ``raw_content`` will be added to the ``object.source`` property.