diff --git a/CHANGELOG.md b/CHANGELOG.md index a4c698f..e470a3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,10 @@ * Don't include OStatus for Mastodon 3.0+ protocols list. ([related issue](https://github.com/thefederationinfo/the-federation.info/issues/217)) +* **Backwards incompatible**: Stop markdownifying incoming ActivityPub content. Instead + copy it as is to the ``raw_content`` attribute on the entity, setting also the + ``_media_type`` to ``text/html``. + ### Fixed * Don't crash loudly when fetching webfinger for Diaspora that does not contain XML. diff --git a/federation/entities/activitypub/entities.py b/federation/entities/activitypub/entities.py index d8163cb..0b88fee 100644 --- a/federation/entities/activitypub/entities.py +++ b/federation/entities/activitypub/entities.py @@ -3,6 +3,8 @@ import re import uuid from typing import Dict, List +import bleach + from federation.entities.activitypub.constants import ( CONTEXTS_DEFAULT, CONTEXT_MANUALLY_APPROVES_FOLLOWERS, CONTEXT_SENSITIVE, CONTEXT_HASHTAG, CONTEXT_LD_SIGNATURES) @@ -57,15 +59,19 @@ class CleanContentMixin(RawContentMixin): """ Make linkified tags normal tags. """ - def cleaner(match): - return f"#{match.groups()[0]}" - super().post_receive() - self.raw_content = re.sub( - r'\[#([\w\-_]+)\]\(http?s://[a-zA-Z0-9/._-]+\)', - cleaner, + + def remove_tag_links(attrs, new=False): + rel = (None, "rel") + if attrs.get(rel) == "tag": + return + return attrs + + self.raw_content = bleach.linkify( self.raw_content, - re.MULTILINE, + callbacks=[remove_tag_links], + parse_email=False, + skip_tags=["code", "pre"], ) diff --git a/federation/entities/activitypub/mappers.py b/federation/entities/activitypub/mappers.py index 4254f96..1506a48 100644 --- a/federation/entities/activitypub/mappers.py +++ b/federation/entities/activitypub/mappers.py @@ -1,8 +1,6 @@ import logging from typing import List, Callable, Dict, Union, Optional -from markdownify import markdownify - from federation.entities.activitypub.constants import NAMESPACE_PUBLIC from federation.entities.activitypub.entities import ( ActivitypubFollow, ActivitypubProfile, ActivitypubAccept, ActivitypubPost, ActivitypubComment, @@ -259,19 +257,16 @@ def transform_attribute( elif key == "attributedTo" and is_object: transformed["actor_id"] = value elif key in ("content", "source"): - if payload.get('source') and isinstance(payload.get("source"), dict): + if payload.get('source') and isinstance(payload.get("source"), dict) and \ + payload.get('source').get('mediaType') == "text/markdown": + transformed["_media_type"] = "text/markdown" + transformed["raw_content"] = payload.get('source').get('content').strip() transformed["_rendered_content"] = payload.get('content').strip() - if payload.get('source').get('mediaType') == "text/markdown": - transformed["_media_type"] = "text/markdown" - transformed["raw_content"] = payload.get('source').get('content').strip() - else: - transformed["raw_content"] = markdownify(payload.get('content').strip()) - transformed["_media_type"] = payload.get('source').get('mediaType') else: - transformed["raw_content"] = markdownify(payload.get('content').strip()).strip() # Assume HTML by convention - transformed["_rendered_content"] = payload.get('content').strip() transformed["_media_type"] = "text/html" + transformed["raw_content"] = payload.get('content').strip() + transformed["_rendered_content"] = transformed["raw_content"] elif key == "endpoints" and isinstance(value, dict): if "inboxes" not in transformed: transformed["inboxes"] = {"private": None, "public": None} diff --git a/federation/tests/entities/activitypub/test_entities.py b/federation/tests/entities/activitypub/test_entities.py index 87ec829..7ad874b 100644 --- a/federation/tests/entities/activitypub/test_entities.py +++ b/federation/tests/entities/activitypub/test_entities.py @@ -409,10 +409,6 @@ class TestEntitiesPostReceive: "public": False, }] - def test_post__post_receive__cleans_linkified_tags(self, activitypubpost_linkified_tags): - activitypubpost_linkified_tags.post_receive() - assert activitypubpost_linkified_tags.raw_content == '

👁️foobar

barfoo!
#fanart #mastoart

' - class TestEntitiesPreSend: def test_post_inline_images_are_attached(self, activitypubpost_embedded_images): diff --git a/federation/tests/entities/activitypub/test_mappers.py b/federation/tests/entities/activitypub/test_mappers.py index c50bfae..f8c5628 100644 --- a/federation/tests/entities/activitypub/test_mappers.py +++ b/federation/tests/entities/activitypub/test_mappers.py @@ -67,7 +67,9 @@ class TestActivitypubEntityMappersReceive: post = entities[0] assert isinstance(post, ActivitypubPost) assert isinstance(post, Post) - assert post.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom' + assert post.raw_content == '

' \ + '@jaywink boom

' assert post.rendered_content == '

@jaywink boom

' assert post.id == "https://diaspodon.fr/users/jaywink/statuses/102356911717767237" @@ -82,7 +84,7 @@ class TestActivitypubEntityMappersReceive: post = entities[0] assert isinstance(post, ActivitypubPost) assert isinstance(post, Post) - assert post.raw_content == 'boom #test' + assert post.raw_content == '

boom #test

' def test_message_to_objects_simple_post__with_mentions(self): entities = message_to_objects(ACTIVITYPUB_POST_WITH_MENTIONS, "https://mastodon.social/users/jaywink") @@ -101,7 +103,9 @@ class TestActivitypubEntityMappersReceive: assert isinstance(post, Post) assert post.rendered_content == '

@jaywink boom

' - assert post.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom\n\n' + assert post.raw_content == '

' \ + '@jaywink boom

' def test_message_to_objects_simple_post__with_source__markdown(self): entities = message_to_objects(ACTIVITYPUB_POST_WITH_SOURCE_MARKDOWN, "https://diaspodon.fr/users/jaywink") @@ -141,7 +145,9 @@ class TestActivitypubEntityMappersReceive: comment = entities[0] assert isinstance(comment, ActivitypubComment) assert isinstance(comment, Comment) - assert comment.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom' + assert comment.raw_content == '

' \ + '@jaywink boom

' assert comment.id == "https://diaspodon.fr/users/jaywink/statuses/102356911717767237" assert comment.actor_id == "https://diaspodon.fr/users/jaywink" assert comment.target_id == "https://dev.jasonrobinson.me/content/653bad70-41b3-42c9-89cb-c4ee587e68e4/" diff --git a/federation/tests/fixtures/entities.py b/federation/tests/fixtures/entities.py index 317f66d..268f059 100644 --- a/federation/tests/fixtures/entities.py +++ b/federation/tests/fixtures/entities.py @@ -4,7 +4,7 @@ from freezegun import freeze_time from federation.entities.activitypub.entities import ( ActivitypubPost, ActivitypubAccept, ActivitypubFollow, ActivitypubProfile, ActivitypubComment, ActivitypubRetraction, ActivitypubShare, ActivitypubImage) -from federation.entities.base import Profile, Image +from federation.entities.base import Profile from federation.entities.diaspora.entities import ( DiasporaPost, DiasporaComment, DiasporaLike, DiasporaProfile, DiasporaRetraction, DiasporaContact, DiasporaReshare, @@ -144,20 +144,6 @@ https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902 ) -@pytest.fixture -def activitypubpost_linkified_tags(): - with freeze_time("2019-04-27"): - return ActivitypubPost( - raw_content='

👁️foobar

barfoo!
[#fanart](https://mastodon.art/tags/fanart) ' - '[#mastoart](https://mastodon.art/tags/mastoart)

', - public=True, - provider_display_name="Mastodon", - id=f"http://127.0.0.1:8000/post/123456/", - activity_id=f"http://127.0.0.1:8000/post/123456/#create", - actor_id=f"http://127.0.0.1:8000/profile/123456/", - ) - - @pytest.fixture def activitypubprofile(): return ActivitypubProfile( diff --git a/setup.py b/setup.py index c59182d..0757dee 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,6 @@ setup( "lxml>=3.4.0", "ipdata>=3.0", "iteration_utilities", - "markdownify", "jsonschema>=2.0.0", "pycryptodome>=3.4.10", "python-dateutil>=2.4.0",