Stop markdownifying received ActivityPub content

It was causing more trouble than it was worth. Just accept
HTML content into the entity's raw_content attribute and
let apps deal with it.
merge-requests/160/head
Jason Robinson 2020-04-12 23:32:42 +03:00
rodzic 8200725e72
commit 13f0cf0db6
7 zmienionych plików z 34 dodań i 42 usunięć

Wyświetl plik

@ -32,6 +32,10 @@
* Don't include OStatus for Mastodon 3.0+ protocols list. ([related issue](https://github.com/thefederationinfo/the-federation.info/issues/217))
* **Backwards incompatible**: Stop markdownifying incoming ActivityPub content. Instead,
copy it as-is to the ``raw_content`` attribute on the entity, also setting the
``_media_type`` to ``text/html``.
### Fixed
* Don't crash loudly when fetching webfinger for Diaspora that does not contain XML.

Wyświetl plik

@ -3,6 +3,8 @@ import re
import uuid
from typing import Dict, List
import bleach
from federation.entities.activitypub.constants import (
CONTEXTS_DEFAULT, CONTEXT_MANUALLY_APPROVES_FOLLOWERS, CONTEXT_SENSITIVE, CONTEXT_HASHTAG,
CONTEXT_LD_SIGNATURES)
@ -57,15 +59,19 @@ class CleanContentMixin(RawContentMixin):
"""
Make linkified tags normal tags.
"""
def cleaner(match):
return f"#{match.groups()[0]}"
super().post_receive()
self.raw_content = re.sub(
r'\[#([\w\-_]+)\]\(http?s://[a-zA-Z0-9/._-]+\)',
cleaner,
def remove_tag_links(attrs, new=False):
rel = (None, "rel")
if attrs.get(rel) == "tag":
return
return attrs
self.raw_content = bleach.linkify(
self.raw_content,
re.MULTILINE,
callbacks=[remove_tag_links],
parse_email=False,
skip_tags=["code", "pre"],
)

Wyświetl plik

@ -1,8 +1,6 @@
import logging
from typing import List, Callable, Dict, Union, Optional
from markdownify import markdownify
from federation.entities.activitypub.constants import NAMESPACE_PUBLIC
from federation.entities.activitypub.entities import (
ActivitypubFollow, ActivitypubProfile, ActivitypubAccept, ActivitypubPost, ActivitypubComment,
@ -259,19 +257,16 @@ def transform_attribute(
elif key == "attributedTo" and is_object:
transformed["actor_id"] = value
elif key in ("content", "source"):
if payload.get('source') and isinstance(payload.get("source"), dict):
if payload.get('source') and isinstance(payload.get("source"), dict) and \
payload.get('source').get('mediaType') == "text/markdown":
transformed["_media_type"] = "text/markdown"
transformed["raw_content"] = payload.get('source').get('content').strip()
transformed["_rendered_content"] = payload.get('content').strip()
if payload.get('source').get('mediaType') == "text/markdown":
transformed["_media_type"] = "text/markdown"
transformed["raw_content"] = payload.get('source').get('content').strip()
else:
transformed["raw_content"] = markdownify(payload.get('content').strip())
transformed["_media_type"] = payload.get('source').get('mediaType')
else:
transformed["raw_content"] = markdownify(payload.get('content').strip()).strip()
# Assume HTML by convention
transformed["_rendered_content"] = payload.get('content').strip()
transformed["_media_type"] = "text/html"
transformed["raw_content"] = payload.get('content').strip()
transformed["_rendered_content"] = transformed["raw_content"]
elif key == "endpoints" and isinstance(value, dict):
if "inboxes" not in transformed:
transformed["inboxes"] = {"private": None, "public": None}

Wyświetl plik

@ -409,10 +409,6 @@ class TestEntitiesPostReceive:
"public": False,
}]
def test_post__post_receive__cleans_linkified_tags(self, activitypubpost_linkified_tags):
activitypubpost_linkified_tags.post_receive()
assert activitypubpost_linkified_tags.raw_content == '<p>👁foobar</p><p>barfoo!<br>#fanart #mastoart</p>'
class TestEntitiesPreSend:
def test_post_inline_images_are_attached(self, activitypubpost_embedded_images):

Wyświetl plik

@ -67,7 +67,9 @@ class TestActivitypubEntityMappersReceive:
post = entities[0]
assert isinstance(post, ActivitypubPost)
assert isinstance(post, Post)
assert post.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom'
assert post.raw_content == '<p><span class="h-card"><a class="u-url mention" ' \
'href="https://dev.jasonrobinson.me/u/jaywink/">' \
'@<span>jaywink</span></a></span> boom</p>'
assert post.rendered_content == '<p><span class="h-card"><a href="https://dev.jasonrobinson.me/u/jaywink/" ' \
'class="u-url mention">@<span>jaywink</span></a></span> boom</p>'
assert post.id == "https://diaspodon.fr/users/jaywink/statuses/102356911717767237"
@ -82,7 +84,7 @@ class TestActivitypubEntityMappersReceive:
post = entities[0]
assert isinstance(post, ActivitypubPost)
assert isinstance(post, Post)
assert post.raw_content == 'boom #test'
assert post.raw_content == '<p>boom #test</p>'
def test_message_to_objects_simple_post__with_mentions(self):
entities = message_to_objects(ACTIVITYPUB_POST_WITH_MENTIONS, "https://mastodon.social/users/jaywink")
@ -101,7 +103,9 @@ class TestActivitypubEntityMappersReceive:
assert isinstance(post, Post)
assert post.rendered_content == '<p><span class="h-card"><a href="https://dev.jasonrobinson.me/u/jaywink/" ' \
'class="u-url mention">@<span>jaywink</span></a></span> boom</p>'
assert post.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom\n\n'
assert post.raw_content == '<p><span class="h-card"><a class="u-url mention" ' \
'href="https://dev.jasonrobinson.me/u/jaywink/">' \
'@<span>jaywink</span></a></span> boom</p>'
def test_message_to_objects_simple_post__with_source__markdown(self):
entities = message_to_objects(ACTIVITYPUB_POST_WITH_SOURCE_MARKDOWN, "https://diaspodon.fr/users/jaywink")
@ -141,7 +145,9 @@ class TestActivitypubEntityMappersReceive:
comment = entities[0]
assert isinstance(comment, ActivitypubComment)
assert isinstance(comment, Comment)
assert comment.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom'
assert comment.raw_content == '<p><span class="h-card"><a class="u-url mention" ' \
'href="https://dev.jasonrobinson.me/u/jaywink/">' \
'@<span>jaywink</span></a></span> boom</p>'
assert comment.id == "https://diaspodon.fr/users/jaywink/statuses/102356911717767237"
assert comment.actor_id == "https://diaspodon.fr/users/jaywink"
assert comment.target_id == "https://dev.jasonrobinson.me/content/653bad70-41b3-42c9-89cb-c4ee587e68e4/"

Wyświetl plik

@ -4,7 +4,7 @@ from freezegun import freeze_time
from federation.entities.activitypub.entities import (
ActivitypubPost, ActivitypubAccept, ActivitypubFollow, ActivitypubProfile, ActivitypubComment,
ActivitypubRetraction, ActivitypubShare, ActivitypubImage)
from federation.entities.base import Profile, Image
from federation.entities.base import Profile
from federation.entities.diaspora.entities import (
DiasporaPost, DiasporaComment, DiasporaLike, DiasporaProfile, DiasporaRetraction,
DiasporaContact, DiasporaReshare,
@ -144,20 +144,6 @@ https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902
)
@pytest.fixture
def activitypubpost_linkified_tags():
with freeze_time("2019-04-27"):
return ActivitypubPost(
raw_content='<p>👁foobar</p><p>barfoo!<br>[#fanart](https://mastodon.art/tags/fanart) '
'[#mastoart](https://mastodon.art/tags/mastoart)</p>',
public=True,
provider_display_name="Mastodon",
id=f"http://127.0.0.1:8000/post/123456/",
activity_id=f"http://127.0.0.1:8000/post/123456/#create",
actor_id=f"http://127.0.0.1:8000/profile/123456/",
)
@pytest.fixture
def activitypubprofile():
return ActivitypubProfile(

Wyświetl plik

@ -36,7 +36,6 @@ setup(
"lxml>=3.4.0",
"ipdata>=3.0",
"iteration_utilities",
"markdownify",
"jsonschema>=2.0.0",
"pycryptodome>=3.4.10",
"python-dateutil>=2.4.0",