Mirror of https://gitlab.com/jaywink/federation
Merge branch 'inbound-ap-html' into 'master'
Stop markdownifying received ActivityPub content

Closes socialhome/socialhome#198 and socialhome/socialhome#222

See merge request jaywink/federation!160

commit b4cc7071f4
@@ -32,6 +32,10 @@
* Don't include OStatus for Mastodon 3.0+ protocols list. ([related issue](https://github.com/thefederationinfo/the-federation.info/issues/217))
* **Backwards incompatible**: Stop markdownifying incoming ActivityPub content. Instead
  copy it as is to the ``raw_content`` attribute on the entity, setting also the
  ``_media_type`` to ``text/html``.

### Fixed

* Don't crash loudly when fetching webfinger for Diaspora that does not contain XML.

@@ -59,6 +63,10 @@
* Don't try to relay AP payloads to Diaspora receivers and vice versa, for now, until cross-protocol
  relaying is supported.
* Fix some characters stopping tags being identified ([related issue](https://git.feneas.org/socialhome/socialhome/-/issues/222))
* Fix tags separated by slashes being identified ([related issue](https://git.feneas.org/socialhome/socialhome/-/issues/198))

## [0.19.0] - 2019-12-15
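In practice this means entities mapped from inbound ActivityPub payloads now keep the remote HTML untouched. A minimal sketch of the new behaviour, assuming a heavily abbreviated Note-like payload (the real test fixtures further down are fuller):

```python
from federation.entities.activitypub.mappers import message_to_objects

# Hypothetical inbound payload, abbreviated for illustration only.
note = {
    "@context": "https://www.w3.org/ns/activitystreams",
    "type": "Note",
    "id": "https://mastodon.example/users/alice/statuses/1",
    "attributedTo": "https://mastodon.example/users/alice",
    "content": '<p>Hello <a href="https://mastodon.example/tags/art" rel="tag">#art</a></p>',
}

entities = message_to_objects(note, "https://mastodon.example/users/alice")
post = entities[0]
print(post._media_type)  # now "text/html" instead of a markdown conversion
print(post.raw_content)  # the original HTML, no longer run through markdownify
```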
@@ -3,6 +3,8 @@ import re
import uuid
from typing import Dict, List

import bleach

from federation.entities.activitypub.constants import (
    CONTEXTS_DEFAULT, CONTEXT_MANUALLY_APPROVES_FOLLOWERS, CONTEXT_SENSITIVE, CONTEXT_HASHTAG,
    CONTEXT_LD_SIGNATURES)

@@ -57,15 +59,19 @@ class CleanContentMixin(RawContentMixin):
        """
        Make linkified tags normal tags.
        """
        def cleaner(match):
            return f"#{match.groups()[0]}"

        super().post_receive()
        self.raw_content = re.sub(
            r'\[#([\w\-_]+)\]\(http?s://[a-zA-Z0-9/._-]+\)',
            cleaner,

        def remove_tag_links(attrs, new=False):
            rel = (None, "rel")
            if attrs.get(rel) == "tag":
                return
            return attrs

        self.raw_content = bleach.linkify(
            self.raw_content,
            re.MULTILINE,
            callbacks=[remove_tag_links],
            parse_email=False,
            skip_tags=["code", "pre"],
        )
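The `remove_tag_links` callback above follows bleach's linkify callback protocol: attribute keys are `(namespace, name)` tuples, and returning `None` drops the link markup while keeping its text. A standalone sketch of the same idea, with made-up example HTML:

```python
import bleach


def remove_tag_links(attrs, new=False):
    # Hashtag links sent by the remote server carry rel="tag"; dropping them
    # leaves the plain "#tag" text so local tag handling can take over.
    if attrs.get((None, "rel")) == "tag":
        return None
    return attrs


html = '<p>New piece! <a href="https://mastodon.example/tags/fanart" rel="tag">#fanart</a></p>'
cleaned = bleach.linkify(
    html,
    callbacks=[remove_tag_links],
    parse_email=False,
    skip_tags=["code", "pre"],
)
print(cleaned)  # roughly: <p>New piece! #fanart</p>
```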
@@ -1,8 +1,6 @@
import logging
from typing import List, Callable, Dict, Union, Optional

from markdownify import markdownify

from federation.entities.activitypub.constants import NAMESPACE_PUBLIC
from federation.entities.activitypub.entities import (
    ActivitypubFollow, ActivitypubProfile, ActivitypubAccept, ActivitypubPost, ActivitypubComment,

@@ -259,19 +257,16 @@ def transform_attribute(
    elif key == "attributedTo" and is_object:
        transformed["actor_id"] = value
    elif key in ("content", "source"):
        if payload.get('source') and isinstance(payload.get("source"), dict):
        if payload.get('source') and isinstance(payload.get("source"), dict) and \
                payload.get('source').get('mediaType') == "text/markdown":
            transformed["_media_type"] = "text/markdown"
            transformed["raw_content"] = payload.get('source').get('content').strip()
            transformed["_rendered_content"] = payload.get('content').strip()
            if payload.get('source').get('mediaType') == "text/markdown":
                transformed["_media_type"] = "text/markdown"
                transformed["raw_content"] = payload.get('source').get('content').strip()
            else:
                transformed["raw_content"] = markdownify(payload.get('content').strip())
                transformed["_media_type"] = payload.get('source').get('mediaType')
        else:
            transformed["raw_content"] = markdownify(payload.get('content').strip()).strip()
            # Assume HTML by convention
            transformed["_rendered_content"] = payload.get('content').strip()
            transformed["_media_type"] = "text/html"
            transformed["raw_content"] = payload.get('content').strip()
            transformed["_rendered_content"] = transformed["raw_content"]
    elif key == "endpoints" and isinstance(value, dict):
        if "inboxes" not in transformed:
            transformed["inboxes"] = {"private": None, "public": None}
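Taken together, the new branching boils down to: use the markdown `source` when the payload provides one, otherwise keep the HTML `content` as-is and record `text/html`. A simplified, self-contained sketch of that decision (the helper name is invented, this is not the library's exact code):

```python
def extract_content(payload: dict) -> dict:
    """Sketch of the receive-side content handling after this change."""
    transformed = {}
    source = payload.get("source")
    content = (payload.get("content") or "").strip()
    if isinstance(source, dict) and source.get("mediaType") == "text/markdown":
        # The remote side supplied its original markdown - keep that as raw_content.
        transformed["_media_type"] = "text/markdown"
        transformed["raw_content"] = (source.get("content") or "").strip()
        transformed["_rendered_content"] = content
    else:
        # Assume HTML by convention and copy it as-is instead of markdownifying.
        transformed["_media_type"] = "text/html"
        transformed["raw_content"] = content
        transformed["_rendered_content"] = transformed["raw_content"]
    return transformed
```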
@@ -409,10 +409,6 @@ class TestEntitiesPostReceive:
            "public": False,
        }]

    def test_post__post_receive__cleans_linkified_tags(self, activitypubpost_linkified_tags):
        activitypubpost_linkified_tags.post_receive()
        assert activitypubpost_linkified_tags.raw_content == '<p>👁️foobar</p><p>barfoo!<br>#fanart #mastoart</p>'


class TestEntitiesPreSend:
    def test_post_inline_images_are_attached(self, activitypubpost_embedded_images):
@@ -67,7 +67,9 @@ class TestActivitypubEntityMappersReceive:
        post = entities[0]
        assert isinstance(post, ActivitypubPost)
        assert isinstance(post, Post)
        assert post.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom'
        assert post.raw_content == '<p><span class="h-card"><a class="u-url mention" ' \
                                   'href="https://dev.jasonrobinson.me/u/jaywink/">' \
                                   '@<span>jaywink</span></a></span> boom</p>'
        assert post.rendered_content == '<p><span class="h-card"><a href="https://dev.jasonrobinson.me/u/jaywink/" ' \
                                        'class="u-url mention">@<span>jaywink</span></a></span> boom</p>'
        assert post.id == "https://diaspodon.fr/users/jaywink/statuses/102356911717767237"

@@ -82,7 +84,7 @@ class TestActivitypubEntityMappersReceive:
        post = entities[0]
        assert isinstance(post, ActivitypubPost)
        assert isinstance(post, Post)
        assert post.raw_content == 'boom #test'
        assert post.raw_content == '<p>boom #test</p>'

    def test_message_to_objects_simple_post__with_mentions(self):
        entities = message_to_objects(ACTIVITYPUB_POST_WITH_MENTIONS, "https://mastodon.social/users/jaywink")

@@ -101,7 +103,9 @@ class TestActivitypubEntityMappersReceive:
        assert isinstance(post, Post)
        assert post.rendered_content == '<p><span class="h-card"><a href="https://dev.jasonrobinson.me/u/jaywink/" ' \
                                        'class="u-url mention">@<span>jaywink</span></a></span> boom</p>'
        assert post.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom\n\n'
        assert post.raw_content == '<p><span class="h-card"><a class="u-url mention" ' \
                                   'href="https://dev.jasonrobinson.me/u/jaywink/">' \
                                   '@<span>jaywink</span></a></span> boom</p>'

    def test_message_to_objects_simple_post__with_source__markdown(self):
        entities = message_to_objects(ACTIVITYPUB_POST_WITH_SOURCE_MARKDOWN, "https://diaspodon.fr/users/jaywink")

@@ -141,7 +145,9 @@ class TestActivitypubEntityMappersReceive:
        comment = entities[0]
        assert isinstance(comment, ActivitypubComment)
        assert isinstance(comment, Comment)
        assert comment.raw_content == '[@jaywink](https://dev.jasonrobinson.me/u/jaywink/) boom'
        assert comment.raw_content == '<p><span class="h-card"><a class="u-url mention" ' \
                                      'href="https://dev.jasonrobinson.me/u/jaywink/">' \
                                      '@<span>jaywink</span></a></span> boom</p>'
        assert comment.id == "https://diaspodon.fr/users/jaywink/statuses/102356911717767237"
        assert comment.actor_id == "https://diaspodon.fr/users/jaywink"
        assert comment.target_id == "https://dev.jasonrobinson.me/content/653bad70-41b3-42c9-89cb-c4ee587e68e4/"
@@ -4,7 +4,7 @@ from freezegun import freeze_time
from federation.entities.activitypub.entities import (
    ActivitypubPost, ActivitypubAccept, ActivitypubFollow, ActivitypubProfile, ActivitypubComment,
    ActivitypubRetraction, ActivitypubShare, ActivitypubImage)
from federation.entities.base import Profile, Image
from federation.entities.base import Profile
from federation.entities.diaspora.entities import (
    DiasporaPost, DiasporaComment, DiasporaLike, DiasporaProfile, DiasporaRetraction,
    DiasporaContact, DiasporaReshare,

@@ -144,20 +144,6 @@ https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a902
    )


@pytest.fixture
def activitypubpost_linkified_tags():
    with freeze_time("2019-04-27"):
        return ActivitypubPost(
            raw_content='<p>👁️foobar</p><p>barfoo!<br>[#fanart](https://mastodon.art/tags/fanart) '
                        '[#mastoart](https://mastodon.art/tags/mastoart)</p>',
            public=True,
            provider_display_name="Mastodon",
            id=f"http://127.0.0.1:8000/post/123456/",
            activity_id=f"http://127.0.0.1:8000/post/123456/#create",
            actor_id=f"http://127.0.0.1:8000/profile/123456/",
        )


@pytest.fixture
def activitypubprofile():
    return ActivitypubProfile(
@@ -34,12 +34,13 @@ class TestFindTags:
        assert text == "foo\n```\n#code\n```\n#notcode/notcode\n\n #alsocode\n"

    def test_endings_are_filtered_out(self):
        source = "#parenthesis) #exp! #list]"
        source = "#parenthesis) #exp! #list] *#doh* _#bah_ #gah% #foo/#bar"
        tags, text = find_tags(source)
        assert tags == {"parenthesis", "exp", "list"}
        assert tags == {"parenthesis", "exp", "list", "doh", "bah", "gah", "foo", "bar"}
        assert text == source
        tags, text = find_tags(source, replacer=self._replacer)
        assert text == "#parenthesis/parenthesis) #exp/exp! #list/list]"
        assert text == "#parenthesis/parenthesis) #exp/exp! #list/list] *#doh/doh* _#bah/bah_ #gah/gah% " \
                       "#foo/foo/#bar/bar"

    def test_finds_tags(self):
        source = "#post **Foobar** #tag #OtherTag #third\n#fourth"

@@ -49,6 +50,14 @@ class TestFindTags:
        tags, text = find_tags(source, replacer=self._replacer)
        assert text == "#post/post **Foobar** #tag/tag #OtherTag/othertag #third/third\n#fourth/fourth"

    def test_ok_with_html_tags_in_text(self):
        source = "<p>#starting and <span>#MixED</span> however not <#>this</#> or <#/>that"
        tags, text = find_tags(source)
        assert tags == {"starting", "mixed"}
        assert text == source
        tags, text = find_tags(source, replacer=self._replacer)
        assert text == "<p>#starting/starting and <span>#MixED/mixed</span> however not <#>this</#> or <#/>that"

    def test_postfixed_tags(self):
        source = "#foo) #bar] #hoo, #hee."
        tags, text = find_tags(source)

@@ -66,7 +75,7 @@ class TestFindTags:
        assert text == "(#foo/foo [#bar/bar"

    def test_invalid_text_returns_no_tags(self):
        source = "#a!a #a#a #a$a #a%a #a^a #a&a #a*a #a+a #a.a #a,a #a@a #a£a #a/a #a(a #a)a #a=a " \
        source = "#a!a #a#a #a$a #a%a #a^a #a&a #a*a #a+a #a.a #a,a #a@a #a£a #a(a #a)a #a=a " \
                 "#a?a #a`a #a'a #a\\a #a{a #a[a #a]a #a}a #a~a #a;a #a:a #a\"a #a’a #a”a #\xa0cd"
        tags, text = find_tags(source)
        assert tags == set()

@@ -74,6 +83,14 @@ class TestFindTags:
        tags, text = find_tags(source, replacer=self._replacer)
        assert text == source

    def test_start_of_paragraph_in_html_content(self):
        source = '<p>First line</p><p>#foobar #barfoo</p>'
        tags, text = find_tags(source)
        assert tags == {"foobar", "barfoo"}
        assert text == source
        tags, text = find_tags(source, replacer=self._replacer)
        assert text == '<p>First line</p><p>#foobar/foobar #barfoo/barfoo</p>'


class TestProcessTextLinks:
    def test_link_at_start_or_end(self):

@@ -97,6 +114,12 @@ class TestProcessTextLinks:
        assert process_text_links('<a href="/streams/tag/foobar">#foobar</a>') == \
               '<a href="/streams/tag/foobar">#foobar</a>'

    def test_does_not_remove_mention_classes(self):
        assert process_text_links('<p><span class="h-card"><a href="https://dev.jasonrobinson.me/u/jaywink/" '
                                  'class="u-url mention">@<span>jaywink</span></a></span> boom</p>') == \
               '<p><span class="h-card"><a class="u-url mention" href="https://dev.jasonrobinson.me/u/jaywink/" ' \
               'rel="nofollow" target="_blank">@<span>jaywink</span></a></span> boom</p>'


def test_validate_handle():
    assert validate_handle("foo@bar.com")
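The tests above exercise `find_tags` both with and without a replacer. For reference, a small usage sketch outside the test suite; the `linkify_tag` replacer and its URLs are invented for illustration:

```python
from federation.utils.text import find_tags


def linkify_tag(tag: str) -> str:
    # Hypothetical replacer: turn each found tag into a markdown link.
    return f"[#{tag}](https://example.com/tags/{tag})"


tags, text = find_tags("<p>First line</p><p>#foobar #barfoo</p>", replacer=linkify_tag)
print(sorted(tags))
# ['barfoo', 'foobar']
print(text)
# Expected, per the tests above:
# <p>First line</p><p>[#foobar](https://example.com/tags/foobar) [#barfoo](https://example.com/tags/barfoo)</p>
```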
@@ -33,7 +33,9 @@ def find_tags(text: str, replacer: callable = None) -> Tuple[Set, str]:
    Returns a set of tags and the original or replaced text.
    """
    found_tags = set()
    lines = text.splitlines(keepends=True)
    # <br> and <p> tags cause issues in us finding words - add some spacing around them
    new_text = text.replace("<br>", " <br> ").replace("<p>", " <p> ").replace("</p>", " </p> ")
    lines = new_text.splitlines(keepends=True)
    final_lines = []
    code_block = False
    final_text = None

@@ -49,17 +51,28 @@ def find_tags(text: str, replacer: callable = None) -> Tuple[Set, str]:
            # Check each word separately
            words = line.split(" ")
            for word in words:
                candidate = word.strip().strip("([]),.!?:")
                if candidate.startswith("#"):
                    candidate = candidate.strip("#")
                    if test_tag(candidate.lower()):
                        found_tags.add(candidate.lower())
                        if replacer:
                            try:
                                tag_word = word.replace("#%s" % candidate, replacer(candidate))
                                final_words.append(tag_word)
                            except Exception:
                                final_words.append(word)
                if word.find('#') > -1:
                    candidate = word.strip().strip("([]),.!?:*_%/")
                    if candidate.find('<') > -1 or candidate.find('>') > -1:
                        # Strip html
                        candidate = bleach.clean(word, strip=True)
                    # Now split with slashes
                    candidates = candidate.split("/")
                    to_replace = []
                    for candidate in candidates:
                        if candidate.startswith("#"):
                            candidate = candidate.strip("#")
                            if test_tag(candidate.lower()):
                                found_tags.add(candidate.lower())
                                to_replace.append(candidate)
                    if replacer:
                        tag_word = word
                        try:
                            for counter, replacee in enumerate(to_replace, 1):
                                tag_word = tag_word.replace("#%s" % replacee, replacer(replacee))
                        except Exception:
                            pass
                        final_words.append(tag_word)
                    else:
                        final_words.append(word)
                else:

@@ -67,6 +80,8 @@ def find_tags(text: str, replacer: callable = None) -> Tuple[Set, str]:
            final_lines.append(" ".join(final_words))
    if replacer:
        final_text = "".join(final_lines)
    if final_text:
        final_text = final_text.replace(" <br> ", "<br>").replace(" <p> ", "<p>").replace(" </p> ", "</p>")
    return found_tags, final_text or text
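The widened candidate handling above strips markdown punctuation, removes HTML via `bleach.clean`, and splits candidates on slashes before testing each piece. A condensed sketch of just that extraction step; `tag_candidates` is a hypothetical helper, not part of the library:

```python
import bleach


def tag_candidates(word: str) -> list:
    # Strip wrapping punctuation and markdown markers, then drop HTML tags
    # so a word like "<p>#starting" still yields a clean candidate.
    candidate = word.strip().strip("([]),.!?:*_%/")
    if "<" in candidate or ">" in candidate:
        candidate = bleach.clean(word, strip=True)
    # Split on slashes so "#foo/#bar" produces both tags.
    return [part.lstrip("#") for part in candidate.split("/") if part.startswith("#")]


print(tag_candidates("#foo/#bar"))     # ['foo', 'bar']
print(tag_candidates("<p>#starting"))  # ['starting']
print(tag_candidates("*#doh*"))        # ['doh']
```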
setup.py

@@ -36,7 +36,6 @@ setup(
        "lxml>=3.4.0",
        "ipdata>=3.0",
        "iteration_utilities",
        "markdownify",
        "jsonschema>=2.0.0",
        "pycryptodome>=3.4.10",
        "python-dateutil>=2.4.0",