From 4ad4f468a44cf068cdb616bf7e57d7da3cca5349 Mon Sep 17 00:00:00 2001 From: Andrew Godwin Date: Tue, 20 Dec 2022 11:39:45 +0000 Subject: [PATCH] Refactor HTML rendering into one place Also suppress using external tags for now, until we can separate them from hashtags properly. --- .pre-commit-config.yaml | 1 + activities/models/emoji.py | 60 +++------ activities/models/hashtag.py | 14 --- activities/models/post.py | 19 +-- activities/templatetags/activity_tags.py | 13 -- activities/templatetags/emoji_tags.py | 27 ---- activities/views/posts.py | 5 + api/views/emoji.py | 2 +- core/html.py | 118 +++++++++++++++++- setup.cfg | 1 + templates/identity/view.html | 1 - tests/activities/models/test_hashtag.py | 3 +- tests/activities/models/test_post.py | 10 ++ tests/activities/models/test_post_types.py | 26 ++-- tests/activities/services/test_post.py | 2 +- .../templatetags/test_activity_tags.py | 16 +-- tests/core/test_html.py | 6 +- users/models/identity.py | 18 ++- 18 files changed, 179 insertions(+), 163 deletions(-) delete mode 100644 activities/templatetags/emoji_tags.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1b6582c..400f20c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -46,6 +46,7 @@ repos: rev: v0.991 hooks: - id: mypy + exclude: "^tests/" additional_dependencies: [ types-pyopenssl, diff --git a/activities/models/emoji.py b/activities/models/emoji.py index 8068881..5a0c0c8 100644 --- a/activities/models/emoji.py +++ b/activities/models/emoji.py @@ -1,10 +1,11 @@ import re from functools import partial -from typing import ClassVar, cast +from typing import ClassVar import httpx import urlman from asgiref.sync import sync_to_async +from cachetools import TTLCache, cached from django.conf import settings from django.core.exceptions import ValidationError from django.db import models @@ -50,17 +51,18 @@ class EmojiStates(StateGraph): class EmojiQuerySet(models.QuerySet): def usable(self, domain: Domain | None = None): - if domain is None or domain.local: - visible_q = models.Q(local=True) - else: - visible_q = models.Q(public=True) - if Config.system.emoji_unreviewed_are_public: - visible_q |= models.Q(public__isnull=True) - + """ + Returns all usable emoji, optionally filtering by domain too. + """ + visible_q = models.Q(local=True) | models.Q(public=True) + if Config.system.emoji_unreviewed_are_public: + visible_q |= models.Q(public__isnull=True) qs = self.filter(visible_q) + if domain: if not domain.local: qs = qs.filter(domain=domain) + return qs @@ -136,6 +138,13 @@ class Emoji(StatorModel): def load_locals(cls) -> dict[str, "Emoji"]: return {x.shortcode: x for x in Emoji.objects.usable().filter(local=True)} + @classmethod + @cached(cache=TTLCache(maxsize=1000, ttl=60)) + def for_domain(cls, domain: Domain | None) -> list["Emoji"]: + if not domain: + return list(cls.locals.values()) + return list(cls.objects.usable(domain)) + @property def fullcode(self): return f":{self.shortcode}:" @@ -164,41 +173,6 @@ class Emoji(StatorModel): ) return self.fullcode - @classmethod - def imageify_emojis( - cls, - content: str, - *, - emojis: list["Emoji"] | EmojiQuerySet | None = None, - include_local: bool = True, - ): - """ - Find :emoji: in content and convert to . If include_local is True, - the local emoji will be used as a fallback for any shortcodes not defined - by emojis. - """ - emoji_set = ( - cast(list[Emoji], list(cls.locals.values())) if include_local else [] - ) - - if emojis: - if isinstance(emojis, (EmojiQuerySet, list)): - emoji_set.extend(list(emojis)) - else: - raise TypeError("Unsupported type for emojis") - - possible_matches = { - emoji.shortcode: emoji.as_html() for emoji in emoji_set if emoji.is_usable - } - - def replacer(match): - fullcode = match.group(1).lower() - if fullcode in possible_matches: - return possible_matches[fullcode] - return match.group() - - return mark_safe(Emoji.emoji_regex.sub(replacer, content)) - @classmethod def emojis_from_content(cls, content: str, domain: Domain | None) -> list[str]: """ diff --git a/activities/models/hashtag.py b/activities/models/hashtag.py index 078f110..afe2d94 100644 --- a/activities/models/hashtag.py +++ b/activities/models/hashtag.py @@ -5,7 +5,6 @@ import urlman from asgiref.sync import sync_to_async from django.db import models from django.utils import timezone -from django.utils.safestring import mark_safe from core.html import strip_html from core.models import Config @@ -176,19 +175,6 @@ class Hashtag(StatorModel): hashtags = sorted({tag.lower() for tag in hashtag_hits}) return list(hashtags) - @classmethod - def linkify_hashtags(cls, content, domain=None) -> str: - def replacer(match): - hashtag = match.group(1) - if domain: - return f'#{hashtag}' - else: - return ( - f'#{hashtag}' - ) - - return mark_safe(Hashtag.hashtag_regex.sub(replacer, content)) - def to_mastodon_json(self): return { "name": self.hashtag, diff --git a/activities/models/post.py b/activities/models/post.py index 51e1013..cce8af1 100644 --- a/activities/models/post.py +++ b/activities/models/post.py @@ -20,8 +20,7 @@ from activities.models.post_types import ( PostTypeDataDecoder, PostTypeDataEncoder, ) -from activities.templatetags.emoji_tags import imageify_emojis -from core.html import sanitize_post, strip_html +from core.html import ContentRenderer, strip_html from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date from stator.exceptions import TryAgainLater from stator.models import State, StateField, StateGraph, StatorModel @@ -383,13 +382,7 @@ class Post(StatorModel): return mark_safe(self.mention_regex.sub(replacer, content)) def _safe_content_note(self, *, local: bool = True): - content = Hashtag.linkify_hashtags( - self.linkify_mentions(sanitize_post(self.content), local=local), - domain=None if local else self.author.domain, - ) - if local: - content = imageify_emojis(content, self.author.domain) - return content + return ContentRenderer(local=local).render_post(self.content, self) # def _safe_content_question(self, *, local: bool = True): # context = { @@ -432,12 +425,6 @@ class Post(StatorModel): """ return self.safe_content(local=False) - def safe_content_plain(self): - """ - Returns the content formatted as plain text - """ - return self.linkify_mentions(sanitize_post(self.content)) - ### Async helpers ### async def afetch_full(self) -> "Post": @@ -914,7 +901,7 @@ class Post(StatorModel): "poll": None, "card": None, "language": None, - "text": self.safe_content_plain(), + "text": self.safe_content_remote(), "edited_at": format_ld_date(self.edited) if self.edited else None, } if interactions: diff --git a/activities/templatetags/activity_tags.py b/activities/templatetags/activity_tags.py index fb822f6..571e2d6 100644 --- a/activities/templatetags/activity_tags.py +++ b/activities/templatetags/activity_tags.py @@ -3,8 +3,6 @@ import datetime from django import template from django.utils import timezone -from activities.models import Hashtag - register = template.Library() @@ -33,14 +31,3 @@ def timedeltashort(value: datetime.datetime): years = max(days // 365.25, 1) text = f"{years:0n}y" return text - - -@register.filter -def linkify_hashtags(value: str): - """ - Convert hashtags in content in to /tags// links. - """ - if not value: - return "" - - return Hashtag.linkify_hashtags(value) diff --git a/activities/templatetags/emoji_tags.py b/activities/templatetags/emoji_tags.py deleted file mode 100644 index ad221db..0000000 --- a/activities/templatetags/emoji_tags.py +++ /dev/null @@ -1,27 +0,0 @@ -from cachetools import TTLCache, cached -from django import template - -from activities.models import Emoji -from users.models import Domain - -register = template.Library() - - -@cached(cache=TTLCache(maxsize=1000, ttl=60)) -def emoji_from_domain(domain: Domain | None) -> list[Emoji]: - if not domain: - return list(Emoji.locals.values()) - return list(Emoji.objects.usable(domain)) - - -@register.filter -def imageify_emojis(value: str, arg: Domain | None = None): - """ - Convert hashtags in content in to /tags// links. - """ - if not value: - return "" - - emojis = emoji_from_domain(arg) - - return Emoji.imageify_emojis(value, emojis=emojis) diff --git a/activities/views/posts.py b/activities/views/posts.py index 967352e..36f8fb3 100644 --- a/activities/views/posts.py +++ b/activities/views/posts.py @@ -40,6 +40,11 @@ class Individual(TemplateView): ancestors, descendants = PostService(self.post_obj).context( self.request.identity ) + print( + self.post_obj.to_mastodon_json(), + self.post_obj.emojis.all(), + self.post_obj.emojis.usable(), + ) return { "identity": self.identity, "post": self.post_obj, diff --git a/api/views/emoji.py b/api/views/emoji.py index c4e7558..a0ffbc3 100644 --- a/api/views/emoji.py +++ b/api/views/emoji.py @@ -5,4 +5,4 @@ from api.views.base import api_router @api_router.get("/v1/custom_emojis", response=list[CustomEmoji]) def emojis(request): - return [e.to_mastodon_json() for e in Emoji.objects.usable()] + return [e.to_mastodon_json() for e in Emoji.objects.usable().filter(local=True)] diff --git a/core/html.py b/core/html.py index 0ab6ace..5af476e 100644 --- a/core/html.py +++ b/core/html.py @@ -15,12 +15,12 @@ def allow_a(tag: str, name: str, value: str): return False -def sanitize_post(post_html: str) -> str: +def sanitize_html(post_html: str) -> str: """ Only allows a, br, p and span tags, and class attributes. """ cleaner = bleach.Cleaner( - tags=["br", "p", "a"], + tags=["br", "p"], attributes={ # type:ignore "a": allow_a, "p": ["class"], @@ -50,3 +50,117 @@ def html_to_plaintext(post_html: str) -> str: # Remove all other HTML and return cleaner = bleach.Cleaner(tags=[], strip=True, filters=[]) return cleaner.clean(post_html).strip() + + +class ContentRenderer: + """ + Renders HTML for posts, identity fields, and more. + + The `local` parameter affects whether links are absolute (False) or relative (True) + """ + + def __init__(self, local: bool): + self.local = local + + def render_post(self, html: str, post) -> str: + """ + Given post HTML, normalises it and renders it for presentation. + """ + if not html: + return "" + html = sanitize_html(html) + html = self.linkify_mentions(html, post=post) + html = self.linkify_hashtags(html, identity=post.author) + if self.local: + html = self.imageify_emojis(html, identity=post.author) + return mark_safe(html) + + def render_identity(self, html: str, identity, strip: bool = False) -> str: + """ + Given identity field HTML, normalises it and renders it for presentation. + """ + if not html: + return "" + if strip: + html = strip_html(html) + else: + html = sanitize_html(html) + html = self.linkify_hashtags(html, identity=identity) + if self.local: + html = self.imageify_emojis(html, identity=identity) + return mark_safe(html) + + def linkify_mentions(self, html: str, post) -> str: + """ + Links mentions _in the context of the post_ - as in, using the mentions + property as the only source (as we might be doing this without other + DB access allowed) + """ + from activities.models import Post + + possible_matches = {} + for mention in post.mentions.all(): + if self.local: + url = str(mention.urls.view) + else: + url = mention.absolute_profile_uri() + possible_matches[mention.username] = url + possible_matches[f"{mention.username}@{mention.domain_id}"] = url + + collapse_name: dict[str, str] = {} + + def replacer(match): + precursor = match.group(1) + handle = match.group(2).lower() + if "@" in handle: + short_handle = handle.split("@", 1)[0] + else: + short_handle = handle + if handle in possible_matches: + if short_handle not in collapse_name: + collapse_name[short_handle] = handle + elif collapse_name.get(short_handle) != handle: + short_handle = handle + return f'{precursor}@{short_handle}' + else: + return match.group() + + return Post.mention_regex.sub(replacer, html) + + def linkify_hashtags(self, html, identity) -> str: + from activities.models import Hashtag + + def replacer(match): + hashtag = match.group(1) + if self.local: + return ( + f'#{hashtag}' + ) + else: + return f'#{hashtag}' + + return Hashtag.hashtag_regex.sub(replacer, html) + + def imageify_emojis(self, html: str, identity, include_local: bool = True): + """ + Find :emoji: in content and convert to . If include_local is True, + the local emoji will be used as a fallback for any shortcodes not defined + by emojis. + """ + from activities.models import Emoji + + emoji_set = Emoji.for_domain(identity.domain) + if include_local: + emoji_set.extend(Emoji.for_domain(None)) + + possible_matches = { + emoji.shortcode: emoji.as_html() for emoji in emoji_set if emoji.is_usable + } + + def replacer(match): + fullcode = match.group(1).lower() + if fullcode in possible_matches: + return possible_matches[fullcode] + return match.group() + + return Emoji.emoji_regex.sub(replacer, html) diff --git a/setup.cfg b/setup.cfg index 3503f61..0d26527 100644 --- a/setup.cfg +++ b/setup.cfg @@ -15,6 +15,7 @@ filterwarnings = [mypy] warn_unused_ignores = True +exclude = tests [mypy-django.*] ignore_missing_imports = True diff --git a/templates/identity/view.html b/templates/identity/view.html index e68ebc1..e6e37e6 100644 --- a/templates/identity/view.html +++ b/templates/identity/view.html @@ -1,5 +1,4 @@ {% extends "base.html" %} -{% load emoji_tags %} {% block title %}{{ identity }}{% endblock %} diff --git a/tests/activities/models/test_hashtag.py b/tests/activities/models/test_hashtag.py index 91af45c..f54cd4a 100644 --- a/tests/activities/models/test_hashtag.py +++ b/tests/activities/models/test_hashtag.py @@ -1,4 +1,5 @@ from activities.models import Hashtag +from core.html import ContentRenderer def test_hashtag_from_content(): @@ -19,7 +20,7 @@ def test_hashtag_from_content(): def test_linkify_hashtag(): - linkify = Hashtag.linkify_hashtags + linkify = lambda html: ContentRenderer(local=True).linkify_hashtags(html, None) assert linkify("# hashtag") == "# hashtag" assert ( diff --git a/tests/activities/models/test_post.py b/tests/activities/models/test_post.py index ff74a9e..65ca303 100644 --- a/tests/activities/models/test_post.py +++ b/tests/activities/models/test_post.py @@ -9,6 +9,16 @@ def test_fetch_post(httpx_mock: HTTPXMock, config_system): """ Tests that a post we don't have locally can be fetched by by_object_uri """ + httpx_mock.add_response( + url="https://example.com/test-actor", + json={ + "@context": [ + "https://www.w3.org/ns/activitystreams", + ], + "id": "https://example.com/test-actor", + "type": "Person", + }, + ) httpx_mock.add_response( url="https://example.com/test-post", json={ diff --git a/tests/activities/models/test_post_types.py b/tests/activities/models/test_post_types.py index 857d495..30fd1ad 100644 --- a/tests/activities/models/test_post_types.py +++ b/tests/activities/models/test_post_types.py @@ -6,19 +6,19 @@ from core.ld import canonicalise @pytest.mark.django_db -def test_question_post(config_system, identity, remote_identity): +def test_question_post(config_system, identity, remote_identity, httpx_mock): data = { "cc": [], - "id": "https://fosstodon.org/users/manfre/statuses/109519951621804608/activity", + "id": "https://remote.test/test-actor/statuses/109519951621804608/activity", "to": identity.absolute_profile_uri(), "type": "Create", - "actor": "https://fosstodon.org/users/manfre", + "actor": "https://remote.test/test-actor/", "object": { "cc": [], - "id": "https://fosstodon.org/users/manfre/statuses/109519951621804608", + "id": "https://remote.test/test-actor/statuses/109519951621804608", "to": identity.absolute_profile_uri(), "tag": [], - "url": "https://fosstodon.org/@manfre/109519951621804608", + "url": "https://remote.test/test-actor/109519951621804608", "type": "Question", "oneOf": [ { @@ -35,13 +35,13 @@ def test_question_post(config_system, identity, remote_identity): "content": '

This is a poll :python:

@mike

', "endTime": "2022-12-18T22:03:59Z", "replies": { - "id": "https://fosstodon.org/users/manfre/statuses/109519951621804608/replies", + "id": "https://remote.test/test-actor/statuses/109519951621804608/replies", "type": "Collection", "first": { - "next": "https://fosstodon.org/users/manfre/statuses/109519951621804608/replies?only_other_accounts=true&page=true", + "next": "https://remote.test/test-actor/109519951621804608/replies?only_other_accounts=true&page=true", "type": "CollectionPage", "items": [], - "partOf": "https://fosstodon.org/users/manfre/statuses/109519951621804608/replies", + "partOf": "https://remote.test/test-actor/109519951621804608/replies", }, }, "published": "2022-12-15T22:03:59Z", @@ -50,15 +50,9 @@ def test_question_post(config_system, identity, remote_identity): "en": '

This is a poll :python:

@mike

' }, "as:sensitive": False, - "attributedTo": "https://fosstodon.org/users/manfre", - "http://ostatus.org#atomUri": "https://fosstodon.org/users/manfre/statuses/109519951621804608", - "http://ostatus.org#conversation": "tag:fosstodon.org,2022-12-15:objectId=69494364:objectType=Conversation", - "http://joinmastodon.org/ns#votersCount": 0, + "attributedTo": "https://remote.test/test-actor/", + "toot:votersCount": 0, }, - "@context": [ - "https://www.w3.org/ns/activitystreams", - "https://w3id.org/security/v1", - ], "published": "2022-12-15T22:03:59Z", } diff --git a/tests/activities/services/test_post.py b/tests/activities/services/test_post.py index 7069de1..4453fc8 100644 --- a/tests/activities/services/test_post.py +++ b/tests/activities/services/test_post.py @@ -6,7 +6,7 @@ from users.models import Identity @pytest.mark.django_db -def test_post_context(identity: Identity): +def test_post_context(identity: Identity, config_system): """ Tests that post context fetching works correctly """ diff --git a/tests/activities/templatetags/test_activity_tags.py b/tests/activities/templatetags/test_activity_tags.py index 9426337..0c481a2 100644 --- a/tests/activities/templatetags/test_activity_tags.py +++ b/tests/activities/templatetags/test_activity_tags.py @@ -2,7 +2,7 @@ from datetime import timedelta from django.utils import timezone -from activities.templatetags.activity_tags import linkify_hashtags, timedeltashort +from activities.templatetags.activity_tags import timedeltashort def test_timedeltashort(): @@ -22,17 +22,3 @@ def test_timedeltashort(): assert timedeltashort(value - timedelta(days=364)) == "364d" assert timedeltashort(value - timedelta(days=365)) == "1y" assert timedeltashort(value - timedelta(days=366)) == "1y" - - -def test_linkify_hashtags(): - """ - Tests that linkify_hashtags works correctly - """ - - assert linkify_hashtags(None) == "" - assert linkify_hashtags("") == "" - - assert ( - linkify_hashtags("#Takahe") - == '#Takahe' - ) diff --git a/tests/core/test_html.py b/tests/core/test_html.py index d4c74dc..5d798ac 100644 --- a/tests/core/test_html.py +++ b/tests/core/test_html.py @@ -1,4 +1,4 @@ -from core.html import html_to_plaintext, sanitize_post +from core.html import html_to_plaintext, sanitize_html def test_html_to_plaintext(): @@ -17,5 +17,5 @@ def test_html_to_plaintext(): def test_sanitize_post(): - assert sanitize_post("

Hello!

") == "

Hello!

" - assert sanitize_post("

It's great

") == "

It's great

" + assert sanitize_html("

Hello!

") == "

Hello!

" + assert sanitize_html("

It's great

") == "

It's great

" diff --git a/users/models/identity.py b/users/models/identity.py index cdacf34..acea1ee 100644 --- a/users/models/identity.py +++ b/users/models/identity.py @@ -11,7 +11,7 @@ from django.utils import timezone from django.utils.functional import lazy from core.exceptions import ActorMismatchError -from core.html import sanitize_post, strip_html +from core.html import ContentRenderer, strip_html from core.ld import ( canonicalise, format_ld_date, @@ -192,20 +192,18 @@ class Identity(StatorModel): @property def safe_summary(self): - from activities.templatetags.emoji_tags import imageify_emojis - - return imageify_emojis(sanitize_post(self.summary), self.domain) + return ContentRenderer(local=True).render_identity(self.summary, self) @property def safe_metadata(self): - from activities.templatetags.emoji_tags import imageify_emojis + renderer = ContentRenderer(local=True) if not self.metadata: return [] return [ { - "name": imageify_emojis(strip_html(data["name"]), self.domain), - "value": imageify_emojis(strip_html(data["value"]), self.domain), + "name": renderer.render_identity(data["name"], self, strip=True), + "value": renderer.render_identity(data["value"], self, strip=True), } for data in self.metadata ] @@ -279,9 +277,9 @@ class Identity(StatorModel): """ Return the name_or_handle with any HTML substitutions made """ - from activities.templatetags.emoji_tags import imageify_emojis - - return imageify_emojis(self.name_or_handle, self.domain) + return ContentRenderer(local=True).render_identity( + self.name_or_handle, self, strip=True + ) @property def handle(self):