From d577e39777e39801ce9783ddbb0254e6e45ac263 Mon Sep 17 00:00:00 2001 From: Alain St-Denis Date: Thu, 13 Jul 2023 11:09:00 -0400 Subject: [PATCH] Do not assume that the last part of a mention.href is the user's name. Adjust patterns to match a leading whitespace or the beginning. --- federation/entities/activitypub/models.py | 10 +++++++--- federation/utils/text.py | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/federation/entities/activitypub/models.py b/federation/entities/activitypub/models.py index 5bdcff7..f9df860 100644 --- a/federation/entities/activitypub/models.py +++ b/federation/entities/activitypub/models.py @@ -857,9 +857,13 @@ class Note(Object, RawContentMixin): def _find_and_mark_mentions(self): mentions = [mention for mention in self.tag_objects if isinstance(mention, Mention)] - hrefs = [mention.href for mention in mentions] - # add Mastodon's form - hrefs.extend([re.sub(r'/(users/)([\w]+)$', r'/@\2', href) for href in hrefs]) + hrefs = [] + for mention in mentions: + hrefs.append(mention.href) + # add Mastodon's form + parsed = urlparse(mention.href) + username = mention.name.lstrip('@').split('@')[0] + hrefs.append(f'{parsed.scheme}://{parsed.netloc}/@{username}') for href in hrefs: links = self._soup.find_all(href=href) for link in links: diff --git a/federation/utils/text.py b/federation/utils/text.py index e2cd78c..3291fe8 100644 --- a/federation/utils/text.py +++ b/federation/utils/text.py @@ -9,8 +9,8 @@ from bs4.element import NavigableString from commonmark import commonmark ILLEGAL_TAG_CHARS = "!#$%^&*+.,@£/()=?`'\\{[]}~;:\"’”—\xa0" -TAG_PATTERN = re.compile(r'(#[\w]+)', re.UNICODE) -MENTION_PATTERN = re.compile(r'(@{?[\S ]?[^{}@]+[@;]?\s*[\w\-./@]+[\w/]+}?)', re.UNICODE) +TAG_PATTERN = re.compile(r'(^|\s)(#[\w]+)', re.UNICODE) +MENTION_PATTERN = re.compile(r'(^|\s)(@{?[\S ]?[^{}@]+[@;]?\s*[\w\-./@]+[\w/]+}?)', re.UNICODE) def decode_if_bytes(text):