Do not assume that the last part of a mention.href is the user's name. Adjust the tag and mention patterns so that they match only after leading whitespace or at the beginning of the text.

ap-processing-improvements
Alain St-Denis 2023-07-13 11:09:00 -04:00
rodzic 47af44582c
commit d577e39777
2 zmienionych plików z 9 dodań i 5 usunięć

Wyświetl plik

@@ -857,9 +857,13 @@ class Note(Object, RawContentMixin):
def _find_and_mark_mentions(self): def _find_and_mark_mentions(self):
mentions = [mention for mention in self.tag_objects if isinstance(mention, Mention)] mentions = [mention for mention in self.tag_objects if isinstance(mention, Mention)]
hrefs = [mention.href for mention in mentions] hrefs = []
# add Mastodon's form for mention in mentions:
hrefs.extend([re.sub(r'/(users/)([\w]+)$', r'/@\2', href) for href in hrefs]) hrefs.append(mention.href)
# add Mastodon's form
parsed = urlparse(mention.href)
username = mention.name.lstrip('@').split('@')[0]
hrefs.append(f'{parsed.scheme}://{parsed.netloc}/@{username}')
for href in hrefs: for href in hrefs:
links = self._soup.find_all(href=href) links = self._soup.find_all(href=href)
for link in links: for link in links:

Wyświetl plik

@@ -9,8 +9,8 @@ from bs4.element import NavigableString
from commonmark import commonmark from commonmark import commonmark
ILLEGAL_TAG_CHARS = "!#$%^&*+.,@£/()=?`'\\{[]}~;:\"’”—\xa0" ILLEGAL_TAG_CHARS = "!#$%^&*+.,@£/()=?`'\\{[]}~;:\"’”—\xa0"
TAG_PATTERN = re.compile(r'(#[\w]+)', re.UNICODE) TAG_PATTERN = re.compile(r'(^|\s)(#[\w]+)', re.UNICODE)
MENTION_PATTERN = re.compile(r'(@{?[\S ]?[^{}@]+[@;]?\s*[\w\-./@]+[\w/]+}?)', re.UNICODE) MENTION_PATTERN = re.compile(r'(^|\s)(@{?[\S ]?[^{}@]+[@;]?\s*[\w\-./@]+[\w/]+}?)', re.UNICODE)
def decode_if_bytes(text): def decode_if_bytes(text):