From 3f98f1e04e6a41ba670b135a1bb720878a8a2eb4 Mon Sep 17 00:00:00 2001 From: Alain St-Denis Date: Sun, 11 Feb 2024 08:33:32 -0500 Subject: [PATCH] Select the img tag title property over the alt property for embedded images. Adjust the corresponding test. --- federation/entities/mixins.py | 18 +++++++++++------- federation/tests/fixtures/entities.py | 9 ++++++--- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/federation/entities/mixins.py b/federation/entities/mixins.py index d37fd93..8855011 100644 --- a/federation/entities/mixins.py +++ b/federation/entities/mixins.py @@ -224,13 +224,17 @@ class RawContentMixin(BaseEntity): Returns a Tuple of (url, filename). """ images = [] - if self._media_type != "text/markdown" or self.raw_content is None: - return images - regex = r"!\[([\w\s\-\']*)\]\((https?://[\w\d\-\./]+\.[\w]*((?<=jpg)|(?<=gif)|(?<=png)|(?<=jpeg)))\)" - matches = re.finditer(regex, self.raw_content, re.MULTILINE | re.IGNORECASE) - for match in matches: - groups = match.groups() - images.append((groups[1], groups[0] or "")) + if hasattr(self, '_soup'): + for img in self._soup.find_all('img', src=re.compile(r'^http')): + images.append((img['src'], img.get('title', '') or img.get('alt', ''))) + else: + if self._media_type != "text/markdown" or self.raw_content is None: + return images + regex = r"!\[([\w\s\-\']*)\]\((https?://[\w\d\-\./]+\.[\w]*((?<=jpg)|(?<=gif)|(?<=png)|(?<=jpeg)))\)" + matches = re.finditer(regex, self.raw_content, re.MULTILINE | re.IGNORECASE) + for match in matches: + groups = match.groups() + images.append((groups[1], groups[0] or "")) return images # Legacy. Keep this until tests are reworked diff --git a/federation/tests/fixtures/entities.py b/federation/tests/fixtures/entities.py index e555a97..7f48b43 100644 --- a/federation/tests/fixtures/entities.py +++ b/federation/tests/fixtures/entities.py @@ -1,5 +1,6 @@ import pytest # noinspection PyPackageRequirements +from commonmark import commonmark from freezegun import freeze_time from unittest.mock import patch @@ -152,8 +153,7 @@ def activitypubpost_tags(): @pytest.fixture def activitypubpost_embedded_images(): with freeze_time("2019-04-27"): - obj = models.Post( - raw_content=""" + raw_content=""" #Cycling #lauttasaari #sea #sun @@ -166,7 +166,10 @@ def activitypubpost_embedded_images(): [foo](https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a9025414710.jpg) #only a link, not embedded https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a9025414711.jpg -""", +""" + obj = models.Post( + raw_content=raw_content, + rendered_content=commonmark(raw_content, ignore_html_blocks=True), public=True, provider_display_name="Socialhome", id=f"http://127.0.0.1:8000/post/123456/",