From 82ac0ce3cfec66ac1974b80197f637b8f62ee561 Mon Sep 17 00:00:00 2001 From: Jason Robinson Date: Tue, 29 Dec 2020 23:12:06 +0200 Subject: [PATCH] Refactor up the embedded image recognition code from ActivityPub AttachImagesMixin to the base RawContentMixin so Matrix can use that too to upload images pre sending. --- federation/entities/activitypub/entities.py | 12 +++--------- federation/entities/mixins.py | 20 +++++++++++++++++++- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/federation/entities/activitypub/entities.py b/federation/entities/activitypub/entities.py index f10963e..ad8e270 100644 --- a/federation/entities/activitypub/entities.py +++ b/federation/entities/activitypub/entities.py @@ -1,5 +1,4 @@ import logging -import re import uuid from typing import Dict, List @@ -26,16 +25,11 @@ class AttachImagesMixin(RawContentMixin): Attach any embedded images from raw_content. """ super().pre_send() - if self._media_type != "text/markdown": - return - regex = r"!\[([\w ]*)\]\((https?://[\w\d\-\./]+\.[\w]*((?<=jpg)|(?<=gif)|(?<=png)|(?<=jpeg)))\)" - matches = re.finditer(regex, self.raw_content, re.MULTILINE | re.IGNORECASE) - for match in matches: - groups = match.groups() + for image in self.embedded_images: self._children.append( ActivitypubImage( - url=groups[1], - name=groups[0] or "", + url=image[0], + name=image[1], inline=True, ) ) diff --git a/federation/entities/mixins.py b/federation/entities/mixins.py index 04580cb..23a8822 100644 --- a/federation/entities/mixins.py +++ b/federation/entities/mixins.py @@ -2,7 +2,7 @@ import datetime import importlib import re import warnings -from typing import List, Set, Union, Dict +from typing import List, Set, Union, Dict, Tuple from commonmark import commonmark @@ -202,6 +202,24 @@ class RawContentMixin(BaseEntity): super().__init__(*args, **kwargs) self._required += ["raw_content"] + @property + def embedded_images(self) -> List[Tuple[str, str]]: + """ + Returns a list of images from the raw_content. + Currently only markdown supported. + + Returns a Tuple of (url, filename). + """ + images = [] + if self._media_type != "text/markdown": + return images + regex = r"!\[([\w ]*)\]\((https?://[\w\d\-\./]+\.[\w]*((?<=jpg)|(?<=gif)|(?<=png)|(?<=jpeg)))\)" + matches = re.finditer(regex, self.raw_content, re.MULTILINE | re.IGNORECASE) + for match in matches: + groups = match.groups() + images.append((groups[1], groups[0] or "")) + return images + @property def rendered_content(self) -> str: """Returns the rendered version of raw_content, or just raw_content."""