improving media page with images and videos

2022-06-15 16:38:30 +02:00 · 2022-06-15 16:38:30 +02:00 · 951b16ba9c
commit 951b16ba9c
--- a/archivers/base_archiver.py
+++ b/archivers/base_archiver.py
@ -1,4 +1,4 @@
-import os, datetime, shutil, hashlib, time, requests, re
+import os, datetime, shutil, hashlib, time, requests, re, mimetypes
 from dataclasses import dataclass
 from abc import ABC, abstractmethod
 from urllib.parse import urlparse
@ -58,7 +58,13 @@ class Archiver(ABC):
            <h3><a href="{url}">{url}</a></h3><ul>'''

        for url_info in urls_info:
-            page += f'''<li><a href="{url_info['cdn_url']}">{url_info['key']}</a>: {url_info['hash']}</li>'''
+            mime_global, mime_type = self._guess_file_type(url_info["key"])
+            preview = ""
+            if mime_global == "image":
+                preview = f'<img src="{url_info["cdn_url"]}" style="max-height:200px;max-width:200px;"></img>'
+            elif mime_global == "video":
+                preview = f'<video controls style="max-height:200px;max-width:200px;"><source src="{url_info["cdn_url"]}" type="{mime_type}"></source></video>'
+            page += f'''<li><a href="{url_info['cdn_url']}">{preview}{url_info['key']}</a>: {url_info['hash']}</li>'''

        page += f"</ul><h2>{self.name} object data:</h2><code>{object}</code>"
        page += f"</body></html>"
@ -77,8 +83,20 @@ class Archiver(ABC):
        page_cdn = self.storage.get_cdn_url(page_key)
        return (page_cdn, page_hash, thumbnail)

+    def _guess_file_type(self, path:str):
+        """
+        Receives a URL or filename and returns global mimetype like 'image' or 'video' and the specific mimetype as a tuple
+        see https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types
+        ex: ('audio', 'audio/mp3')
+        """
+        mime = mimetypes.guess_type(path)[0]
+        if mime is not None:
+            return mime.split("/")[0], mime
+        return "", ""
+
+
    # eg images in a tweet save to cloud storage
-    def generate_media_page(self, urls, url, object):
+    def generate_media_page(self, urls, url, object, requester=requests):
        """
        For a list of media urls, fetch them, upload them
        and call self.generate_media_page_html with them
@ -94,7 +112,7 @@ class Archiver(ABC):

            filename = os.path.join(Storage.TMP_FOLDER, key)

-            d = requests.get(media_url, headers=headers)
+            d = requester.get(media_url, headers=headers)
            with open(filename, 'wb') as f:
                f.write(d.content)