From dff01056594e0c145ed73d823eddb21e1bbcfff9 Mon Sep 17 00:00:00 2001
From: Patrick Robertson <robertson.patrick@gmail.com>
Date: Mon, 20 Jan 2025 18:40:46 +0100
Subject: [PATCH] Small fixups + implement Truth code for posts with multiple
 media

---
 .../generic_archiver/generic_archiver.py      | 10 ++++++++--
 .../archivers/generic_archiver/truth.py       | 20 +++++++++++++------
 tests/archivers/test_generic_archiver.py      |  1 -
 3 files changed, 22 insertions(+), 9 deletions(-)
diff --git a/src/auto_archiver/archivers/generic_archiver/generic_archiver.py b/src/auto_archiver/archivers/generic_archiver/generic_archiver.py
index 00119f7..41f1314 100644
--- a/src/auto_archiver/archivers/generic_archiver/generic_archiver.py
+++ b/src/auto_archiver/archivers/generic_archiver/generic_archiver.py
@@ -181,7 +181,8 @@ class GenericArchiver(Archiver):
 
         return self.create_metadata_for_post(ie_instance, post_data, url)
         
-    def get_metatdata_for_video(self, info: dict, info_extractor: Type[InfoExtractor], url: str, ydl: yt_dlp.YoutubeDL) -> Metadata:
+    def get_metadata_for_video(self, info: dict, info_extractor: Type[InfoExtractor], url: str, ydl: yt_dlp.YoutubeDL) -> Metadata:
+
         # this time download
         ydl.params['getcomments'] = self.comments
         #TODO: for playlist or long lists of videos, how to download one at a time so they can be stored before the next one is downloaded?
@@ -233,13 +234,18 @@ class GenericArchiver(Archiver):
         result = False
 
         try:
+            if info_extractor.ie_key() == "Truth":
+                # the ytdlp truth extractor currently only gets the first image/video in the 'media' section, as opposed to all of them
+                # we don't want this
+                raise yt_dlp.utils.ExtractorError("Use the 'post data' method for Truth posts")
+
             # don't download since it can be a live stream
             info = ydl.extract_info(url, ie_key=info_extractor.ie_key(), download=False)
             if info.get('is_live', False) and not self.livestreams:
                 logger.warning("Livestream detected, skipping due to 'livestreams' configuration setting")
                 return False
             # it's a valid video, that the youtubdedl can download out of the box
-            result = self.get_metatdata_for_video(info, info_extractor, url, ydl)
+            result = self.get_metadata_for_video(info, info_extractor, url, ydl)
 
         except Exception as e:
             logger.debug(f'Issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use extractor to get post data instead')
diff --git a/src/auto_archiver/archivers/generic_archiver/truth.py b/src/auto_archiver/archivers/generic_archiver/truth.py
index 780a56e..00551f3 100644
--- a/src/auto_archiver/archivers/generic_archiver/truth.py
+++ b/src/auto_archiver/archivers/generic_archiver/truth.py
@@ -1,31 +1,39 @@
 import datetime
 
-from auto_archiver.utils import clean_html, traverse_obj
-from auto_archiver.core.metadata import Metadata
+from auto_archiver.utils import traverse_obj
+from auto_archiver.core.metadata import Metadata, Media
 from auto_archiver.archivers.archiver import Archiver
 
+from dateutil.parser import parse as parse_dt
+
 def create_metadata(post: dict, archiver: Archiver, url: str) -> Metadata:
     """
-    Creates metaata from a truth social post
+    Creates metadata from a truth social post
     
-    Only used for posts that contains no media. ytdlp.TruthIE extractor can handle posts with media
+    Used for every Truth Social post, with or without media: attachments are downloaded here because the ytdlp.TruthIE extractor only fetches the first one
     
     Format is:
     
     {'id': '109598702184774628', 'created_at': '2022-12-29T19:51:18.161Z', 'in_reply_to_id': None, 'quote_id': None, 'in_reply_to_account_id': None, 'sensitive': False, 'spoiler_text': '', 'visibility': 'public', 'language': 'en', 'uri': 'https://truthsocial.com/@bbcnewa/109598702184774628', 'url': 'https://truthsocial.com/@bbcnewa/109598702184774628', 'content': '<p>Pele, regarded by many as football\'s greatest ever player, has died in Brazil at the age of 82. <a href="https://www.bbc.com/sport/football/42751517" rel="nofollow noopener noreferrer" target="_blank"><span class="invisible">https://www.</span><span class="ellipsis">bbc.com/sport/football/4275151</span><span class="invisible">7</span></a></p>', 'account': {'id': '107905163010312793', 'username': 'bbcnewa', 'acct': 'bbcnewa', 'display_name': 'BBC News', 'locked': False, 'bot': False, 'discoverable': True, 'group': False, 'created_at': '2022-03-05T17:42:01.159Z', 'note': '<p>News, features and analysis by the BBC</p>', 'url': 'https://truthsocial.com/@bbcnewa', 'avatar': 'https://static-assets-1.truthsocial.com/tmtg:prime-ts-assets/accounts/avatars/107/905/163/010/312/793/original/e7c07550dc22c23a.jpeg', 'avatar_static': 'https://static-assets-1.truthsocial.com/tmtg:prime-ts-assets/accounts/avatars/107/905/163/010/312/793/original/e7c07550dc22c23a.jpeg', 'header': 'https://static-assets-1.truthsocial.com/tmtg:prime-ts-assets/accounts/headers/107/905/163/010/312/793/original/a00eeec2b57206c7.jpeg', 'header_static': 'https://static-assets-1.truthsocial.com/tmtg:prime-ts-assets/accounts/headers/107/905/163/010/312/793/original/a00eeec2b57206c7.jpeg', 'followers_count': 1131, 'following_count': 3, 'statuses_count': 9, 'last_status_at': '2024-11-12', 'verified': False, 'location': '', 'website': 'https://www.bbc.com/news', 'unauth_visibility': True, 'chats_onboarded': True, 'feeds_onboarded': True, 'accepting_messages': False, 'show_nonmember_group_statuses': None, 'emojis': [], 'fields': [], 
'tv_onboarded': True, 'tv_account': False}, 'media_attachments': [], 'mentions': [], 'tags': [], 'card': None, 'group': None, 'quote': None, 'in_reply_to': None, 'reblog': None, 'sponsored': False, 'replies_count': 1, 'reblogs_count': 0, 'favourites_count': 2, 'favourited': False, 'reblogged': False, 'muted': False, 'pinned': False, 'bookmarked': False, 'poll': None, 'emojis': []}
     """
+    # core fields: URL, creation time, HTML content and author handle
     result = Metadata()
     result.set_url(url)
     timestamp = post['created_at'] # format is 2022-12-29T19:51:18.161Z
-    result.set_timestamp(datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ"))
+    result.set_timestamp(parse_dt(timestamp))
     result.set('description', post['content'])
     result.set('author', post['account']['username'])
 
     for key in ['replies_count', 'reblogs_count', 'favourites_count', ('account', 'followers_count'), ('account', 'following_count'), ('account', 'statuses_count'), ('account', 'display_name'), 'language', 'in_reply_to_account', 'replies_count']:
         if isinstance(key, tuple):
-            store_key = u" ".join(key)
+            store_key = " ".join(key)
         else:
             store_key = key
         result.set(store_key, traverse_obj(post, key))
+
+    # download every attachment: the yt-dlp Truth extractor only fetches the first one
+    for media in post.get('media_attachments', []):
+        filename = archiver.download_from_url(media['url'])
+        result.add_media(Media(filename), id=media.get('id'))
 
     return result
\ No newline at end of file
diff --git a/tests/archivers/test_generic_archiver.py b/tests/archivers/test_generic_archiver.py
index a35d28d..d493437 100644
--- a/tests/archivers/test_generic_archiver.py
+++ b/tests/archivers/test_generic_archiver.py
@@ -125,7 +125,6 @@ class TestGenericArchiver(TestArchiverBase):
         assert len(result.media) == 1
         assert result is not False
 
-    @pytest.mark.skip("Currently failing, multiple images are not being downloaded - this is due to an issue with ytdlp extractor")
     @pytest.mark.download
     def test_truthsocial_download_multiple_images(self, make_item):
         item = make_item("https://truthsocial.com/@trrth/posts/113861302149349135")