diff --git a/src/auto_archiver/archivers/twitter_archiver.py b/src/auto_archiver/archivers/twitter_archiver.py index a925be6..995910b 100644 --- a/src/auto_archiver/archivers/twitter_archiver.py +++ b/src/auto_archiver/archivers/twitter_archiver.py @@ -114,6 +114,10 @@ class TwitterArchiver(Archiver): result = Metadata() tweet = r.json() + if tweet.get('__typename') == 'TweetTombstone': + logger.error(f"Failed to get tweet {tweet_id}: {tweet['tombstone']['text']['text']}") + return False + urls = [] for p in tweet.get("photos", []): urls.append(p["url"]) @@ -135,7 +139,7 @@ class TwitterArchiver(Archiver): media.filename = self.download_from_url(u, f'{slugify(url)}_{i}{ext}') result.add_media(media) - + result.set_title(tweet.get("text")).set_content(json.dumps(tweet, ensure_ascii=False)).set_timestamp(datetime.strptime(tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")) return result.success("twitter-syndication") diff --git a/tests/archivers/test_twitter_archiver.py b/tests/archivers/test_twitter_archiver.py index 858f12c..e63573a 100644 --- a/tests/archivers/test_twitter_archiver.py +++ b/tests/archivers/test_twitter_archiver.py @@ -69,19 +69,6 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase): chosen_variant = self.archiver.choose_variant(variant_list) assert chosen_variant == variant_list[3] - @pytest.mark.download - def test_youtube_dlp_archiver(self): - - url = "https://x.com/bellingcat/status/1874097816571961839" - post = self.archiver.download_yt_dlp(self.create_item(url), url, "1874097816571961839") - assert post - self.assertValidResponseMetadata( - post, - "As 2024 comes to a close, here’s some examples of what Bellingcat investigated per month in our 10th year! 
🧵", - datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc), - "twitter-ytdl" - ) - def test_reverse_engineer_token(self): # see Vercel's implementation here: https://github.com/vercel/react-tweet/blob/main/packages/react-tweet/src/api/fetch-tweet.ts#L27C1-L31C2 # and the discussion here: https://github.com/JustAnotherArchivist/snscrape/issues/996#issuecomment-2211358215 @@ -94,7 +81,21 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase): ("1346554693649113090", "39ibqxei7mo"),]: generated_token = self.archiver.generate_token(tweet_id) self.assertEqual(real_token, generated_token) - + + @pytest.mark.download + def test_youtube_dlp_archiver(self): + + url = "https://x.com/bellingcat/status/1874097816571961839" + post = self.archiver.download_yt_dlp(self.create_item(url), url, "1874097816571961839") + assert post + self.assertValidResponseMetadata( + post, + "As 2024 comes to a close, here’s some examples of what Bellingcat investigated per month in our 10th year! 
🧵", + datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc), + "twitter-ytdl" + ) + + @pytest.mark.download def test_syndication_archiver(self): url = "https://x.com/bellingcat/status/1874097816571961839" @@ -106,12 +107,14 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase): datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc) ) + @pytest.mark.download def test_download_nonexistend_tweet(self): # this tweet does not exist url = "https://x.com/Bellingcat/status/17197025860711058" response = self.archiver.download(self.create_item(url)) self.assertFalse(response) - + + @pytest.mark.download def test_download_malformed_tweetid(self): # this tweet does not exist url = "https://x.com/Bellingcat/status/1719702586071100058" @@ -147,8 +150,7 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase): """Download tweets with sensitive media - Note: currently failing, youtube-dlp requres logged in users""" - + Note: currently failing, youtube-dlp requires logged in users + download_syndication requires logging in""" test_data = [ ("https://x.com/SozinhoRamalho/status/1876710769913450647", "ignore tweet, testing sensitivity warning nudity", datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc), "image_hash"),