kopia lustrzana https://github.com/bellingcat/auto-archiver
Flag tombstone tweets for twitter_syndication method
rodzic
57eacdc24a
commit
528b78db85
|
@ -114,6 +114,10 @@ class TwitterArchiver(Archiver):
|
||||||
result = Metadata()
|
result = Metadata()
|
||||||
tweet = r.json()
|
tweet = r.json()
|
||||||
|
|
||||||
|
if tweet.get('__typename') == 'TweetTombstone':
|
||||||
|
logger.error(f"Failed to get tweet {tweet_id}: {tweet['tombstone']['text']['text']}")
|
||||||
|
return False
|
||||||
|
|
||||||
urls = []
|
urls = []
|
||||||
for p in tweet.get("photos", []):
|
for p in tweet.get("photos", []):
|
||||||
urls.append(p["url"])
|
urls.append(p["url"])
|
||||||
|
|
|
@ -69,19 +69,6 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
|
||||||
chosen_variant = self.archiver.choose_variant(variant_list)
|
chosen_variant = self.archiver.choose_variant(variant_list)
|
||||||
assert chosen_variant == variant_list[3]
|
assert chosen_variant == variant_list[3]
|
||||||
|
|
||||||
@pytest.mark.download
|
|
||||||
def test_youtube_dlp_archiver(self):
|
|
||||||
|
|
||||||
url = "https://x.com/bellingcat/status/1874097816571961839"
|
|
||||||
post = self.archiver.download_yt_dlp(self.create_item(url), url, "1874097816571961839")
|
|
||||||
assert post
|
|
||||||
self.assertValidResponseMetadata(
|
|
||||||
post,
|
|
||||||
"As 2024 comes to a close, here’s some examples of what Bellingcat investigated per month in our 10th year! 🧵",
|
|
||||||
datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc),
|
|
||||||
"twitter-ytdl"
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_reverse_engineer_token(self):
|
def test_reverse_engineer_token(self):
|
||||||
# see Vercel's implementation here: https://github.com/vercel/react-tweet/blob/main/packages/react-tweet/src/api/fetch-tweet.ts#L27C1-L31C2
|
# see Vercel's implementation here: https://github.com/vercel/react-tweet/blob/main/packages/react-tweet/src/api/fetch-tweet.ts#L27C1-L31C2
|
||||||
# and the discussion here: https://github.com/JustAnotherArchivist/snscrape/issues/996#issuecomment-2211358215
|
# and the discussion here: https://github.com/JustAnotherArchivist/snscrape/issues/996#issuecomment-2211358215
|
||||||
|
@ -95,6 +82,20 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
|
||||||
generated_token = self.archiver.generate_token(tweet_id)
|
generated_token = self.archiver.generate_token(tweet_id)
|
||||||
self.assertEqual(real_token, generated_token)
|
self.assertEqual(real_token, generated_token)
|
||||||
|
|
||||||
|
@pytest.mark.download
|
||||||
|
def test_youtube_dlp_archiver(self):
|
||||||
|
|
||||||
|
url = "https://x.com/bellingcat/status/1874097816571961839"
|
||||||
|
post = self.archiver.download_yt_dlp(self.create_item(url), url, "1874097816571961839")
|
||||||
|
assert post
|
||||||
|
self.assertValidResponseMetadata(
|
||||||
|
post,
|
||||||
|
"As 2024 comes to a close, here’s some examples of what Bellingcat investigated per month in our 10th year! 🧵",
|
||||||
|
datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc),
|
||||||
|
"twitter-ytdl"
|
||||||
|
)
|
||||||
|
|
||||||
|
@pytest.mark.download
|
||||||
def test_syndication_archiver(self):
|
def test_syndication_archiver(self):
|
||||||
|
|
||||||
url = "https://x.com/bellingcat/status/1874097816571961839"
|
url = "https://x.com/bellingcat/status/1874097816571961839"
|
||||||
|
@ -106,12 +107,14 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
|
||||||
datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc)
|
datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@pytest.mark.download
|
||||||
def test_download_nonexistend_tweet(self):
|
def test_download_nonexistend_tweet(self):
|
||||||
# this tweet does not exist
|
# this tweet does not exist
|
||||||
url = "https://x.com/Bellingcat/status/17197025860711058"
|
url = "https://x.com/Bellingcat/status/17197025860711058"
|
||||||
response = self.archiver.download(self.create_item(url))
|
response = self.archiver.download(self.create_item(url))
|
||||||
self.assertFalse(response)
|
self.assertFalse(response)
|
||||||
|
|
||||||
|
@pytest.mark.download
|
||||||
def test_download_malformed_tweetid(self):
|
def test_download_malformed_tweetid(self):
|
||||||
# this tweet does not exist
|
# this tweet does not exist
|
||||||
url = "https://x.com/Bellingcat/status/1719702586071100058"
|
url = "https://x.com/Bellingcat/status/1719702586071100058"
|
||||||
|
@ -147,8 +150,7 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
|
||||||
|
|
||||||
"""Download tweets with sensitive media
|
"""Download tweets with sensitive media
|
||||||
|
|
||||||
Note: currently failing, youtube-dlp requires logged in users"""
|
Note: currently failing, youtube-dlp requires logged in users + download_syndication requires logging in"""
|
||||||
|
|
||||||
|
|
||||||
test_data = [
|
test_data = [
|
||||||
("https://x.com/SozinhoRamalho/status/1876710769913450647", "ignore tweet, testing sensitivity warning nudity", datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc), "image_hash"),
|
("https://x.com/SozinhoRamalho/status/1876710769913450647", "ignore tweet, testing sensitivity warning nudity", datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc), "image_hash"),
|
||||||
|
|
Ładowanie…
Reference in New Issue