Improved logging when attempting to download an invalid or deleted tweet

Plus: unit tests for non-existent tweet + invalid tweet ID
pull/162/head
Patrick Robertson 2025-01-12 12:00:45 +01:00
rodzic f29950905c
commit c932fb7416
2 zmienionych plików z 15 dodań i 4 usunięć

Wyświetl plik

@ -108,9 +108,11 @@ class TwitterArchiver(Archiver):
tweet = tie._extract_status(tweet_id)
result = Metadata()
try:
if not tweet.get("user") or not tweet.get("created_at"):
raise ValueError(f"Error retreiving post with id {tweet_id}. Are you sure it exists?")
timestamp = datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
except Exception as ex:
logger.warning(f"Failed to get timestamp: {type(ex).__name__} occurred. args: {ex.args}")
except (ValueError, KeyError) as ex:
logger.warning(f"Unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}")
return False
result\

Wyświetl plik

@ -38,7 +38,6 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
test_url = "https://www.bellingcat.com/category/resources/?s=20&t=3d0g4ZQis7dCbSDg-mE7-w"
self.assertEqual(test_url, self.archiver.sanitize_url(test_url))
def test_get_username_tweet_id_from_url(self):
# test valid twitter URL
@ -70,8 +69,18 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
"As 2024 comes to a close, heres some examples of what Bellingcat investigated per month in our 10th year! 🧵",
datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc)
)
breakpoint()
def test_download_nonexistend_tweet(self):
    """Downloading a tweet that does not exist must yield a falsy result."""
    # The status ID in this URL does not belong to any existing tweet.
    missing_tweet_url = "https://x.com/Bellingcat/status/17197025860711058"
    item = self.create_item(missing_tweet_url)
    self.assertFalse(self.archiver.download(item))
def test_download_malformed_tweetid(self):
    """Downloading a URL with a malformed tweet ID must yield a falsy result."""
    # NOTE(review): this ID has 19 digits, while test_download_nonexistend_tweet
    # uses a 17-digit one -- confirm which of the two is meant to be malformed.
    bad_id_url = "https://x.com/Bellingcat/status/1719702586071100058"
    item = self.create_item(bad_id_url)
    self.assertFalse(self.archiver.download(item))
def test_download_media_with_images(self):
# url https://twitter.com/MeCookieMonster/status/1617921633456640001?s=20&t=3d0g4ZQis7dCbSDg-mE7-w