kopia lustrzana https://github.com/bellingcat/auto-archiver
Improved logging when attempting to download an invalid/deleted tweet
Plus: unit tests for non-existent tweet + invalid tweet ID
pull/162/head
rodzic
f29950905c
commit
c932fb7416
|
@ -108,9 +108,11 @@ class TwitterArchiver(Archiver):
|
|||
tweet = tie._extract_status(tweet_id)
|
||||
result = Metadata()
|
||||
try:
|
||||
if not tweet.get("user") or not tweet.get("created_at"):
|
||||
raise ValueError(f"Error retreiving post with id {tweet_id}. Are you sure it exists?")
|
||||
timestamp = datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
|
||||
except Exception as ex:
|
||||
logger.warning(f"Failed to get timestamp: {type(ex).__name__} occurred. args: {ex.args}")
|
||||
except (ValueError, KeyError) as ex:
|
||||
logger.warning(f"Unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}")
|
||||
return False
|
||||
|
||||
result\
|
||||
|
|
|
@ -38,7 +38,6 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
|
|||
test_url = "https://www.bellingcat.com/category/resources/?s=20&t=3d0g4ZQis7dCbSDg-mE7-w"
|
||||
self.assertEqual(test_url, self.archiver.sanitize_url(test_url))
|
||||
|
||||
|
||||
def test_get_username_tweet_id_from_url(self):
|
||||
|
||||
# test valid twitter URL
|
||||
|
@ -70,8 +69,18 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
|
|||
"As 2024 comes to a close, here’s some examples of what Bellingcat investigated per month in our 10th year! 🧵",
|
||||
datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc)
|
||||
)
|
||||
breakpoint()
|
||||
|
||||
def test_download_nonexistend_tweet(self):
|
||||
# this tweet does not exist
|
||||
url = "https://x.com/Bellingcat/status/17197025860711058"
|
||||
response = self.archiver.download(self.create_item(url))
|
||||
self.assertFalse(response)
|
||||
|
||||
def test_download_malformed_tweetid(self):
|
||||
# this tweet does not exist
|
||||
url = "https://x.com/Bellingcat/status/1719702586071100058"
|
||||
response = self.archiver.download(self.create_item(url))
|
||||
self.assertFalse(response)
|
||||
|
||||
def test_download_media_with_images(self):
|
||||
# url https://twitter.com/MeCookieMonster/status/1617921633456640001?s=20&t=3d0g4ZQis7dCbSDg-mE7-w
|
||||
|
|
Ładowanie…
Reference in New Issue