kopia lustrzana https://github.com/bellingcat/auto-archiver
Fix small bug in twitter dropin
- previously the 'content' was being set to a JSON dump of the tweet; it should be set to full_text
pull/263/head
rodzic
034857075d
commit
0a5ba3385e
|
@ -1,6 +1,5 @@
|
||||||
import re
|
import re
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import json
|
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
|
@ -32,6 +31,9 @@ class Twitter(GenericDropin):
|
||||||
twid = ie_instance._match_valid_url(url).group("id")
|
twid = ie_instance._match_valid_url(url).group("id")
|
||||||
return ie_instance._extract_status(twid=twid)
|
return ie_instance._extract_status(twid=twid)
|
||||||
|
|
||||||
|
def keys_to_clean(self, video_data, info_extractor):
|
||||||
|
return ["user", "created_at", "entities", "favorited", "translator_type"]
|
||||||
|
|
||||||
def create_metadata(self, tweet: dict, ie_instance: InfoExtractor, archiver: Extractor, url: str) -> Metadata:
|
def create_metadata(self, tweet: dict, ie_instance: InfoExtractor, archiver: Extractor, url: str) -> Metadata:
|
||||||
result = Metadata()
|
result = Metadata()
|
||||||
try:
|
try:
|
||||||
|
@ -42,9 +44,11 @@ class Twitter(GenericDropin):
|
||||||
logger.warning(f"Unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}")
|
logger.warning(f"Unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
result.set_title(tweet.get("full_text", "")).set_content(json.dumps(tweet, ensure_ascii=False)).set_timestamp(
|
full_text = tweet.pop("full_text", "")
|
||||||
timestamp
|
author = tweet["user"].get("name", "")
|
||||||
)
|
result.set("author", author).set_url(url)
|
||||||
|
|
||||||
|
result.set_title(f"{author} - {full_text}").set_content(full_text).set_timestamp(timestamp)
|
||||||
if not tweet.get("entities", {}).get("media"):
|
if not tweet.get("entities", {}).get("media"):
|
||||||
logger.debug("No media found, archiving tweet text only")
|
logger.debug("No media found, archiving tweet text only")
|
||||||
result.status = "twitter-ytdl"
|
result.status = "twitter-ytdl"
|
||||||
|
|
|
@ -206,10 +206,11 @@ class TestGenericExtractor(TestExtractorBase):
|
||||||
|
|
||||||
self.assertValidResponseMetadata(
|
self.assertValidResponseMetadata(
|
||||||
post,
|
post,
|
||||||
"Onion rings are just vegetable donuts.",
|
"Cookie Monster - Onion rings are just vegetable donuts.",
|
||||||
datetime.datetime(2023, 1, 24, 16, 25, 51, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2023, 1, 24, 16, 25, 51, tzinfo=datetime.timezone.utc),
|
||||||
"yt-dlp_Twitter: success",
|
"yt-dlp_Twitter: success",
|
||||||
)
|
)
|
||||||
|
assert post.get("content") == "Onion rings are just vegetable donuts."
|
||||||
|
|
||||||
@pytest.mark.download
|
@pytest.mark.download
|
||||||
def test_twitter_download_video(self, make_item):
|
def test_twitter_download_video(self, make_item):
|
||||||
|
|
Ładowanie…
Reference in New Issue