kopia lustrzana https://github.com/bellingcat/auto-archiver
asdf
rodzic
cfa42c51ee
commit
0a76d954b8
|
@ -5,16 +5,12 @@ from urllib.parse import urlparse
|
||||||
|
|
||||||
from .base_archiver import Archiver, ArchiveResult
|
from .base_archiver import Archiver, ArchiveResult
|
||||||
|
|
||||||
import traceback
|
|
||||||
|
|
||||||
|
|
||||||
class TwitterArchiver(Archiver):
|
class TwitterArchiver(Archiver):
|
||||||
name = "twitter"
|
name = "twitter"
|
||||||
|
|
||||||
# DM added filenumber params and storage
|
# DM added filenumber params and storage
|
||||||
def download(self, url, check_if_exists=False, filenumber=None):
|
def download(self, url, check_if_exists=False, filenumber=None):
|
||||||
if filenumber is not None:
|
|
||||||
logger.debug(f'filenumber is {filenumber}')
|
|
||||||
|
|
||||||
if 'twitter.com' != self.get_netloc(url):
|
if 'twitter.com' != self.get_netloc(url):
|
||||||
return False
|
return False
|
||||||
|
@ -30,11 +26,10 @@ class TwitterArchiver(Archiver):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
tweet = next(scr.get_items())
|
tweet = next(scr.get_items())
|
||||||
# except:
|
except Exception as ex:
|
||||||
except Exception as e:
|
template = "TwitterArchiver cant get tweet and threw, which can happen if a media sensitive tweet. \n type: {0} occurred. \n arguments:{1!r}"
|
||||||
# logger.warning('wah wah')
|
message = template.format(type(ex).__name__, ex.args)
|
||||||
# DM
|
logger.warning(message)
|
||||||
logger.warning(f'TwitterArchiver cant get tweet for url {url} - can happen if a media sensitive tweet: \n{traceback.format_exc()}')
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if tweet.media is None:
|
if tweet.media is None:
|
||||||
|
@ -50,15 +45,7 @@ class TwitterArchiver(Archiver):
|
||||||
elif type(media) == Gif:
|
elif type(media) == Gif:
|
||||||
urls.append(media.variants[0].url)
|
urls.append(media.variants[0].url)
|
||||||
elif type(media) == Photo:
|
elif type(media) == Photo:
|
||||||
# https://webtrickz.com/download-images-in-original-size-on-twitter/
|
urls.append(media.fullUrl.replace('name=large', 'name=orig'))
|
||||||
# 'https://pbs.twimg.com/media/ExeUSW2UcAE6RbN?format=jpg&name=large'
|
|
||||||
# we want name=orig
|
|
||||||
# so can get original quality
|
|
||||||
foo = media.fullUrl
|
|
||||||
bar = foo.replace("name=large", "name=orig")
|
|
||||||
|
|
||||||
# urls.append(media.fullUrl)
|
|
||||||
urls.append(bar)
|
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Could not get media URL of {media}")
|
logger.warning(f"Could not get media URL of {media}")
|
||||||
|
|
||||||
|
|
Ładowanie…
Reference in New Issue