kopia lustrzana https://github.com/bellingcat/auto-archiver
improving path operations
rodzic
3019778b8f
commit
13e7d0bf1b
|
@ -64,7 +64,7 @@ class Archiver(ABC):
|
||||||
page += f"</body></html>"
|
page += f"</body></html>"
|
||||||
|
|
||||||
page_key = self.get_key(urlparse(url).path.replace("/", "_") + ".html")
|
page_key = self.get_key(urlparse(url).path.replace("/", "_") + ".html")
|
||||||
page_filename = Storage.TMP_FOLDER + page_key
|
page_filename = os.path.join(Storage.TMP_FOLDER, page_key)
|
||||||
page_cdn = self.storage.get_cdn_url(page_key)
|
page_cdn = self.storage.get_cdn_url(page_key)
|
||||||
|
|
||||||
with open(page_filename, "w") as f:
|
with open(page_filename, "w") as f:
|
||||||
|
@ -96,7 +96,7 @@ class Archiver(ABC):
|
||||||
if '.' not in path:
|
if '.' not in path:
|
||||||
key += '.jpg'
|
key += '.jpg'
|
||||||
|
|
||||||
filename = Storage.TMP_FOLDER + key
|
filename = os.path.join(Storage.TMP_FOLDER, key)
|
||||||
|
|
||||||
d = requests.get(media_url, headers=headers)
|
d = requests.get(media_url, headers=headers)
|
||||||
with open(filename, 'wb') as f:
|
with open(filename, 'wb') as f:
|
||||||
|
@ -140,7 +140,7 @@ class Archiver(ABC):
|
||||||
logger.debug(f"getting screenshot for {url=}")
|
logger.debug(f"getting screenshot for {url=}")
|
||||||
key = self.get_key(urlparse(url).path.replace(
|
key = self.get_key(urlparse(url).path.replace(
|
||||||
"/", "_") + datetime.datetime.utcnow().isoformat().replace(" ", "_") + ".png")
|
"/", "_") + datetime.datetime.utcnow().isoformat().replace(" ", "_") + ".png")
|
||||||
filename = Storage.TMP_FOLDER + key
|
filename = os.path.join(Storage.TMP_FOLDER, key)
|
||||||
|
|
||||||
# Accept cookies popup dismiss for ytdlp video
|
# Accept cookies popup dismiss for ytdlp video
|
||||||
if 'facebook.com' in url:
|
if 'facebook.com' in url:
|
||||||
|
|
|
@ -52,7 +52,7 @@ class TelegramArchiver(Archiver):
|
||||||
video_id = video_url.split('/')[-1].split('?')[0]
|
video_id = video_url.split('/')[-1].split('?')[0]
|
||||||
key = self.get_key(video_id)
|
key = self.get_key(video_id)
|
||||||
|
|
||||||
filename = Storage.TMP_FOLDER + key
|
filename = os.path.join(Storage.TMP_FOLDER, key)
|
||||||
cdn_url = self.storage.get_cdn_url(key)
|
cdn_url = self.storage.get_cdn_url(key)
|
||||||
|
|
||||||
if check_if_exists and self.storage.exists(key):
|
if check_if_exists and self.storage.exists(key):
|
||||||
|
|
|
@ -79,7 +79,8 @@ class TelethonArchiver(Archiver):
|
||||||
message = post.message
|
message = post.message
|
||||||
for mp in media_posts:
|
for mp in media_posts:
|
||||||
if len(mp.message) > len(message): message = mp.message
|
if len(mp.message) > len(message): message = mp.message
|
||||||
filename = self.client.download_media(mp.media, f'{Storage.TMP_FOLDER}{chat}_{group_id}/{mp.id}')
|
filename_dest = os.path.join(Storage.TMP_FOLDER, f'{chat}_{group_id}', mp.id)
|
||||||
|
filename = self.client.download_media(mp.media, filename_dest)
|
||||||
key = filename.split(Storage.TMP_FOLDER)[1]
|
key = filename.split(Storage.TMP_FOLDER)[1]
|
||||||
self.storage.upload(filename, key)
|
self.storage.upload(filename, key)
|
||||||
hash = self.get_hash(filename)
|
hash = self.get_hash(filename)
|
||||||
|
@ -92,7 +93,7 @@ class TelethonArchiver(Archiver):
|
||||||
return ArchiveResult(status=status, cdn_url=page_cdn, title=message, timestamp=post.date, hash=page_hash, screenshot=screenshot)
|
return ArchiveResult(status=status, cdn_url=page_cdn, title=message, timestamp=post.date, hash=page_hash, screenshot=screenshot)
|
||||||
elif len(media_posts) == 1:
|
elif len(media_posts) == 1:
|
||||||
key = self.get_key(f'{chat}_{post_id}')
|
key = self.get_key(f'{chat}_{post_id}')
|
||||||
filename = self.client.download_media(post.media, f'{Storage.TMP_FOLDER}{key}')
|
filename = self.client.download_media(post.media, os.path.join(Storage.TMP_FOLDER,key))
|
||||||
key = filename.split(Storage.TMP_FOLDER)[1].replace(" ", "")
|
key = filename.split(Storage.TMP_FOLDER)[1].replace(" ", "")
|
||||||
self.storage.upload(filename, key)
|
self.storage.upload(filename, key)
|
||||||
hash = self.get_hash(filename)
|
hash = self.get_hash(filename)
|
||||||
|
|
|
@ -18,7 +18,7 @@ class TiktokArchiver(Archiver):
|
||||||
try:
|
try:
|
||||||
info = tiktok_downloader.info_post(url)
|
info = tiktok_downloader.info_post(url)
|
||||||
key = self.get_key(f'{info.id}.mp4')
|
key = self.get_key(f'{info.id}.mp4')
|
||||||
filename = Storage.TMP_FOLDER + key
|
filename = os.path.join(Storage.TMP_FOLDER, key)
|
||||||
logger.info(f'found video {key=}')
|
logger.info(f'found video {key=}')
|
||||||
|
|
||||||
if check_if_exists and self.storage.exists(key):
|
if check_if_exists and self.storage.exists(key):
|
||||||
|
|
Ładowanie…
Reference in New Issue