diff --git a/archivers/telegram_archiver.py b/archivers/telegram_archiver.py
index d98f761..026bdd0 100644
--- a/archivers/telegram_archiver.py
+++ b/archivers/telegram_archiver.py
@@ -86,4 +86,4 @@ class TelegramArchiver(Archiver):
 
         cdn_url = self.storage.get_cdn_url(key)
         return ArchiveResult(status=status, cdn_url=cdn_url, thumbnail=key_thumb, thumbnail_index=thumb_index,
-                             duration=duration, title=original_url, timestamp=s.find_all('time')[1].get('datetime'), hash=hash, screenshot=screenshot)
+                             duration=duration, title=original_url, timestamp=s.find_all('time')[1].get('datetime'), hash=hash, screenshot=screenshot, wacz=wacz)
diff --git a/archivers/telethon_archiver.py b/archivers/telethon_archiver.py
index d47cdc5..9f9bbbf 100644
--- a/archivers/telethon_archiver.py
+++ b/archivers/telethon_archiver.py
@@ -74,6 +74,7 @@ class TelethonArchiver(Archiver):
         logger.debug(f'got {len(media_posts)=} for {url=}')
 
         screenshot = self.get_screenshot(url)
+        wacz = self.get_wacz(url)
 
         if len(media_posts) > 0:
             key = self.get_html_key(url)
@@ -81,7 +82,7 @@ class TelethonArchiver(Archiver):
             if check_if_exists and self.storage.exists(key):
                 # only s3 storage supports storage.exists as not implemented on gd
                 cdn_url = self.storage.get_cdn_url(key)
-                return ArchiveResult(status='already archived', cdn_url=cdn_url, title=post.message, timestamp=post.date, screenshot=screenshot)
+                return ArchiveResult(status='already archived', cdn_url=cdn_url, title=post.message, timestamp=post.date, screenshot=screenshot, wacz=wacz)
 
             key_thumb, thumb_index = None, None
             group_id = post.grouped_id if post.grouped_id is not None else post.id
@@ -120,7 +121,7 @@ class TelethonArchiver(Archiver):
 
 
             page_cdn, page_hash, _ = self.generate_media_page_html(url, uploaded_media, html.escape(str(post)))
-            return ArchiveResult(status=status, cdn_url=page_cdn, title=message, timestamp=post.date, hash=page_hash, screenshot=screenshot, thumbnail=key_thumb, thumbnail_index=thumb_index)
+            return ArchiveResult(status=status, cdn_url=page_cdn, title=message, timestamp=post.date, hash=page_hash, screenshot=screenshot, thumbnail=key_thumb, thumbnail_index=thumb_index, wacz=wacz)
 
         page_cdn, page_hash, _ = self.generate_media_page_html(url, [], html.escape(str(post)))
-        return ArchiveResult(status=status, cdn_url=page_cdn, title=post.message, timestamp=getattr_or(post, "date"), hash=page_hash, screenshot=screenshot)
+        return ArchiveResult(status=status, cdn_url=page_cdn, title=post.message, timestamp=getattr_or(post, "date"), hash=page_hash, screenshot=screenshot, wacz=wacz)
diff --git a/archivers/twitter_api_archiver.py b/archivers/twitter_api_archiver.py
index 852df12..454cfe2 100644
--- a/archivers/twitter_api_archiver.py
+++ b/archivers/twitter_api_archiver.py
@@ -70,5 +70,6 @@ class TwitterApiArchiver(TwitterArchiver):
         }, ensure_ascii=False, indent=4)
 
         screenshot = self.get_screenshot(url)
+        wacz = self.get_wacz(url)
         page_cdn, page_hash, thumbnail = self.generate_media_page(urls, url, output)
-        return ArchiveResult(status="success", cdn_url=page_cdn, screenshot=screenshot, hash=page_hash, thumbnail=thumbnail, timestamp=timestamp, title=tweet.data.text)
+        return ArchiveResult(status="success", cdn_url=page_cdn, screenshot=screenshot, hash=page_hash, thumbnail=thumbnail, timestamp=timestamp, title=tweet.data.text, wacz=wacz)
diff --git a/archivers/twitter_archiver.py b/archivers/twitter_archiver.py
index 81f20ab..b868af5 100644
--- a/archivers/twitter_archiver.py
+++ b/archivers/twitter_archiver.py
@@ -85,8 +85,9 @@ class TwitterArchiver(Archiver):
         timestamp = datetime.strptime(tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
 
         screenshot = self.get_screenshot(url)
+        wacz = self.get_wacz(url)
         page_cdn, page_hash, thumbnail = self.generate_media_page(urls, url, r.text)
-        return ArchiveResult(status="success", cdn_url=page_cdn, screenshot=screenshot, hash=page_hash, thumbnail=thumbnail, timestamp=timestamp, title=tweet["text"])
+        return ArchiveResult(status="success", cdn_url=page_cdn, screenshot=screenshot, hash=page_hash, thumbnail=thumbnail, timestamp=timestamp, title=tweet["text"], wacz=wacz)
 
     def choose_variant(self, variants):
         # choosing the highest quality possible
diff --git a/archivers/vk_archiver.py b/archivers/vk_archiver.py
index a3af9db..91b8354 100644
--- a/archivers/vk_archiver.py
+++ b/archivers/vk_archiver.py
@@ -70,4 +70,5 @@ class VkArchiver(Archiver):
         page_cdn, page_hash, thumbnail = self.generate_media_page_html(url, uploaded_media, textual_output, thumbnail=thumbnail)
         # # if multiple wall/photos/videos are present the screenshot will only grab the 1st
         screenshot = self.get_screenshot(url)
-        return ArchiveResult(status="success", cdn_url=page_cdn, screenshot=screenshot, hash=page_hash, thumbnail=thumbnail, thumbnail_index=thumbnail_index, timestamp=datetime, title=title)
+        wacz = self.get_wacz(url)
+        return ArchiveResult(status="success", cdn_url=page_cdn, screenshot=screenshot, hash=page_hash, thumbnail=thumbnail, thumbnail_index=thumbnail_index, timestamp=datetime, title=title, wacz=wacz)
diff --git a/archivers/wayback_archiver.py b/archivers/wayback_archiver.py
index 4de2fa8..e0ede90 100644
--- a/archivers/wayback_archiver.py
+++ b/archivers/wayback_archiver.py
@@ -61,7 +61,7 @@ class WaybackArchiver(Archiver):
             retries += 1
 
         if status_r.status_code != 200:
-            return ArchiveResult(status=f"Internet archive failed: check https://web.archive.org/save/status/{job_id}", screenshot=screenshot)
+            return ArchiveResult(status=f"Internet archive failed: check https://web.archive.org/save/status/{job_id}", screenshot=screenshot, wacz=wacz)
 
         status_json = status_r.json()
         if status_json['status'] != 'success':