From 4854929a1ddca8dad7e12d294c8b455c1557d13a Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Thu, 2 Feb 2023 13:49:56 +0000 Subject: [PATCH] thumbnail and bot token --- src/auto_archiver/archivers/telethon_archiver.py | 5 +++-- src/auto_archiver/core/metadata.py | 5 +++++ src/auto_archiver/databases/gsheet_db.py | 4 ++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/auto_archiver/archivers/telethon_archiver.py b/src/auto_archiver/archivers/telethon_archiver.py index 59369ca..f30f2a4 100644 --- a/src/auto_archiver/archivers/telethon_archiver.py +++ b/src/auto_archiver/archivers/telethon_archiver.py @@ -28,7 +28,7 @@ class TelethonArchiver(Archiver): return { "api_id": {"default": None, "help": "telegram API_ID value, go to https://my.telegram.org/apps"}, "api_hash": {"default": None, "help": "telegram API_HASH value, go to https://my.telegram.org/apps"}, - # "bot_token": {"default": None, "help": "optional, but allows access to more content such as large videos, talk to @botfather"}, + "bot_token": {"default": None, "help": "optional, but allows access to more content such as large videos, talk to @botfather"}, "session_file": {"default": "secrets/anon", "help": "optional, records the telegram login session for future usage, '.session' will be appended to the provided value."}, "join_channels": {"default": True, "help": "disables the initial setup with channel_invites config, useful if you have a lot and get stuck"}, "channel_invites": { @@ -111,7 +111,8 @@ class TelethonArchiver(Archiver): result = Metadata() # NB: not using bot_token since then private channels cannot be archived: self.client.start(bot_token=self.bot_token) - with self.client.start(): + # with self.client.start(): + with self.client.start(bot_token=self.bot_token): try: post = self.client.get_messages(chat, ids=post_id) except ValueError as e: diff --git a/src/auto_archiver/core/metadata.py b/src/auto_archiver/core/metadata.py 
index 7a741f7..00eefe9 100644
--- a/src/auto_archiver/core/metadata.py
+++ b/src/auto_archiver/core/metadata.py
@@ -122,6 +122,11 @@ class Metadata:
         for m in self.media:
             if m.get("id") == id: return m
         return default
+
+    def get_first_image(self, default=None) -> Media:
+        """Return the first media whose mimetype contains "image", or `default` when there is none."""
+        # `or ""` keeps the membership test from raising TypeError should a media's mimetype be None
+        return next((m for m in self.media if "image" in (m.mimetype or "")), default)
 
     def set_final_media(self, final: Media) -> Metadata:
         """final media is a special type of media: if you can show only 1 this is it, it's useful for some DBs like GsheetDb"""
diff --git a/src/auto_archiver/databases/gsheet_db.py b/src/auto_archiver/databases/gsheet_db.py
index 8c711e8..30b37b2 100644
--- a/src/auto_archiver/databases/gsheet_db.py
+++ b/src/auto_archiver/databases/gsheet_db.py
@@ -70,6 +70,10 @@ class GsheetsDb(Database):
         if (screenshot := item.get_media_by_id("screenshot")):
             batch_if_valid('screenshot', "\n".join(screenshot.urls))
 
+        # get_first_image takes only a `default`: keep it None so a missing image skips this column instead of yielding a truthy str
+        if (thumbnail := item.get_first_image()):
+            batch_if_valid('thumbnail', f'=IMAGE("{thumbnail.urls[0]}")')
+
         if (browsertrix := item.get_media_by_id("browsertrix")):
             batch_if_valid('wacz', "\n".join(browsertrix.urls))
             batch_if_valid('replaywebpage', "\n".join([f'https://replayweb.page/?source={quote(wacz)}#view=pages&url={quote(item.get_url())}' for wacz in browsertrix.urls]))