pull/72/head
msramalho 2023-01-27 00:03:30 +00:00
rodzic f5b7c3a5ea
commit ac000d5943
3 zmienionych plików z 15 dodań i 19 usunięć

Wyświetl plik

@@ -32,6 +32,7 @@ class TelethonArchiver(Archiver):
"api_hash": {"default": None, "help": "telegram API_HASH value, go to https://my.telegram.org/apps"},
# "bot_token": {"default": None, "help": "optional, but allows access to more content such as large videos, talk to @botfather"},
"session_file": {"default": "secrets/anon", "help": "optional, records the telegram login session for future usage"},
"join_channels": {"default": True, "help": "disables the initial setup with channel_invites config, useful if you have a lot and get stuck"},
"channel_invites": {
"default": {},
"help": "(JSON string) private channel invite links (format: t.me/joinchat/HASH OR t.me/+HASH) and (optional but important to avoid hanging for minutes on startup) channel id (format: CHANNEL_ID taken from a post url like https://t.me/c/CHANNEL_ID/1), the telegram account will join any new channels on setup",
@@ -51,7 +52,7 @@ class TelethonArchiver(Archiver):
logger.info(f"SETUP {self.name} checking login...")
with self.client.start(): pass
if len(self.channel_invites):
if self.join_channels and len(self.channel_invites):
logger.info(f"SETUP {self.name} joining channels...")
with self.client.start():
# get currently joined channels

Wyświetl plik

@@ -21,8 +21,7 @@ class Metadata:
metadata: Dict[str, Any] = field(default_factory=dict)
tmp_keys: Set[str] = field(default_factory=set, repr=False, metadata={"exclude": True}) # keys that are not to be saved in DBs
media: List[Media] = field(default_factory=list)
final_media: Media = None # can be overwritten by formatters
rearchivable: bool = True # defaults to true, archivers can overwrite
rearchivable: bool = True # defaults to true, archivers can overwrite
def merge(self: Metadata, right: Metadata, overwrite_left=True) -> Metadata:
"""
@@ -73,7 +72,6 @@ class Metadata:
# custom getter/setters
def set_url(self, url: str) -> Metadata:
assert type(url) is str and len(url) > 0, "invalid URL"
return self.set("url", url)
@@ -115,30 +113,27 @@ class Metadata:
def add_media(self, media: Media, id: str = None) -> Metadata:
# adds a new media, optionally including an id
if media is None: return
if id is not None: media.set("id", id)
if id is not None:
assert not len([1 for m in self.media if m.get("id") == id]), f"cannot add 2 pieces of media with the same id {id}"
media.set("id", id)
self.media.append(media)
return media
def get_media_by_id(self, id: str) -> Media:
def get_media_by_id(self, id: str, default=None) -> Media:
for m in self.media:
if m.get("id") == id: return m
return None
return default
def set_final_media(self, final: Media) -> Metadata:
if final:
if self.final_media:
logger.warning(f"overwriting final media value :{self.final_media} with {final}")
self.final_media = final
return self
"""final media is a special type of media: if you can show only 1 this is it, it's useful for some DBs like GsheetDb"""
self.add_media(final, "_final_media")
def get_single_media(self) -> Media:
# TODO: could be refactored to use a custom media.id or metadata
if self.final_media:
return self.final_media
return self.media[0]
def get_final_media(self) -> Media:
_default = self.media[0] if len(self.media) else None
return self.get_media_by_id("_final_media", _default)
def get_clean_metadata(self) -> Metadata:
return dict(
{k: v for k, v in self.metadata.items() if k not in self.tmp_keys},
**{"processed_at": self._processed_at}
)
)

Wyświetl plik

@@ -60,7 +60,7 @@ class GsheetsDb(Database):
cell_updates.append((row, 'status', item.status))
media: Media = item.get_single_media()
media: Media = item.get_final_media()
batch_if_valid('archive', "\n".join(media.urls))
batch_if_valid('date', True, datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat())