kopia lustrzana https://github.com/bellingcat/auto-archiver
cleanup
rodzic
f5b7c3a5ea
commit
ac000d5943
|
@@ -32,6 +32,7 @@ class TelethonArchiver(Archiver):
|
|||
"api_hash": {"default": None, "help": "telegram API_HASH value, go to https://my.telegram.org/apps"},
|
||||
# "bot_token": {"default": None, "help": "optional, but allows access to more content such as large videos, talk to @botfather"},
|
||||
"session_file": {"default": "secrets/anon", "help": "optional, records the telegram login session for future usage"},
|
||||
"join_channels": {"default": True, "help": "disables the initial setup with channel_invites config, useful if you have a lot and get stuck"},
|
||||
"channel_invites": {
|
||||
"default": {},
|
||||
"help": "(JSON string) private channel invite links (format: t.me/joinchat/HASH OR t.me/+HASH) and (optional but important to avoid hanging for minutes on startup) channel id (format: CHANNEL_ID taken from a post url like https://t.me/c/CHANNEL_ID/1), the telegram account will join any new channels on setup",
|
||||
|
@@ -51,7 +52,7 @@ class TelethonArchiver(Archiver):
|
|||
logger.info(f"SETUP {self.name} checking login...")
|
||||
with self.client.start(): pass
|
||||
|
||||
if len(self.channel_invites):
|
||||
if self.join_channels and len(self.channel_invites):
|
||||
logger.info(f"SETUP {self.name} joining channels...")
|
||||
with self.client.start():
|
||||
# get currently joined channels
|
||||
|
|
|
@@ -21,8 +21,7 @@ class Metadata:
|
|||
metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
tmp_keys: Set[str] = field(default_factory=set, repr=False, metadata={"exclude": True}) # keys that are not to be saved in DBs
|
||||
media: List[Media] = field(default_factory=list)
|
||||
final_media: Media = None # can be overwritten by formatters
|
||||
rearchivable: bool = True # defaults to true, archivers can overwrite
|
||||
rearchivable: bool = True # defaults to true, archivers can overwrite
|
||||
|
||||
def merge(self: Metadata, right: Metadata, overwrite_left=True) -> Metadata:
|
||||
"""
|
||||
|
@@ -73,7 +72,6 @@ class Metadata:
|
|||
|
||||
# custom getter/setters
|
||||
|
||||
|
||||
def set_url(self, url: str) -> Metadata:
|
||||
assert type(url) is str and len(url) > 0, "invalid URL"
|
||||
return self.set("url", url)
|
||||
|
@@ -115,30 +113,27 @@ class Metadata:
|
|||
def add_media(self, media: Media, id: str = None) -> Metadata:
|
||||
# adds a new media, optionally including an id
|
||||
if media is None: return
|
||||
if id is not None: media.set("id", id)
|
||||
if id is not None:
|
||||
assert not len([1 for m in self.media if m.get("id") == id]), f"cannot add 2 pieces of media with the same id {id}"
|
||||
media.set("id", id)
|
||||
self.media.append(media)
|
||||
return media
|
||||
|
||||
def get_media_by_id(self, id: str) -> Media:
|
||||
def get_media_by_id(self, id: str, default=None) -> Media:
|
||||
for m in self.media:
|
||||
if m.get("id") == id: return m
|
||||
return None
|
||||
return default
|
||||
|
||||
def set_final_media(self, final: Media) -> Metadata:
|
||||
if final:
|
||||
if self.final_media:
|
||||
logger.warning(f"overwriting final media value :{self.final_media} with {final}")
|
||||
self.final_media = final
|
||||
return self
|
||||
"""final media is a special type of media: if only one media item can be shown, this is the one; it is useful for some DBs like GsheetsDb"""
|
||||
self.add_media(final, "_final_media")
|
||||
|
||||
def get_single_media(self) -> Media:
|
||||
# TODO: could be refactored to use a custom media.id or metadata
|
||||
if self.final_media:
|
||||
return self.final_media
|
||||
return self.media[0]
|
||||
def get_final_media(self) -> Media:
|
||||
_default = self.media[0] if len(self.media) else None
|
||||
return self.get_media_by_id("_final_media", _default)
|
||||
|
||||
def get_clean_metadata(self) -> Metadata:
|
||||
return dict(
|
||||
{k: v for k, v in self.metadata.items() if k not in self.tmp_keys},
|
||||
**{"processed_at": self._processed_at}
|
||||
)
|
||||
)
|
||||
|
|
|
@@ -60,7 +60,7 @@ class GsheetsDb(Database):
|
|||
|
||||
cell_updates.append((row, 'status', item.status))
|
||||
|
||||
media: Media = item.get_single_media()
|
||||
media: Media = item.get_final_media()
|
||||
|
||||
batch_if_valid('archive', "\n".join(media.urls))
|
||||
batch_if_valid('date', True, datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat())
|
||||
|
|
Ładowanie…
Reference in New Issue