kopia lustrzana https://github.com/bellingcat/auto-archiver
improving path operations
rodzic
3019778b8f
commit
13e7d0bf1b
|
@ -64,7 +64,7 @@ class Archiver(ABC):
|
|||
page += f"</body></html>"
|
||||
|
||||
page_key = self.get_key(urlparse(url).path.replace("/", "_") + ".html")
|
||||
page_filename = Storage.TMP_FOLDER + page_key
|
||||
page_filename = os.path.join(Storage.TMP_FOLDER, page_key)
|
||||
page_cdn = self.storage.get_cdn_url(page_key)
|
||||
|
||||
with open(page_filename, "w") as f:
|
||||
|
@ -95,8 +95,8 @@ class Archiver(ABC):
|
|||
key = self.get_key(path.replace("/", "_"))
|
||||
if '.' not in path:
|
||||
key += '.jpg'
|
||||
|
||||
filename = Storage.TMP_FOLDER + key
|
||||
|
||||
filename = os.path.join(Storage.TMP_FOLDER, key)
|
||||
|
||||
d = requests.get(media_url, headers=headers)
|
||||
with open(filename, 'wb') as f:
|
||||
|
@ -140,7 +140,7 @@ class Archiver(ABC):
|
|||
logger.debug(f"getting screenshot for {url=}")
|
||||
key = self.get_key(urlparse(url).path.replace(
|
||||
"/", "_") + datetime.datetime.utcnow().isoformat().replace(" ", "_") + ".png")
|
||||
filename = Storage.TMP_FOLDER + key
|
||||
filename = os.path.join(Storage.TMP_FOLDER, key)
|
||||
|
||||
# Accept cookies popup dismiss for ytdlp video
|
||||
if 'facebook.com' in url:
|
||||
|
|
|
@ -52,7 +52,7 @@ class TelegramArchiver(Archiver):
|
|||
video_id = video_url.split('/')[-1].split('?')[0]
|
||||
key = self.get_key(video_id)
|
||||
|
||||
filename = Storage.TMP_FOLDER + key
|
||||
filename = os.path.join(Storage.TMP_FOLDER, key)
|
||||
cdn_url = self.storage.get_cdn_url(key)
|
||||
|
||||
if check_if_exists and self.storage.exists(key):
|
||||
|
|
|
@ -79,7 +79,8 @@ class TelethonArchiver(Archiver):
|
|||
message = post.message
|
||||
for mp in media_posts:
|
||||
if len(mp.message) > len(message): message = mp.message
|
||||
filename = self.client.download_media(mp.media, f'{Storage.TMP_FOLDER}{chat}_{group_id}/{mp.id}')
|
||||
filename_dest = os.path.join(Storage.TMP_FOLDER, f'{chat}_{group_id}', mp.id)
|
||||
filename = self.client.download_media(mp.media, filename_dest)
|
||||
key = filename.split(Storage.TMP_FOLDER)[1]
|
||||
self.storage.upload(filename, key)
|
||||
hash = self.get_hash(filename)
|
||||
|
@ -92,7 +93,7 @@ class TelethonArchiver(Archiver):
|
|||
return ArchiveResult(status=status, cdn_url=page_cdn, title=message, timestamp=post.date, hash=page_hash, screenshot=screenshot)
|
||||
elif len(media_posts) == 1:
|
||||
key = self.get_key(f'{chat}_{post_id}')
|
||||
filename = self.client.download_media(post.media, f'{Storage.TMP_FOLDER}{key}')
|
||||
filename = self.client.download_media(post.media, os.path.join(Storage.TMP_FOLDER,key))
|
||||
key = filename.split(Storage.TMP_FOLDER)[1].replace(" ", "")
|
||||
self.storage.upload(filename, key)
|
||||
hash = self.get_hash(filename)
|
||||
|
|
|
@ -18,7 +18,7 @@ class TiktokArchiver(Archiver):
|
|||
try:
|
||||
info = tiktok_downloader.info_post(url)
|
||||
key = self.get_key(f'{info.id}.mp4')
|
||||
filename = Storage.TMP_FOLDER + key
|
||||
filename = os.path.join(Storage.TMP_FOLDER, key)
|
||||
logger.info(f'found video {key=}')
|
||||
|
||||
if check_if_exists and self.storage.exists(key):
|
||||
|
|
Ładowanie…
Reference in New Issue