Mirror of https://github.com/bellingcat/auto-archiver
refactoring storage and bringing changes from origin
parent f3ce226665
commit e4603a9423
@@ -0,0 +1 @@
from storages import *
@@ -1,17 +1,10 @@
import os
import ffmpeg
from dataclasses import dataclass
import datetime
from loguru import logger
from dataclasses import dataclass
from abc import ABC, abstractmethod

# TODO There should be a better way of generating keys, that adds the following info:
# - name of sheet that it is being archived from
# (this means we might archive the same media twice on different sheets, but that's OK I think)
# - name of archiver/platform that the video comes from
# This should make it easier to maintain and clean the archive later

# TODO "check_if_exists" has lots of repeated code across the archivers. Can this be
# cleaned up? Difficult is we don't know the filename until the archivers start working.
from storages import Storage


@dataclass
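The two TODOs in this hunk describe what a more informative key would encode: the source sheet and the archiver/platform. A purely hypothetical helper along those lines, to make the idea concrete; the name `build_key`, its arguments, and the key layout are invented and not part of this commit:

```python
import os


def build_key(sheet_name: str, archiver_name: str, filename: str) -> str:
    """Illustrative only: combine sheet + archiver + original filename into a storage key."""
    tail = os.path.split(filename)[1]        # 'tmp/abc.mp4' -> 'abc.mp4'
    _id, extension = os.path.splitext(tail)  # -> ('abc', '.mp4')
    safe_sheet = sheet_name.replace(' ', '_')  # keep keys URL friendly
    return f'{safe_sheet}/{archiver_name}_{_id}{extension}'


if __name__ == '__main__':
    print(build_key('my sheet', 'telegram', 'tmp/abc.mp4'))  # my_sheet/telegram_abc.mp4
```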
@@ -25,33 +18,27 @@ class ArchiveResult:
timestamp: datetime.datetime = None


class Archiver:
class Archiver(ABC):
name = "default"

def __init__(self, s3_client):
self.s3 = s3_client
def __init__(self, storage: Storage):
self.storage = storage

def __str__(self):
return self.__class__.__name__

def download(self, url, check_if_exists=False):
logger.error("method 'download' not implemented")

def get_cdn_url(self, key):
return 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(
os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)

def do_s3_upload(self, f, key):
self.s3.upload_fileobj(f, Bucket=os.getenv(
'DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
@abstractmethod
def download(self, url, check_if_exists=False): pass

def get_key(self, filename):
print(f"key base implementation: {self.name}")
# TODO: refactor to be more manageable
key = filename.split('/')[1]
if 'unknown_video' in key:
key = key.replace('unknown_video', 'jpg')
return key
"""
returns a key in the format "[archiverName]_[filename]" includes extension
"""
tail = os.path.split(filename)[1]  # returns filename.ext from full path
_id, extension = os.path.splitext(tail)  # returns [filename, .ext]
if 'unknown_video' in _id:
_id = _id.replace('unknown_video', 'jpg')
return f'{self.name}_{_id}{extension}'

def get_thumbnails(self, filename, duration=None):
if not os.path.exists(filename.split('.')[0]):
@@ -80,10 +67,9 @@ class Archiver:
thumbnail_filename = filename.split('.')[0] + '/' + fname
key = filename.split('/')[1].split('.')[0] + '/' + fname

cdn_url = self.get_cdn_url(key)
cdn_url = self.storage.get_cdn_url(key)

with open(thumbnail_filename, 'rb') as f:
self.do_s3_upload(f, key)
self.storage.upload(thumbnail_filename, key)

cdn_urls.append(cdn_url)
os.remove(thumbnail_filename)
@@ -107,9 +93,8 @@ class Archiver:

thumb_index = filename.split('/')[1].split('.')[0] + '/index.html'

self.s3.upload_fileobj(open(index_fname, 'rb'), Bucket=os.getenv(
'DO_BUCKET'), Key=thumb_index, ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/html'})
self.storage.upload(index_fname, thumb_index, extra_args={'ACL': 'public-read', 'ContentType': 'text/html'})

thumb_index_cdn_url = self.get_cdn_url(thumb_index)
thumb_index_cdn_url = self.storage.get_cdn_url(thumb_index)

return (key_thumb, thumb_index_cdn_url)
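Stepping back from the hunks above: the base class no longer holds a boto3 client. It receives a storage object and calls `upload`, `exists`, and `get_cdn_url` on it, while `download` becomes abstract. A self-contained sketch of that dependency-injection shape, using a made-up in-memory stand-in rather than the real S3Storage:

```python
from abc import ABC, abstractmethod


class InMemoryStorage:
    """Made-up stand-in that satisfies the same interface as the new storage backends."""
    def __init__(self):
        self.blobs = {}

    def exists(self, key):
        return key in self.blobs

    def get_cdn_url(self, key):
        return f'memory://{key}'

    def upload(self, filename, key, **kwargs):
        with open(filename, 'rb') as f:
            self.blobs[key] = f.read()


class Archiver(ABC):
    """Same shape as the refactored base class: storage injected, download abstract."""
    name = "default"

    def __init__(self, storage):
        self.storage = storage

    @abstractmethod
    def download(self, url, check_if_exists=False): pass


class TextArchiver(Archiver):
    """Hypothetical archiver used only to exercise the pattern."""
    name = "text"

    def download(self, url, check_if_exists=False):
        key = f'{self.name}_example.txt'
        if check_if_exists and self.storage.exists(key):
            return self.storage.get_cdn_url(key)   # already archived
        self.storage.blobs[key] = url.encode()      # skip the tmp-file round trip for the sketch
        return self.storage.get_cdn_url(key)


print(TextArchiver(InMemoryStorage()).download('https://example.com/post/1'))
```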
@@ -1,15 +1,13 @@
import os
import requests
from bs4 import BeautifulSoup
from botocore.errorfactory import ClientError
from .base_archiver import Archiver, ArchiveResult

# TODO: get_cdn_url, get_thumbnails, do_s3_upload
from .base_archiver import Archiver, ArchiveResult


class TelegramArchiver(Archiver):
name = "telegram"


def download(self, url, check_if_exists=False):
# detect URLs that we definitely cannot handle
if 'http://t.me/' not in url and 'https://t.me/' not in url:
@@ -35,19 +33,13 @@ class TelegramArchiver(Archiver):

video_url = video.get('src')
key = video_url.split('/')[-1].split('?')[0]
key = self.get_key(key)

filename = 'tmp/' + key

if check_if_exists:
try:
self.s3.head_object(Bucket=os.getenv('DO_BUCKET'), Key=key)

# file exists
cdn_url = self.get_cdn_url(key)

status = 'already archived'

except ClientError:
pass
if check_if_exists and self.storage.exists(key):
status = 'already archived'
cdn_url = self.storage.get_cdn_url(key)

v = requests.get(video_url, headers=headers)

@@ -55,10 +47,9 @@ class TelegramArchiver(Archiver):
f.write(v.content)

if status != 'already archived':
cdn_url = self.get_cdn_url(key)
cdn_url = self.storage.get_cdn_url(key)

with open(filename, 'rb') as f:
self.do_s3_upload(f, key)
self.storage.upload(filename, key)

# extract duration from HTML
duration = s.find_all('time')[0].contents[0]
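For reference, the new `get_key` in the base class prefixes the archiver's `name` and keeps the extension, so the key TelegramArchiver derives from the video `src` now looks roughly like this. The URL below is made up, and the snippet copies the relevant lines rather than importing the repo:

```python
import os

name = 'telegram'  # Archiver.name for TelegramArchiver
video_url = 'https://cdn.telegram.example/file/abc123.mp4?token=xyz'  # hypothetical URL

raw = video_url.split('/')[-1].split('?')[0]  # 'abc123.mp4', as in download()
tail = os.path.split(raw)[1]                  # 'abc123.mp4'
_id, extension = os.path.splitext(tail)       # ('abc123', '.mp4')
if 'unknown_video' in _id:
    _id = _id.replace('unknown_video', 'jpg')
key = f'{name}_{_id}{extension}'

print(key)           # telegram_abc123.mp4
print('tmp/' + key)  # local download path used before uploading to storage
```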
@@ -1,15 +1,13 @@
import os, traceback
from botocore.errorfactory import ClientError
import tiktok_downloader
from loguru import logger
from .base_archiver import Archiver, ArchiveResult

# TODO: get_cdn_url, do_s3_upload, get_thumbnails
from .base_archiver import Archiver, ArchiveResult


class TiktokArchiver(Archiver):
name = "tiktok"


def download(self, url, check_if_exists=False):
if 'tiktok.com' not in url:
return False
@@ -18,35 +16,28 @@ class TiktokArchiver(Archiver):

try:
info = tiktok_downloader.info_post(url)
key = 'tiktok_' + str(info.id) + '.mp4'
key = self.get_key(f'{info.id}.mp4')
cdn_url = self.get_cdn_url(key)
filename = 'tmp/' + key

if check_if_exists:
try:
self.s3.head_object(Bucket=os.getenv('DO_BUCKET'), Key=key)
if check_if_exists and self.storage.exists(key):
status = 'already archived'

# file exists
cdn_url = self.get_cdn_url(key)
media = tiktok_downloader.snaptik(url).get_media()

status = 'already archived'
if len(media) <= 0:
if status == 'already archived':
return ArchiveResult(status='Could not download media, but already archived', cdn_url=cdn_url)
else:
return ArchiveResult(status='Could not download media')

except ClientError:
pass
media[0].download(filename)

if status != 'already archived':
media = tiktok_downloader.snaptik(url).get_media()
if len(media) > 0:
media[0].download(filename)
with open(filename, 'rb') as f:
self.do_s3_upload(f, key)

cdn_url = self.get_cdn_url(key)
else:
status = 'could not download media'
self.storage.upload(filename, key)

try:
key_thumb, thumb_index = self.get_thumbnails(
filename, duration=info.duration)
key_thumb, thumb_index = self.get_thumbnails(filename, duration=info.duration)
except:
key_thumb = ''
thumb_index = 'error creating thumbnails'
@@ -1,14 +1,15 @@
import time, requests, os
from bs4 import BeautifulSoup

from storages import Storage
from .base_archiver import Archiver, ArchiveResult


class WaybackArchiver(Archiver):
name = "wayback"

def __init__(self, s3_client):
self.s3 = s3_client

def __init__(self, storage: Storage):
super(WaybackArchiver, self).__init__(storage)
self.seen_urls = {}

def download(self, url, check_if_exists=False):
@@ -26,10 +27,12 @@ class WaybackArchiver(Archiver):
if r.status_code != 200:
return ArchiveResult(status="Internet archive failed")

if 'job_id' not in r.json() and 'message' in r.json():
return ArchiveResult(status=f"Internet archive failed: {r.json()['message']}")

job_id = r.json()['job_id']

status_r = requests.get(
'https://web.archive.org/save/status/' + job_id, headers=ia_headers)
status_r = requests.get('https://web.archive.org/save/status/' + job_id, headers=ia_headers)

retries = 0

@@ -51,7 +54,7 @@ class WaybackArchiver(Archiver):
status_json = status_r.json()

if status_json['status'] != 'success':
return ArchiveResult(status='Internet Archive failed: ' + status_json['message'])
return ArchiveResult(status='Internet Archive failed: ' + str(status_json))

archive_url = 'https://web.archive.org/web/' + \
status_json['timestamp'] + '/' + status_json['original_url']
@@ -59,15 +62,15 @@ class WaybackArchiver(Archiver):
try:
r = requests.get(archive_url)

parsed = BeautifulSoup(
r.content, 'html.parser')
parsed = BeautifulSoup(r.content, 'html.parser')

title = parsed.find_all('title')[
0].text
title = parsed.find_all('title')[0].text

if title == 'Wayback Machine':
title = 'Could not get title'
except:
title = "Could not get title"

result = ArchiveResult(
status='Internet Archive fallback', cdn_url=archive_url, title=title)
result = ArchiveResult(status='Internet Archive fallback', cdn_url=archive_url, title=title)
self.seen_urls[url] = result
return result
@@ -3,12 +3,13 @@ import os
import datetime
import youtube_dl
from loguru import logger
from botocore.errorfactory import ClientError

from .base_archiver import Archiver, ArchiveResult


class YoutubeDLArchiver(Archiver):
name = "yotube_dl"


def download(self, url, check_if_exists=False):
ydl_opts = {'outtmpl': 'tmp/%(id)s.%(ext)s', 'quiet': False}
if (url[0:21] == 'https://facebook.com/' or url[0:25] == 'https://wwww.facebook.com/') and os.getenv('FB_COOKIE'):
@@ -32,8 +33,11 @@ class YoutubeDLArchiver(Archiver):
if check_if_exists:
if 'entries' in info:
if len(info['entries']) > 1:
logger.warning('YoutubeDLArchiver succeeded but cannot archive channels or pages with multiple videos')
return False
elif len(info['entries']) == 0:
logger.warning(
'YoutubeDLArchiver cannot archive channels or pages with multiple videos')
'YoutubeDLArchiver succeeded but did not find video')
return False

filename = ydl.prepare_filename(info['entries'][0])
@@ -42,20 +46,14 @@ class YoutubeDLArchiver(Archiver):

key = self.get_key(filename)

try:
self.s3.head_object(Bucket=os.getenv('DO_BUCKET'), Key=key)

# file exists
cdn_url = self.get_cdn_url(key)

if self.storage.exists(key):
status = 'already archived'

except ClientError:
pass
cdn_url = self.storage.get_cdn_url(key)

# sometimes this results in a different filename, so do this again
info = ydl.extract_info(url, download=True)

# TODO: add support for multiple videos
if 'entries' in info:
if len(info['entries']) > 1:
logger.warning(
@@ -70,19 +68,24 @@ class YoutubeDLArchiver(Archiver):
filename = filename.split('.')[0] + '.mkv'

if status != 'already archived':
key = self. get_key(filename)
cdn_url = self.get_cdn_url(key)
key = self.get_key(filename)
cdn_url = self.storage.get_cdn_url(key)

with open(filename, 'rb') as f:
self.do_s3_upload(f, key)
self.storage.upload(filename, key)

# get duration
duration = info['duration'] if 'duration' in info else None

# get thumbnails
key_thumb, thumb_index = self.get_thumbnails(filename, duration=duration)
try:
key_thumb, thumb_index = self.get_thumbnails(filename, duration=duration)
except:
key_thumb = ''
thumb_index = 'Could not generate thumbnails'

os.remove(filename)

timestamp = info['timestamp'] if 'timestamp' in info else datetime.datetime.strptime(info['upload_date'], '%Y%m%d').timestamp() if 'upload_date' in info and info['upload_date'] is not None else None

return ArchiveResult(status=status, cdn_url=cdn_url, thumbnail=key_thumb, thumbnail_index=thumb_index, duration=duration,
title=info['title'] if 'title' in info else None,
timestamp=info['timestamp'] if 'timestamp' in info else datetime.datetime.strptime(info['upload_date'], '%Y%m%d').timestamp() if 'upload_date' in info else None)
title=info['title'] if 'title' in info else None, timestamp=timestamp)
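The new `timestamp` line above is dense; its fallback order is: use `info['timestamp']` if present, otherwise parse `info['upload_date']` (YYYYMMDD) into a Unix timestamp, otherwise None. Rewritten as a plain function for clarity, with invented sample dicts:

```python
import datetime


def extract_timestamp(info: dict):
    """Same fallback chain as the one-liner in YoutubeDLArchiver.download."""
    if 'timestamp' in info:
        return info['timestamp']
    if 'upload_date' in info and info['upload_date'] is not None:
        return datetime.datetime.strptime(info['upload_date'], '%Y%m%d').timestamp()
    return None


print(extract_timestamp({'timestamp': 1640995200}))    # 1640995200
print(extract_timestamp({'upload_date': '20220101'}))  # parsed from YYYYMMDD
print(extract_timestamp({'upload_date': None}))        # None
```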
@@ -2,12 +2,13 @@ import os
import datetime
import argparse
import math
import requests
import gspread
import boto3
from loguru import logger
from dotenv import load_dotenv

import archivers
from storages import S3Storage, S3Config

load_dotenv()

@@ -41,7 +42,7 @@ def index_to_col(index):
return alphabet[index]


def update_sheet(wks, row, result : archivers.ArchiveResult, columns, v):
def update_sheet(wks, row, result: archivers.ArchiveResult, columns, v):
update = []

if columns['status'] is not None:
@@ -103,19 +104,24 @@ def update_sheet(wks, row, result : archivers.ArchiveResult, columns, v):
def process_sheet(sheet):
gc = gspread.service_account(filename='service_account.json')
sh = gc.open(sheet)
n_worksheets = len(sh.worksheets())

s3_client = boto3.client('s3',
region_name=os.getenv('DO_SPACES_REGION'),
endpoint_url='https://{}.digitaloceanspaces.com'.format(
os.getenv('DO_SPACES_REGION')),
aws_access_key_id=os.getenv('DO_SPACES_KEY'),
aws_secret_access_key=os.getenv('DO_SPACES_SECRET'))
s3_config = S3Config(
bucket=os.getenv('DO_BUCKET'),
region=os.getenv('DO_SPACES_REGION'),
key=os.getenv('DO_SPACES_KEY'),
secret=os.getenv('DO_SPACES_SECRET')
)

# s3_client = boto3.client('s3',
# region_name=os.getenv('DO_SPACES_REGION'),
# endpoint_url='https://{}.digitaloceanspaces.com'.format(
# os.getenv('DO_SPACES_REGION')),
# aws_access_key_id=os.getenv('DO_SPACES_KEY'),
# aws_secret_access_key=os.getenv('DO_SPACES_SECRET'))

# loop through worksheets to check
for ii in range(n_worksheets):
logger.info("Opening worksheet " + str(ii))
wks = sh.get_worksheet(ii)
for ii, wks in enumerate(sh.worksheets()):
logger.info(f'Opening worksheet {ii}: "{wks.title}"')
values = wks.get_all_values()

headers = [v.lower() for v in values[0]]
@@ -126,7 +132,7 @@ def process_sheet(sheet):
'source url')) if 'source url' in headers else None

if columns['url'] is None:
logger.warning("No 'Media URL' column found, skipping")
logger.warning(f'No "Media URL" column found, skipping worksheet {wks.title}')
continue

url_index = col_to_index(columns['url'])
@@ -153,6 +159,9 @@ def process_sheet(sheet):
columns['duration'] = index_to_col(headers.index(
'duration')) if 'duration' in headers else None

# archives will be in a folder 'doc_name/worksheet_name'
s3_config.folder = f'{sheet}/{wks.title}/'
s3_client = S3Storage(s3_config)

# order matters, first to succeed excludes remaining
active_archivers = [
@@ -162,37 +171,43 @@ def process_sheet(sheet):
archivers.WaybackArchiver(s3_client)
]


# loop through rows in worksheet
for i in range(2, len(values)+1):
v = values[i-1]
for i in range(2, len(values) + 1):
v = values[i - 1]
url = v[url_index]

if v[url_index] != "" and v[col_to_index(columns['status'])] == "":
latest_val = wks.acell(
columns['status'] + str(i)).value
if url != "" and v[col_to_index(columns['status'])] == "":
latest_val = wks.acell(columns['status'] + str(i)).value

# check so we don't step on each others' toes
if latest_val == '' or latest_val is None:
wks.update(
columns['status'] + str(i), 'Archive in progress')
wks.update(columns['status'] + str(i), 'Archive in progress')

# expand short URL links
if 'https://t.co/' in url:
r = requests.get(url)
url = r.url

for archiver in active_archivers:
logger.debug(f"Trying {archiver} on row {i}")
result = archiver.download(v[url_index], check_if_exists=True)

result = archiver.download(url, check_if_exists=True)

if result:
logger.info(f"{archiver} succeeded on row {i}")
logger.success(f"{archiver} succeeded on row {i}")
break

if result:
update_sheet(wks, i, result, columns, v)
else:
wks.update(columns['status'] + str(i), 'failed: no archiver')

# except:
# if any unexpected errors occured, log these into the Google Sheet
# t, value, traceback = sys.exc_info()

# except:
# if any unexpected errors occured, log these into the Google Sheet
# t, value, traceback = sys.exc_info()

# update_sheet(wks, i, str(
# value), {}, columns, v)
# update_sheet(wks, i, str(
# value), {}, columns, v)


def main():
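Condensing the control flow introduced above: one S3Config is built from environment variables, its folder is repointed at '{sheet}/{worksheet}/' for each worksheet, an S3Storage is created from it, and the archivers are tried in order until one returns a result. A minimal sketch of that loop with storage and archivers stubbed out; the stub classes and values are invented for illustration only:

```python
class StubStorage:
    """Stands in for S3Storage(S3Config(...)); only the folder matters here."""
    def __init__(self, folder):
        self.folder = folder


class StubArchiver:
    def __init__(self, storage, succeeds):
        self.storage, self.succeeds = storage, succeeds

    def download(self, url, check_if_exists=False):
        return {'url': url, 'folder': self.storage.folder} if self.succeeds else False


sheet, worksheets = 'My Document', ['Sheet1', 'Sheet2']
for wks in worksheets:
    # archives end up under 'doc_name/worksheet_name/' (comment from the diff)
    storage = StubStorage(folder=f'{sheet}/{wks}/')
    # order matters, first to succeed excludes remaining (comment from the diff)
    active_archivers = [StubArchiver(storage, False), StubArchiver(storage, True)]
    for archiver in active_archivers:
        result = archiver.download('https://example.com/media', check_if_exists=True)
        if result:
            break
    print(wks, result)
```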
@@ -0,0 +1,3 @@
# we need to explicitly expose the available imports here
from .base_storage import *
from .s3_storage import *
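The comment in this new storages/__init__.py is the reason `from storages import Storage` and `from storages import S3Storage, S3Config` work elsewhere in the diff: the package re-exports the submodules' names. A tiny standalone illustration of the same pattern, with a hypothetical package built in a temp directory so the snippet runs on its own:

```python
# Build a throwaway package on disk to show how __init__.py re-exports work.
import os
import sys
import tempfile

root = tempfile.mkdtemp()
pkg_dir = os.path.join(root, 'mystorages')
os.mkdir(pkg_dir)
with open(os.path.join(pkg_dir, 'base.py'), 'w') as f:
    f.write('class Storage:\n    pass\n')
with open(os.path.join(pkg_dir, '__init__.py'), 'w') as f:
    f.write('from .base import *\n')  # same trick as storages/__init__.py

sys.path.insert(0, root)
from mystorages import Storage  # works because __init__.py exposed it
print(Storage)
```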
@@ -0,0 +1,19 @@
from abc import ABC, abstractmethod


class Storage(ABC):
    @abstractmethod
    def __init__(self, config): pass

    @abstractmethod
    def get_cdn_url(self, path): pass

    @abstractmethod
    def exists(self, path): pass

    @abstractmethod
    def uploadf(self, file, key, **kwargs): pass

    def upload(self, filename: str, key: str, **kwargs):
        with open(filename, 'rb') as f:
            self.uploadf(f, key, **kwargs)
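The Storage base class above fixes the contract each backend must satisfy (get_cdn_url, exists, uploadf) plus the shared upload wrapper that opens the file and delegates to uploadf. As a sanity check of that contract, here is a hypothetical local-disk backend that is not part of the commit, written against a copy of the ABC so it runs standalone:

```python
import os
import shutil
from abc import ABC, abstractmethod


class Storage(ABC):
    """Copy of the ABC added in this diff, so the snippet is self-contained."""
    @abstractmethod
    def __init__(self, config): pass

    @abstractmethod
    def get_cdn_url(self, path): pass

    @abstractmethod
    def exists(self, path): pass

    @abstractmethod
    def uploadf(self, file, key, **kwargs): pass

    def upload(self, filename: str, key: str, **kwargs):
        with open(filename, 'rb') as f:
            self.uploadf(f, key, **kwargs)


class LocalStorage(Storage):
    """Hypothetical local-disk backend, for illustration only."""
    def __init__(self, config='archive'):
        self.root = config  # treat the config as a directory path
        os.makedirs(self.root, exist_ok=True)

    def get_cdn_url(self, key):
        return 'file://' + os.path.abspath(os.path.join(self.root, key))

    def exists(self, key):
        return os.path.exists(os.path.join(self.root, key))

    def uploadf(self, file, key, **kwargs):
        with open(os.path.join(self.root, key), 'wb') as out:
            shutil.copyfileobj(file, out)
```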
@@ -0,0 +1,49 @@
import boto3
from botocore.errorfactory import ClientError
from .base_storage import Storage
from dataclasses import dataclass


@dataclass
class S3Config:
    bucket: str
    region: str
    key: str
    secret: str
    folder: str = ""


class S3Storage(Storage):

    def __init__(self, config: S3Config):
        self.bucket = config.bucket
        self.region = config.region
        self.folder = config.folder

        if len(self.folder) and self.folder[-1] != '/':
            self.folder += '/'

        self.s3 = boto3.client(
            's3',
            region_name=self.region,
            endpoint_url=f'https://{self.region}.digitaloceanspaces.com',
            aws_access_key_id=config.key,
            aws_secret_access_key=config.secret
        )

    def _get_path(self, key):
        return self.folder + key

    def get_cdn_url(self, key):
        return f'https://{self.bucket}.{self.region}.cdn.digitaloceanspaces.com/{self._get_path(key)}'

    def exists(self, key):
        try:
            self.s3.head_object(Bucket=self.bucket, Key=self._get_path(key))
            return True
        except ClientError:
            return False

    def uploadf(self, file, key, **kwargs):
        extra_args = kwargs["extra_args"] if "extra_args" in kwargs else {'ACL': 'public-read'}
        self.s3.upload_fileobj(file, Bucket=self.bucket, Key=self._get_path(key), ExtraArgs=extra_args)
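Putting the new pieces together, this is roughly how the rest of the diff constructs and uses the storage. The configuration comes from the same DO_* environment variables as before; the folder and key values below are placeholders, and actually running the snippet requires this repo on the path, boto3, and valid DigitalOcean Spaces credentials:

```python
import os
from storages import S3Storage, S3Config  # as exposed by storages/__init__.py

config = S3Config(
    bucket=os.getenv('DO_BUCKET'),
    region=os.getenv('DO_SPACES_REGION'),
    key=os.getenv('DO_SPACES_KEY'),
    secret=os.getenv('DO_SPACES_SECRET'),
    folder='my-sheet/sheet1/',             # placeholder '{sheet}/{worksheet}/' folder
)

storage = S3Storage(config)

key = 'telegram_abc123.mp4'                # placeholder key from an archiver
if not storage.exists(key):
    storage.upload('tmp/' + key, key)      # opens the local file and calls uploadf()
print(storage.get_cdn_url(key))
# https://<bucket>.<region>.cdn.digitaloceanspaces.com/my-sheet/sheet1/telegram_abc123.mp4
```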