diff --git a/storages/base_storage.py b/storages/base_storage.py index bfb6911..3d9e361 100644 --- a/storages/base_storage.py +++ b/storages/base_storage.py @@ -5,6 +5,7 @@ from pathlib import Path class Storage(ABC): TMP_FOLDER = "tmp/" + @abstractmethod def __init__(self, config): pass @@ -28,18 +29,18 @@ class Storage(ABC): and others not, but that all can call """ for k, v in kwargs.items(): - if k in self.get_allowed_properties(): + if k in self._get_allowed_properties(): setattr(self, k, v) else: logger.warning(f'[{self.__class__.__name__}] does not accept dynamic property "{k}"') - def get_allowed_properties(self): + def _get_allowed_properties(self): """ child classes should specify which properties they allow to be set """ return set(["subfolder"]) - def clean_path(self, folder, default="", add_forward_slash=True): + def _clean_path(self, folder, default="", add_forward_slash=True): if folder is None or type(folder) != str or len(folder.strip()) == 0: return default return str(Path(folder)) + ("/" if add_forward_slash else "") diff --git a/storages/gd_storage.py b/storages/gd_storage.py index 3d65519..f5f4066 100644 --- a/storages/gd_storage.py +++ b/storages/gd_storage.py @@ -12,15 +12,14 @@ import time @dataclass class GDConfig: root_folder_id: str + default_upload_folder_name: str = "default" class GDStorage(Storage): - DEFAULT_UPLOAD_FOLDER_NAME = "default" - def __init__(self, config: GDConfig): + self.default_upload_folder_name = config.default_upload_folder_name self.root_folder_id = config.root_folder_id - SCOPES = ['https://www.googleapis.com/auth/drive'] - creds = service_account.Credentials.from_service_account_file('service_account.json', scopes=SCOPES) + creds = service_account.Credentials.from_service_account_file('service_account.json', scopes=['https://www.googleapis.com/auth/drive']) self.service = build('drive', 'v3', credentials=creds) def get_cdn_url(self, key): @@ -28,150 +27,54 @@ class GDStorage(Storage): only support files saved in 
a folder for GD S3 supports folder and all stored in the root """ - self.subfolder = self.clean_path(self.subfolder, GDStorage.DEFAULT_UPLOAD_FOLDER_NAME, False) + self.subfolder = self._clean_path(self.subfolder, self.default_upload_folder_name, False) filename = key logger.debug(f'Looking for {self.subfolder} and filename: {filename} on GD') - # retry policy on Google Drive - try_again = True - counter = 1 - folder_id = None - while try_again: - # need to lookup the id of folder eg SM0002 which should be there already as this is get_cdn_url - results = self.service.files().list( - q=f"'{self.root_folder_id}' in parents and name = '{self.subfolder}' ", - spaces='drive', # ie not appDataFolder or photos - fields='files(id, name)' - ).execute() - items = results.get('files', []) + folder_id = self._get_id_from_parent_and_name(self.root_folder_id, self.subfolder, 5, 10) - for item in items: - logger.debug(f"found folder of {item['name']}") - folder_id = item['id'] - try_again = False - - if folder_id is None: - logger.debug(f'Cannot find {self.subfolder=} waiting and trying again {counter=}') - counter += 1 - time.sleep(10) - if counter > 18: - raise ValueError(f'Cannot find {self.subfolder} and retried 18 times pausing 10s at a time which is 3 minutes') - - # check for sub folder in file eg youtube_dl_sDE-qZdi8p8/index.html' - # happens doing thumbnails + # check for sub folder in file youtube_dl_abcde/index.html, needed for thumbnails + # a='youtube_dl_abcde', b='index.html' a, _, b = filename.partition('/') - if b != '': - # a: 'youtube_dl_sDE-qZdi8p8' - # b: 'index.html' - logger.debug(f'get_cdn_url: Found a subfolder so need to split on a: {a} and {b}') - - # get id of the sub folder - results = self.service.files().list( - q=f"'{folder_id}' in parents and mimeType='application/vnd.google-apps.folder' and name = '{a}' ", - spaces='drive', # ie not appDataFolder or photos - fields='files(id, name)' - ).execute() - items = results.get('files', []) - - filename = 
None - for item in items: - folder_id = item['id'] - filename = b - if filename is None: - raise ValueError(f'Problem finding sub folder {a}') + if b != '': # only descend when the key really has the form 'sub-subfolder/file' + logger.debug(f'get_cdn_url: Found a subfolder so need to split on: {a=} and {b=}') + folder_id, filename = self._get_id_from_parent_and_name(folder_id, a, use_mime_type=True), b # get id of file inside folder (or sub folder) - results = self.service.files().list( - q=f"'{folder_id}' in parents and name = '{filename}' ", - spaces='drive', - fields='files(id, name)' - ).execute() - items = results.get('files', []) - - file_id = None - for item in items: - logger.debug(f"found file of {item['name']}") - file_id = item['id'] - - if file_id is None: - raise ValueError(f'Problem finding file {filename} in folder_id {folder_id}') - - foo = "https://drive.google.com/file/d/" + file_id + "/view?usp=sharing" - return foo + file_id = self._get_id_from_parent_and_name(folder_id, filename) + return f"https://drive.google.com/file/d/{file_id}/view?usp=sharing" def exists(self, _key): - # TODO: How to check for google drive, as it accepts different names + # TODO: How to check for google drive, as it accepts different names? return False def uploadf(self, file, key, **_kwargs): - logger.debug(f"before {self.subfolder=}") - self.subfolder = self.clean_path(self.subfolder, GDStorage.DEFAULT_UPLOAD_FOLDER_NAME, False) + """ + 1. check if subfolder exists or create it + 2. check if key contains sub-subfolder, check if exists or create it + 3. 
upload file to root_id/subfolder[/sub-subfolder]/filename + """ + self.subfolder = self._clean_path(self.subfolder, self.default_upload_folder_name, False) filename = key - logger.debug(f"after {self.subfolder=}") - # does folder eg SM0005 exist already inside parent of Files auto-archiver - results = self.service.files().list( - q=f"'{self.root_folder_id}' in parents and mimeType='application/vnd.google-apps.folder' and name = '{self.subfolder}' ", - spaces='drive', - fields='files(id, name)' - ).execute() - items = results.get('files', []) - folder_id_to_upload_to = None - if len(items) > 1: - logger.error(f'Duplicate folder name of {self.subfolder} which should never happen, but continuing anyway') - - for item in items: - logger.debug(f"Found existing folder of {item['name']}") - folder_id_to_upload_to = item['id'] + # get id of subfolder or create if it does not exist + folder_id_to_upload_to = self._get_id_from_parent_and_name(self.root_folder_id, self.subfolder, use_mime_type=True, raise_on_missing=False) if folder_id_to_upload_to is None: - logger.debug(f'Creating new folder {self.subfolder}') - file_metadata = { - 'name': [self.subfolder], - 'mimeType': 'application/vnd.google-apps.folder', - 'parents': [self.root_folder_id] - } - gd_file = self.service.files().create(body=file_metadata, fields='id').execute() - folder_id_to_upload_to = gd_file.get('id') + folder_id_to_upload_to = self._mkdir(self.subfolder, self.root_folder_id) - # check for subfolder name in file eg youtube_dl_sDE-qZdi8p8/out1.jpg', eg: thumbnails - # will always return a and a blank b even if there is nothing to split - # https://stackoverflow.com/a/38149500/26086 + # check for sub folder in file youtube_dl_abcde/index.html, needed for thumbnails + # a='youtube_dl_abcde', b='index.html' a, _, b = filename.partition('/') - if b != '': - # a: 'youtube_dl_sDE-qZdi8p8' - # b: 'out1.jpg' - logger.debug(f'uploadf: Found a subfolder so need to split on a: {a} and {b}') - - # does the 'a' 
folder exist already in folder_id_to_upload_to eg SM0005 - results = self.service.files().list( - q=f"'{folder_id_to_upload_to}' in parents and mimeType='application/vnd.google-apps.folder' and name = '{a}' ", - spaces='drive', # ie not appDataFolder or photos - fields='files(id, name)' - ).execute() - items = results.get('files', []) - sub_folder_id_to_upload_to = None - if len(items) > 1: - logger.error(f'Duplicate folder name of {a} which should never happen') - - for item in items: - logger.debug(f"Found existing folder of {item['name']}") - sub_folder_id_to_upload_to = item['id'] - + if b != '': # key contains a sub-subfolder: get its id or create it if it does not exist + logger.debug(f'uploadf: Found a subfolder so need to split on: {a=} and {b=}') + sub_folder_id_to_upload_to = self._get_id_from_parent_and_name(folder_id_to_upload_to, a, use_mime_type=True, raise_on_missing=False) if sub_folder_id_to_upload_to is None: - # create new folder - file_metadata = { - 'name': [a], - 'mimeType': 'application/vnd.google-apps.folder', - 'parents': [folder_id_to_upload_to] - } - gd_file = self.service.files().create(body=file_metadata, fields='id').execute() - sub_folder_id_to_upload_to = gd_file.get('id') + sub_folder_id_to_upload_to = self._mkdir(a, folder_id_to_upload_to) filename = b folder_id_to_upload_to = sub_folder_id_to_upload_to - # back to normal control flow # upload file to gd file_metadata = { @@ -180,8 +83,55 @@ class GDStorage(Storage): } media = MediaFileUpload(file, resumable=True) gd_file = self.service.files().create(body=file_metadata, media_body=media, fields='id').execute() + logger.debug(f'uploadf: uploaded file {gd_file["id"]} succesfully in folder={folder_id_to_upload_to}') def upload(self, filename: str, key: str, **kwargs): # GD only requires the filename not a file reader logger.debug(f'[{self.__class__.__name__}] uploading file {filename} with key {key}') self.uploadf(filename, key, **kwargs) + + def _get_id_from_parent_and_name(self, parent_id: str, name: str, retries: 
int = 1, sleep_seconds: int = 10, use_mime_type: bool = False, raise_on_missing: bool = True): + """ + Retrieves the id of a folder or file from its @name and the @parent_id folder + Optionally does multiple @retries and sleeps @sleep_seconds between them + If @use_mime_type will restrict search to "mimeType='application/vnd.google-apps.folder'" + If @raise_on_missing will throw error when not found, or returns None + Returns the id of the file or folder from its name as a string + """ + debug_header: str = f"[searching {name=} in {parent_id=}]" + query_string = f"'{parent_id}' in parents and name = '{name}' " + if use_mime_type: + query_string += f" and mimeType='application/vnd.google-apps.folder' " + + for attempt in range(retries): + results = self.service.files().list( + q=query_string, + spaces='drive', # ie not appDataFolder or photos + fields='files(id, name)' + ).execute() + items = results.get('files', []) + + if len(items) > 0: + logger.debug(f"{debug_header} found {len(items)} matches, returning last of {','.join([i['id'] for i in items])}") + return items[-1]['id'] + else: + logger.debug(f'{debug_header} not found, attempt {attempt+1}/{retries}. 
sleeping for {sleep_seconds} second(s)') + if attempt < retries - 1: time.sleep(sleep_seconds) + + if raise_on_missing: + raise ValueError(f'{debug_header} not found after {retries} attempt(s)') + return None + + def _mkdir(self, name: str, parent_id: str): + """ + Creates a new GDrive folder @name inside folder @parent_id + Returns id of the created folder + """ + logger.debug(f'[_mkdir] Creating new folder with {name=} inside {parent_id=}') + file_metadata = { + 'name': [name], + 'mimeType': 'application/vnd.google-apps.folder', + 'parents': [parent_id] + } + gd_folder = self.service.files().create(body=file_metadata, fields='id').execute() + return gd_folder.get('id') diff --git a/storages/local_storage.py b/storages/local_storage.py index 0dcdaef..f93446b 100644 --- a/storages/local_storage.py +++ b/storages/local_storage.py @@ -4,12 +4,10 @@ from .base_storage import Storage class LocalStorage(Storage): def __init__(self, folder): - self.folder = folder - if len(self.folder) and self.folder[-1] != '/': - self.folder += '/' + self.folder = self._clean_path(folder) def get_cdn_url(self, key): - return self.folder + key + return self.folder + self._clean_path(self.subfolder) + key def exists(self, key): return os.path.isfile(self.get_cdn_url(key)) diff --git a/storages/s3_storage.py b/storages/s3_storage.py index c637e25..5e882b3 100644 --- a/storages/s3_storage.py +++ b/storages/s3_storage.py @@ -19,8 +19,8 @@ class S3Config: endpoint_url: str = "https://{region}.digitaloceanspaces.com" cdn_url: str = "https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}" private: bool = False - key_path: str = "default" - no_folder: bool = False # when true folders are not used for url path + key_path: str = "default" # 'default' uses full naming, 'random' uses generated uuid + no_folder: bool = False # when true folders are not used for url path class S3Storage(Storage): @@ -28,7 +28,7 @@ class S3Storage(Storage): def __init__(self, config: S3Config): self.bucket = 
config.bucket self.region = config.region - self.folder = self.clean_path(config.folder) + self.folder = self._clean_path(config.folder) self.private = config.private self.cdn_url = config.cdn_url self.key_path = config.key_path @@ -54,8 +54,7 @@ class S3Storage(Storage): ext = os.path.splitext(key)[1] self.key_dict[key] = f"{str(uuid.uuid4())}{ext}" final_key = self.key_dict[key] - return self.folder + final_key - return self.folder + self.clean_path(self.subfolder) + key + return self.folder + self._clean_path(self.subfolder) + final_key def get_cdn_url(self, key): return self.cdn_url.format(bucket=self.bucket, region=self.region, key=self._get_path(key))