adds gd_drive storage

pull/72/head
msramalho 2023-02-07 21:59:24 +00:00
parent 32a8db1223
commit 51a3134065
7 changed files with 298 additions and 267 deletions
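This commit adds a Google Drive storage backend (gdrive_storage) plus a helper script that mints the OAuth token it consumes. For orientation, the token file both sides read is plain JSON as produced by google-auth's creds.to_json(); a plausible shape (field names from google-auth, values are placeholders) is:

token_info = {
    "token": "<access-token>",           # short-lived access token
    "refresh_token": "<refresh-token>",  # may be absent on re-auth, hence the workaround in the code below
    "token_uri": "https://oauth2.googleapis.com/token",
    "client_id": "<client-id>.apps.googleusercontent.com",
    "client_secret": "<client-secret>",
    "scopes": ["https://www.googleapis.com/auth/drive"],
}

Both the script and GDriveStorage default a missing "refresh_token" to "" before calling Credentials.from_authorized_user_info.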

Pipfile.lock (generated), 134 lines changed

@@ -49,27 +49,27 @@
},
"beautifulsoup4": {
"hashes": [
"sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30",
"sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693"
"sha256:0e79446b10b3ecb499c1556f7e228a53e64a2bfcebd455f370d8927cb5b59e39",
"sha256:bc4bdda6717de5a2987436fb8d72f45dc90dd856bdfd512a1314ce90349a0106"
],
"index": "pypi",
"version": "==4.11.1"
"version": "==4.11.2"
},
"boto3": {
"hashes": [
"sha256:4e876ba5d64928cde0c416dd844f04f22d6b73d14002bbc3ca55591f80f49927",
"sha256:c729bb0af76e85a2776b6bd3da8d9fa0f4b91b425eab51612aa53956f644ee23"
"sha256:3a1ffeecfe6e61d414617294b822b008e604ccfd83434c483f429a2922db314d",
"sha256:ebea98f3054b467caf6c8aead9f0ef78395a78bce78b04db12fde452c02b3734"
],
"index": "pypi",
"version": "==1.26.54"
"version": "==1.26.66"
},
"botocore": {
"hashes": [
"sha256:ca3ef7588daa664fe196d3234718db5f6b5dab961507500b4bb921e31133eea1",
"sha256:f2fe17ed6b8e163769a715f81cb6ce3d4628d172918de535256bdf34d29b704f"
"sha256:4d1ac019e677cc39e615f9d473fa658ea22a8d906c1c562f9406b5d0cd854cbd",
"sha256:772da07d2a49a9d2dc8d23e060e88eb72881e58074be7c813aa946ecdbd0e5b5"
],
"markers": "python_version >= '3.7'",
"version": "==1.29.54"
"version": "==1.29.66"
},
"brotli": {
"hashes": [
@@ -168,11 +168,11 @@
},
"cachetools": {
"hashes": [
"sha256:5991bc0e08a1319bb618d3195ca5b6bc76646a49c21d55962977197b301cc1fe",
"sha256:8462eebf3a6c15d25430a8c27c56ac61340b2ecf60c9ce57afc2b97e450e47da"
"sha256:13dfddc7b8df938c21a940dfa6557ce6e94a2f1cdfa58eb90c805721d58f2c14",
"sha256:429e1a1e845c008ea6c85aa35d4b98b65d6a9763eeef3e37e92728a12d1de9d4"
],
"markers": "python_version ~= '3.7'",
"version": "==5.2.1"
"version": "==5.3.0"
},
"certifi": {
"hashes": [
@@ -316,11 +316,11 @@
},
"dateparser": {
"hashes": [
"sha256:c47b6e4b8c4b2b2a21690111b6571b6991295ba327ec6503753abeebf5e80696",
"sha256:e703db1815270c020552f4b3e3a981937b48b2cbcfcef5347071b74788dd9214"
"sha256:fbed8b738a24c9cd7f47c4f2089527926566fe539e1a06125eddba75917b1eef",
"sha256:ff047d9cffad4d3113ead8ec0faf8a7fc43bab7d853ac8715e071312b53c465a"
],
"index": "pypi",
"version": "==1.1.6"
"version": "==1.1.7"
},
"exceptiongroup": {
"hashes": [
@@ -371,11 +371,11 @@
},
"google-api-python-client": {
"hashes": [
"sha256:7e860e3ec27b504fb797fa23c07c012a874dd736491fddbe50a20d3bdde8ace6",
"sha256:bafce2a02b06ee501df039eba5874afc7d28c9cf5ef92253327776448706556d"
"sha256:42a44e9adfca6bb27540ce52348aa1d3b81e214bcc53d454a76ebfbe8eee1483",
"sha256:f18e9dbb365f0485194a8daf5d60da2cff6a80ce2c9a694efc2b279922cb3dd0"
],
"index": "pypi",
"version": "==2.73.0"
"version": "==2.77.0"
},
"google-auth": {
"hashes": [
@@ -395,11 +395,11 @@
},
"google-auth-oauthlib": {
"hashes": [
"sha256:40cc612a13c3336d5433e94e2adb42a0c88f6feb6c55769e44500fc70043a576",
"sha256:81056a310fb1c4a3e5a7e1a443e1eb96593c6bbc55b26c0261e4d3295d3e6593"
"sha256:95880ca704928c300f48194d1770cf5b1462835b6e49db61445a520f793fd5fb",
"sha256:e375064964820b47221a7e1b7ee1fd77051b6323c3f9e3e19785f78ab67ecfc5"
],
"index": "pypi",
"version": "==0.8.0"
"version": "==1.0.0"
},
"googleapis-common-protos": {
"hashes": [
@@ -660,10 +660,11 @@
},
"mypy-extensions": {
"hashes": [
"sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
"sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
"sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d",
"sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"
],
"version": "==0.4.3"
"markers": "python_version >= '3.5'",
"version": "==1.0.0"
},
"oauth2client": {
"hashes": [
@@ -768,35 +769,42 @@
},
"pycryptodomex": {
"hashes": [
"sha256:04610536921c1ec7adba158ef570348550c9f3a40bc24be9f8da2ef7ab387981",
"sha256:0ba28aa97cdd3ff5ed1a4f2b7f5cd04e721166bd75bd2b929e2734433882b583",
"sha256:0da835af786fdd1c9930994c78b23e88d816dc3f99aa977284a21bbc26d19735",
"sha256:1619087fb5b31510b0b0b058a54f001a5ffd91e6ffee220d9913064519c6a69d",
"sha256:1cda60207be8c1cf0b84b9138f9e3ca29335013d2b690774a5e94678ff29659a",
"sha256:22aed0868622d95179217c298e37ed7410025c7b29dac236d3230617d1e4ed56",
"sha256:231dc8008cbdd1ae0e34645d4523da2dbc7a88c325f0d4a59635a86ee25b41dd",
"sha256:2ad9bb86b355b6104796567dd44c215b3dc953ef2fae5e0bdfb8516731df92cf",
"sha256:4dbbe18cc232b5980c7633972ae5417d0df76fe89e7db246eefd17ef4d8e6d7a",
"sha256:6a465e4f856d2a4f2a311807030c89166529ccf7ccc65bef398de045d49144b6",
"sha256:70288d9bfe16b2fd0d20b6c365db614428f1bcde7b20d56e74cf88ade905d9eb",
"sha256:7993d26dae4d83b8f4ce605bb0aecb8bee330bb3c95475ef06f3694403621e71",
"sha256:8851585ff19871e5d69e1790f4ca5f6fd1699d6b8b14413b472a4c0dbc7ea780",
"sha256:893f8a97d533c66cc3a56e60dd3ed40a3494ddb4aafa7e026429a08772f8a849",
"sha256:8dd2d9e3c617d0712ed781a77efd84ea579e76c5f9b2a4bc0b684ebeddf868b2",
"sha256:a1c0ae7123448ecb034c75c713189cb00ebe2d415b11682865b6c54d200d9c93",
"sha256:b0789a8490114a2936ed77c87792cfe77582c829cb43a6d86ede0f9624ba8aa3",
"sha256:b3d04c00d777c36972b539fb79958790126847d84ec0129fce1efef250bfe3ce",
"sha256:ba57ac7861fd2c837cdb33daf822f2a052ff57dd769a2107807f52a36d0e8d38",
"sha256:ce338a9703f54b2305a408fc9890eb966b727ce72b69f225898bb4e9d9ed3f1f",
"sha256:daa67f5ebb6fbf1ee9c90decaa06ca7fc88a548864e5e484d52b0920a57fe8a5",
"sha256:e2453162f473c1eae4826eb10cd7bce19b5facac86d17fb5f29a570fde145abd",
"sha256:e25a2f5667d91795f9417cb856f6df724ccdb0cdd5cbadb212ee9bf43946e9f8",
"sha256:e5a670919076b71522c7d567a9043f66f14b202414a63c3a078b5831ae342c03",
"sha256:e9ba9d8ed638733c9e95664470b71d624a6def149e2db6cc52c1aca5a6a2df1d",
"sha256:f2b971a7b877348a27dcfd0e772a0343fb818df00b74078e91c008632284137d"
"sha256:0af93aad8d62e810247beedef0261c148790c52f3cd33643791cc6396dd217c1",
"sha256:12056c38e49d972f9c553a3d598425f8a1c1d35b2e4330f89d5ff1ffb70de041",
"sha256:23d83b610bd97704f0cd3acc48d99b76a15c8c1540d8665c94d514a49905bad7",
"sha256:2d4d395f109faba34067a08de36304e846c791808524614c731431ee048fe70a",
"sha256:32e764322e902bbfac49ca1446604d2839381bbbdd5a57920c9daaf2e0b778df",
"sha256:3c2516b42437ae6c7a29ef3ddc73c8d4714e7b6df995b76be4695bbe4b3b5cd2",
"sha256:40e8a11f578bd0851b02719c862d55d3ee18d906c8b68a9c09f8c564d6bb5b92",
"sha256:4b51e826f0a04d832eda0790bbd0665d9bfe73e5a4d8ea93b6a9b38beeebe935",
"sha256:4c4674f4b040321055c596aac926d12f7f6859dfe98cd12f4d9453b43ab6adc8",
"sha256:55eed98b4150a744920597c81b3965b632038781bab8a08a12ea1d004213c600",
"sha256:599bb4ae4bbd614ca05f49bd4e672b7a250b80b13ae1238f05fd0f09d87ed80a",
"sha256:5c23482860302d0d9883404eaaa54b0615eefa5274f70529703e2c43cc571827",
"sha256:64b876d57cb894b31056ad8dd6a6ae1099b117ae07a3d39707221133490e5715",
"sha256:67a3648025e4ddb72d43addab764336ba2e670c8377dba5dd752e42285440d31",
"sha256:6feedf4b0e36b395329b4186a805f60f900129cdf0170e120ecabbfcb763995d",
"sha256:78f0ddd4adc64baa39b416f3637aaf99f45acb0bcdc16706f0cc7ebfc6f10109",
"sha256:7a6651a07f67c28b6e978d63aa3a3fccea0feefed9a8453af3f7421a758461b7",
"sha256:7a8dc3ee7a99aae202a4db52de5a08aa4d01831eb403c4d21da04ec2f79810db",
"sha256:7cc28dd33f1f3662d6da28ead4f9891035f63f49d30267d3b41194c8778997c8",
"sha256:7fa0b52df90343fafe319257b31d909be1d2e8852277fb0376ba89d26d2921db",
"sha256:88b0d5bb87eaf2a31e8a759302b89cf30c97f2f8ca7d83b8c9208abe8acb447a",
"sha256:a4fa037078e92c7cc49f6789a8bac3de06856740bb2038d05f2d9a2e4b165d59",
"sha256:a57e3257bacd719769110f1f70dd901c5b6955e9596ad403af11a3e6e7e3311c",
"sha256:ab33c2d9f275e05e235dbca1063753b5346af4a5cac34a51fa0da0d4edfb21d7",
"sha256:c84689c73358dfc23f9fdcff2cb9e7856e65e2ce3b5ed8ff630d4c9bdeb1867b",
"sha256:c92537b596bd5bffb82f8964cabb9fef1bca8a28a9e0a69ffd3ec92a4a7ad41b",
"sha256:caa937ff29d07a665dfcfd7a84f0d4207b2ebf483362fa9054041d67fdfacc20",
"sha256:d38ab9e53b1c09608ba2d9b8b888f1e75d6f66e2787e437adb1fecbffec6b112",
"sha256:d4cf0128da167562c49b0e034f09e9cedd733997354f2314837c2fa461c87bb1",
"sha256:db23d7341e21b273d2440ec6faf6c8b1ca95c8894da612e165be0b89a8688340",
"sha256:ee8bf4fdcad7d66beb744957db8717afc12d176e3fd9c5d106835133881a049b",
"sha256:f854c8476512cebe6a8681cc4789e4fcff6019c17baa0fd72b459155dc605ab4",
"sha256:fd29d35ac80755e5c0a99d96b44fb9abbd7e871849581ea6a4cb826d24267537"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==3.16.0"
"version": "==3.17"
},
"pygments": {
"hashes": [
@@ -840,11 +848,11 @@
},
"python-slugify": {
"hashes": [
"sha256:003aee64f9fd955d111549f96c4b58a3f40b9319383c70fad6277a4974bbf570",
"sha256:7a0f21a39fa6c1c4bf2e5984c9b9ae944483fd10b54804cb0e23a3ccd4954f0b"
"sha256:51f217508df20a6c166c7821683384b998560adcf8f19a6c2ca8b460528ccd9c",
"sha256:f1da83f3c7ab839b3f84543470cd95bdb5a81f1a0b80fed502f78b7dca256062"
],
"index": "pypi",
"version": "==7.0.0"
"version": "==8.0.0"
},
"python-twitter-v2": {
"hashes": [
@@ -1035,11 +1043,11 @@
},
"rich": {
"hashes": [
"sha256:7c963f0d03819221e9ac561e1bc866e3f95a02248c1234daa48954e6d381c003",
"sha256:f1a00cdd3eebf999a15d85ec498bfe0b1a77efe9b34f645768a54132ef444ac5"
"sha256:125d96d20c92b946b983d0d392b84ff945461e5a06d3867e9f9e575f8697b67f",
"sha256:8aa57747f3fc3e977684f0176a88e789be314a99f99b43b75d1e9cb5dc6db9e9"
],
"markers": "python_version >= '3.7'",
"version": "==13.2.0"
"version": "==13.3.1"
},
"rsa": {
"hashes": [
@@ -1059,11 +1067,11 @@
},
"selenium": {
"hashes": [
"sha256:06a1c7d9f313130b21c3218ddd8852070d0e7419afdd31f96160cd576555a5ce",
"sha256:3aefa14a28a42e520550c1cd0f29cf1d566328186ea63aa9a3e01fb265b5894d"
"sha256:20f28ee4ea9b273b4112a7df5276ebb3052f79ff6eff42a564db6143e5926683",
"sha256:fee36724d6cf0b18c73781bb8ec7be4a35ab1e2564e64e64e64da75e50e052af"
],
"index": "pypi",
"version": "==4.7.2"
"version": "==4.8.0"
},
"six": {
"hashes": [
@@ -1106,11 +1114,11 @@
},
"telethon": {
"hashes": [
"sha256:3ec7ea04e61e0179dd08b974b609814e1a5298eeda3d68368a34bba754f43aec",
"sha256:d894f6ef2bf2cb119f6413b9f620957503785bab0999694b4bf67dea36f8ee09"
"sha256:21fb26051adc521a4a00a157e6f4a9e87711940ac3504414f96e66056918ef61",
"sha256:39ae3c3335ddd5acc80e395969f27556df140a73e58e9d3bb45863c766c23a8c"
],
"index": "pypi",
"version": "==1.26.1"
"version": "==1.27.0"
},
"text-unidecode": {
"hashes": [


@@ -1,4 +1,5 @@
import os.path
import click, json
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
@@ -6,27 +7,41 @@ from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# If creating for the first time, download the OAuth Client IDs json `credentials.json` from https://console.cloud.google.com/apis/credentials (OAuth 2.0 Client IDs)
# add "http://localhost:55192/" to the list of "Authorised redirect URIs"
# https://davemateer.com/2022/04/28/google-drive-with-python for more information
# You can run this code to get a new token and verify it belongs to the correct user
# This token will be refreshed automatically by the auto-archiver
# Code below from https://developers.google.com/drive/api/quickstart/python
SCOPES = ['https://www.googleapis.com/auth/drive']
def main():
token_file = 'gd-token.json'
creds = None
@click.command(
help="script to generate a Google Drive OAuth token for gdrive_storage; requires credentials.json and outputs gd-token.json. If you don't have credentials.json, go to https://console.cloud.google.com/apis/credentials. Be sure to add 'http://localhost:55192/' to the Authorized redirect URIs in your OAuth App. More info: https://davemateer.com/2022/04/28/google-drive-with-python"
)
@click.option(
"--credentials",
"-c",
type=click.Path(exists=True),
help="path to the credentials.json file downloaded from https://console.cloud.google.com/apis/credentials",
required=True
)
@click.option(
"--token",
"-t",
type=click.Path(exists=False),
default="gd-token.json",
help="file where to place the OAuth token, defaults to gd-token.json which you must then move to where your orchestration file points to, defaults to gd-token.json",
required=True
)
def main(credentials, token):
# The file token.json stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first
# time.
if os.path.exists(token_file):
creds = Credentials.from_authorized_user_file(token_file, SCOPES)
# created automatically when the authorization flow completes for the first time.
creds = None
if os.path.exists(token):
with open(token, 'r') as stream:
creds_json = json.load(stream)
# creds = Credentials.from_authorized_user_file(creds_json, SCOPES)
creds_json['refresh_token'] = creds_json.get("refresh_token", "")
creds = Credentials.from_authorized_user_info(creds_json, SCOPES)
# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
@@ -36,10 +51,10 @@ def main():
else:
print('First run through so putting up login dialog')
# credentials.json downloaded from https://console.cloud.google.com/apis/credentials
flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
flow = InstalledAppFlow.from_client_secrets_file(credentials, SCOPES)
creds = flow.run_local_server(port=55192)
# Save the credentials for the next run
with open(token_file, 'w') as token:
with open(token, 'w') as token:
print('Saving new token')
token.write(creds.to_json())
else:
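Once the script has run (for example python scripts/create_update_gdrive_oauth_token.py -c credentials.json -t gd-token.json; the script path is assumed), a quick sanity check of the generated token that mirrors the loading logic above:

import json
from google.oauth2.credentials import Credentials

SCOPES = ['https://www.googleapis.com/auth/drive']
with open('gd-token.json') as f:
    info = json.load(f)
info['refresh_token'] = info.get('refresh_token', '')  # same workaround as the archiver uses
creds = Credentials.from_authorized_user_info(info, SCOPES)
print(f'valid={creds.valid} expired={creds.expired}')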


@@ -1,3 +1,4 @@
from .storage import Storage
from .s3 import S3Storage
from .local import LocalStorage
from .gd import GDriveStorage
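With the re-export in place, callers can import the new storage from the package root like the existing ones, e.g. (package path assumed):

from auto_archiver.storages import GDriveStorage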


@@ -0,0 +1,192 @@
import shutil, os, time, json
from typing import IO
from loguru import logger
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google.oauth2 import service_account
from google.oauth2.credentials import Credentials
from google.auth.transport.requests import Request
from ..core import Media
from . import Storage
class GDriveStorage(Storage):
    name = "gdrive_storage"

    def __init__(self, config: dict) -> None:
        super().__init__(config)
        SCOPES = ['https://www.googleapis.com/auth/drive']
        if self.oauth_token is not None:
            """
            Tokens are refreshed after 1 hour
            however keep working for 7 days (tbc)
            so as long as the job doesn't last for 7 days
            then this method of refreshing only once per run will work
            see this link for details on the token
            https://davemateer.com/2022/04/28/google-drive-with-python#tokens
            """
            logger.debug(f'Using GD OAuth token {self.oauth_token}')
            # workaround for missing 'refresh_token' in from_authorized_user_file
            with open(self.oauth_token, 'r') as stream:
                creds_json = json.load(stream)
            creds_json['refresh_token'] = creds_json.get("refresh_token", "")
            creds = Credentials.from_authorized_user_info(creds_json, SCOPES)
            # creds = Credentials.from_authorized_user_file(self.oauth_token, SCOPES)
            if not creds or not creds.valid:
                if creds and creds.expired and creds.refresh_token:
                    logger.debug('Requesting new GD OAuth token')
                    creds.refresh(Request())
                else:
                    raise Exception("Problem with creds - create the token again")
                # Save the credentials for the next run
                with open(self.oauth_token, 'w') as token:
                    logger.debug('Saving new GD OAuth token')
                    token.write(creds.to_json())
            else:
                logger.debug('GD OAuth Token valid')
        else:
            # the base class exposes config entries as attributes
            gd_service_account = self.service_account
            logger.debug(f'Using GD Service Account {gd_service_account}')
            creds = service_account.Credentials.from_service_account_file(gd_service_account, scopes=SCOPES)
        self.service = build('drive', 'v3', credentials=creds)
    @staticmethod
    def configs() -> dict:
        return dict(
            Storage.configs(),
            ** {
                "root_folder_id": {"default": None, "help": "root google drive folder ID to use as storage, found in URL: 'https://drive.google.com/drive/folders/FOLDER_ID'"},
                "oauth_token": {"default": None, "help": "JSON filename with Google Drive OAuth token: check auto-archiver repository scripts folder for create_update_gdrive_oauth_token.py. NOTE: storage used will count towards owner of GDrive folder, therefore it is best to use oauth_token over service_account."},
                "service_account": {"default": "secrets/service_account.json", "help": "service account JSON file path, same as used for Google Sheets. NOTE: storage used will count towards the developer account."},
            })
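For illustration, a minimal instantiation sketch using these keys (the folder id and paths are hypothetical, and Storage.configs() may contribute further required keys):

storage = GDriveStorage({
    "root_folder_id": "1A2b3C4d5E6f",            # from the drive folder URL
    "oauth_token": "secrets/gd-token.json",       # or None to fall back to the service account
    "service_account": "secrets/service_account.json",
})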
    def get_cdn_url(self, media: Media) -> str:
        """
        only support files saved in a folder for GD
        S3 supports folder and all stored in the root
        """
        # full_name = os.path.join(self.folder, media.key)
        parent_id, folder_id = self.root_folder_id, None
        path_parts = media.key.split(os.path.sep)
        filename = path_parts[-1]
        logger.info(f"looking for folders for {path_parts[0:-1]} before getting url for {filename=}")
        for folder in path_parts[0:-1]:
            folder_id = self._get_id_from_parent_and_name(parent_id, folder, use_mime_type=True, raise_on_missing=True)
            parent_id = folder_id
        # get id of file inside folder (or sub folder)
        file_id = self._get_id_from_parent_and_name(folder_id, filename)
        return f"https://drive.google.com/file/d/{file_id}/view?usp=sharing"
    def upload(self, media: Media, **kwargs) -> bool:
        """
        1. for each sub-folder in the path check if exists or create
        2. upload file to root_id/other_paths.../filename
        """
        logger.debug(f'[{self.__class__.name}] storing file {media.filename} with key {media.key}')
        parent_id, upload_to = self.root_folder_id, None
        path_parts = media.key.split(os.path.sep)
        filename = path_parts[-1]
        logger.info(f"checking folders {path_parts[0:-1]} exist (or creating) before uploading {filename=}")
        for folder in path_parts[0:-1]:
            upload_to = self._get_id_from_parent_and_name(parent_id, folder, use_mime_type=True, raise_on_missing=False)
            if upload_to is None:
                upload_to = self._mkdir(folder, parent_id)
            parent_id = upload_to
        # upload file to gd
        logger.debug(f'uploading {filename=} to folder id {upload_to}')
        file_metadata = {
            'name': [filename],
            'parents': [upload_to]
        }
        media = MediaFileUpload(media.filename, resumable=True)
        gd_file = self.service.files().create(body=file_metadata, media_body=media, fields='id').execute()
        logger.debug(f'upload: uploaded file {gd_file["id"]} successfully in folder={upload_to}')
        return True

    # must be implemented even if unused
    def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool: pass
    def _get_id_from_parent_and_name(self, parent_id: str, name: str, retries: int = 1, sleep_seconds: int = 10, use_mime_type: bool = False, raise_on_missing: bool = True, use_cache=False):
        """
        Retrieves the id of a folder or file from its @name and the @parent_id folder
        Optionally does multiple @retries and sleeps @sleep_seconds between them
        If @use_mime_type will restrict search to "mimeType='application/vnd.google-apps.folder'"
        If @raise_on_missing will throw error when not found, or returns None
        Will remember previous calls to avoid duplication if @use_cache - might not have all edge cases tested, so use at own risk
        Returns the id of the file or folder from its name as a string
        """
        # cache logic
        if use_cache:
            self.api_cache = getattr(self, "api_cache", {})
            cache_key = f"{parent_id}_{name}_{use_mime_type}"
            if cache_key in self.api_cache:
                logger.debug(f"cache hit for {cache_key=}")
                return self.api_cache[cache_key]
        # API logic
        debug_header: str = f"[searching {name=} in {parent_id=}]"
        query_string = f"'{parent_id}' in parents and name = '{name}' and trashed = false "
        if use_mime_type:
            query_string += f" and mimeType='application/vnd.google-apps.folder' "
        for attempt in range(retries):
            results = self.service.files().list(
                q=query_string,
                spaces='drive',  # ie not appDataFolder or photos
                fields='files(id, name)'
            ).execute()
            items = results.get('files', [])
            if len(items) > 0:
                logger.debug(f"{debug_header} found {len(items)} matches, returning last of {','.join([i['id'] for i in items])}")
                _id = items[-1]['id']
                if use_cache: self.api_cache[cache_key] = _id
                return _id
            else:
                logger.debug(f'{debug_header} not found, attempt {attempt+1}/{retries}.')
                if attempt < retries - 1:
                    logger.debug(f'sleeping for {sleep_seconds} second(s)')
                    time.sleep(sleep_seconds)
        if raise_on_missing:
            raise ValueError(f'{debug_header} not found after {retries} attempt(s)')
        return None
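A usage sketch: when a caller expects an eventually-consistent folder to appear, it can poll and cache the lookup (all argument values hypothetical):

folder_id = self._get_id_from_parent_and_name(
    parent_id="1A2b3C4d5E6f",    # hypothetical drive folder id
    name="hash123",
    retries=3, sleep_seconds=10,  # poll up to ~30s
    use_mime_type=True,           # folders only
    raise_on_missing=False,       # return None instead of raising
    use_cache=True)               # remember the id for repeated calls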
    def _mkdir(self, name: str, parent_id: str):
        """
        Creates a new GDrive folder @name inside folder @parent_id
        Returns id of the created folder
        """
        logger.debug(f'Creating new folder with {name=} inside {parent_id=}')
        file_metadata = {
            'name': [name],
            'mimeType': 'application/vnd.google-apps.folder',
            'parents': [parent_id]
        }
        gd_folder = self.service.files().create(body=file_metadata, fields='id').execute()
        return gd_folder.get('id')

    # def exists(self, key):
    #     try:
    #         self.get_cdn_url(key)
    #         return True
    #     except: return False


@@ -1,181 +0,0 @@
#TODO: refactor GDriveStorage before merging to main
# import os, time
# from loguru import logger
# from .base_storage import Storage
# from dataclasses import dataclass
# from googleapiclient.discovery import build
# from googleapiclient.http import MediaFileUpload
# from google.oauth2 import service_account
# from google.oauth2.credentials import Credentials
# from google.auth.transport.requests import Request
# @dataclass
# class GDConfig:
# root_folder_id: str
# oauth_token_filename: str
# service_account: str = "service_account.json"
# folder: str = "default"
# class GDStorage(Storage):
# def __init__(self, config: GDConfig):
# self.folder = config.folder
# self.root_folder_id = config.root_folder_id
# SCOPES=['https://www.googleapis.com/auth/drive']
# token_file = config.oauth_token_filename
# if token_file is not None:
# """
# Tokens are refreshed after 1 hour
# however keep working for 7 days (tbc)
# so as long as the job doesn't last for 7 days
# then this method of refreshing only once per run will work
# see this link for details on the token
# https://davemateer.com/2022/04/28/google-drive-with-python#tokens
# """
# logger.debug(f'Using GD OAuth token {token_file}')
# creds = Credentials.from_authorized_user_file(token_file, SCOPES)
# if not creds or not creds.valid:
# if creds and creds.expired and creds.refresh_token:
# logger.debug('Requesting new GD OAuth token')
# creds.refresh(Request())
# else:
# raise Exception("Problem with creds - create the token again")
# # Save the credentials for the next run
# with open(token_file, 'w') as token:
# logger.debug('Saving new GD OAuth token')
# token.write(creds.to_json())
# else:
# logger.debug('GD OAuth Token valid')
# else:
# gd_service_account = config.service_account
# logger.debug(f'Using GD Service Account {gd_service_account}')
# creds = service_account.Credentials.from_service_account_file(gd_service_account, scopes=SCOPES)
# self.service = build('drive', 'v3', credentials=creds)
# def get_cdn_url(self, key):
# """
# only support files saved in a folder for GD
# S3 supports folder and all stored in the root
# """
# key = self.clean_key(key)
# full_name = os.path.join(self.folder, key)
# parent_id, folder_id = self.root_folder_id, None
# path_parts = full_name.split(os.path.sep)
# filename = path_parts[-1]
# logger.info(f"looking for folders for {path_parts[0:-1]} before uploading {filename=}")
# for folder in path_parts[0:-1]:
# folder_id = self._get_id_from_parent_and_name(parent_id, folder, use_mime_type=True, raise_on_missing=True)
# parent_id = folder_id
# # get id of file inside folder (or sub folder)
# file_id = self._get_id_from_parent_and_name(folder_id, filename)
# return f"https://drive.google.com/file/d/{file_id}/view?usp=sharing"
# def exists(self, key):
# try:
# self.get_cdn_url(key)
# return True
# except: return False
# def uploadf(self, file: str, key: str, **_kwargs):
# """
# 1. for each sub-folder in the path check if exists or create
# 2. upload file to root_id/other_paths.../filename
# """
# key = self.clean_key(key)
# full_name = os.path.join(self.folder, key)
# parent_id, upload_to = self.root_folder_id, None
# path_parts = full_name.split(os.path.sep)
# filename = path_parts[-1]
# logger.info(f"checking folders {path_parts[0:-1]} exist (or creating) before uploading {filename=}")
# for folder in path_parts[0:-1]:
# upload_to = self._get_id_from_parent_and_name(parent_id, folder, use_mime_type=True, raise_on_missing=False)
# if upload_to is None:
# upload_to = self._mkdir(folder, parent_id)
# parent_id = upload_to
# # upload file to gd
# logger.debug(f'uploading {filename=} to folder id {upload_to}')
# file_metadata = {
# 'name': [filename],
# 'parents': [upload_to]
# }
# media = MediaFileUpload(file, resumable=True)
# gd_file = self.service.files().create(body=file_metadata, media_body=media, fields='id').execute()
# logger.debug(f'uploadf: uploaded file {gd_file["id"]} successfully in folder={upload_to}')
# def upload(self, filename: str, key: str, **kwargs):
# # GD only requires the filename not a file reader
# self.uploadf(filename, key, **kwargs)
# # gets the Drive folderID if it is there
# def _get_id_from_parent_and_name(self, parent_id: str, name: str, retries: int = 1, sleep_seconds: int = 10, use_mime_type: bool = False, raise_on_missing: bool = True, use_cache=False):
# """
# Retrieves the id of a folder or file from its @name and the @parent_id folder
# Optionally does multiple @retries and sleeps @sleep_seconds between them
# If @use_mime_type will restrict search to "mimeType='application/vnd.google-apps.folder'"
# If @raise_on_missing will throw error when not found, or returns None
# Will remember previous calls to avoid duplication if @use_cache - might not have all edge cases tested, so use at own risk
# Returns the id of the file or folder from its name as a string
# """
# # cache logic
# if use_cache:
# self.api_cache = getattr(self, "api_cache", {})
# cache_key = f"{parent_id}_{name}_{use_mime_type}"
# if cache_key in self.api_cache:
# logger.debug(f"cache hit for {cache_key=}")
# return self.api_cache[cache_key]
# # API logic
# debug_header: str = f"[searching {name=} in {parent_id=}]"
# query_string = f"'{parent_id}' in parents and name = '{name}' and trashed = false "
# if use_mime_type:
# query_string += f" and mimeType='application/vnd.google-apps.folder' "
# for attempt in range(retries):
# results = self.service.files().list(
# q=query_string,
# spaces='drive', # ie not appDataFolder or photos
# fields='files(id, name)'
# ).execute()
# items = results.get('files', [])
# if len(items) > 0:
# logger.debug(f"{debug_header} found {len(items)} matches, returning last of {','.join([i['id'] for i in items])}")
# _id = items[-1]['id']
# if use_cache: self.api_cache[cache_key] = _id
# return _id
# else:
# logger.debug(f'{debug_header} not found, attempt {attempt+1}/{retries}.')
# if attempt < retries - 1:
# logger.debug(f'sleeping for {sleep_seconds} second(s)')
# time.sleep(sleep_seconds)
# if raise_on_missing:
# raise ValueError(f'{debug_header} not found after {retries} attempt(s)')
# return None
# def _mkdir(self, name: str, parent_id: str):
# """
# Creates a new GDrive folder @name inside folder @parent_id
# Returns id of the created folder
# """
# logger.debug(f'Creating new folder with {name=} inside {parent_id=}')
# file_metadata = {
# 'name': [name],
# 'mimeType': 'application/vnd.google-apps.folder',
# 'parents': [parent_id]
# }
# gd_folder = self.service.files().create(body=file_metadata, fields='id').execute()
# return gd_folder.get('id')


@@ -1,12 +1,9 @@
import shutil
from typing import IO, Any
import boto3, uuid, os, mimetypes
from botocore.errorfactory import ClientError
from typing import IO
import os
from loguru import logger
from slugify import slugify
from ..core import Metadata
from ..core import Media
from ..storages import Storage
@@ -28,7 +25,7 @@ class LocalStorage(Storage):
})
def get_cdn_url(self, media: Media) -> str:
#TODO: is this viable with Storage.configs on path/filename?
# TODO: is this viable with Storage.configs on path/filename?
dest = os.path.join(self.save_to, media.key)
if self.save_absolute:
dest = os.path.abspath(dest)


@@ -41,7 +41,6 @@ class S3Storage(Storage):
"help": "S3 CDN url, {bucket}, {region} and {key} are inserted at runtime"
},
"private": {"default": False, "help": "if true S3 files will not be readable online"},
# "key_path": {"default": "random", "help": "S3 file names are non-predictable strings, one of ['random', 'default']"},
})
def get_cdn_url(self, media: Media) -> str: