Pass media to storage cdn_call

pull/189/head
erinhmclark 2025-02-06 22:01:55 +00:00
parent 266c7a14e6
commit e9ad1e1b85
3 changed files with 49 additions and 113 deletions

View file

@@ -65,7 +65,7 @@ class Media:
     def is_stored(self, in_storage) -> bool:
         # checks if the media is already stored in the given storage
-        return len(self.urls) > 0 and any([u for u in self.urls if in_storage.get_cdn_url() in u])
+        return len(self.urls) > 0 and any([u for u in self.urls if in_storage.get_cdn_url(self) in u])

     def set(self, key: str, value: Any) -> Media:
         self.properties[key] = value

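For context: `is_stored` now forwards the `Media` instance to `get_cdn_url`, so a storage can build a key-specific URL instead of comparing against a bare CDN root. A minimal sketch of the assumed contract, using a hypothetical `StorageSketch` class (the `cdn_url` template mirrors the one configured in the tests below):

class StorageSketch:
    """Hypothetical stand-in for a storage with a templated CDN URL."""

    def __init__(self, cdn_url: str):
        self.cdn_url = cdn_url  # e.g. "https://cdn.example.com/{key}"

    def get_cdn_url(self, media) -> str:
        # receives the Media instance that is_stored() now passes in
        return self.cdn_url.format(key=media.key)

Under that assumption, each URL in `media.urls` is checked against the key-specific `get_cdn_url(media)` rather than a prefix shared by every stored object.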
View file

@@ -74,7 +74,8 @@ class GDriveStorage(Storage):
             parent_id = folder_id

         # get id of file inside folder (or sub folder)
-        file_id = self._get_id_from_parent_and_name(folder_id, filename)
+        # TODO: suppressing the error, as it is checked before the first upload
+        file_id = self._get_id_from_parent_and_name(folder_id, filename, raise_on_missing=False)
         return f"https://drive.google.com/file/d/{file_id}/view?usp=sharing"

     def upload(self, media: Media, **kwargs) -> bool:
@@ -106,7 +107,13 @@ class GDriveStorage:
     # must be implemented even if unused
     def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool: pass

-    def _get_id_from_parent_and_name(self, parent_id: str, name: str, retries: int = 1, sleep_seconds: int = 10, use_mime_type: bool = False, raise_on_missing: bool = True, use_cache=False):
+    def _get_id_from_parent_and_name(self, parent_id: str,
+                                     name: str,
+                                     retries: int = 1,
+                                     sleep_seconds: int = 10,
+                                     use_mime_type: bool = False,
+                                     raise_on_missing: bool = True,
+                                     use_cache=False):
         """
         Retrieves the id of a folder or file from its @name and the @parent_id folder
         Optionally does multiple @retries and sleeps @sleep_seconds between them

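The `raise_on_missing=False` argument above suppresses the lookup error because `get_cdn_url` may run before the file's first upload. A hedged sketch of what the flag plausibly controls; only the parameter name comes from the diff, and the body below is illustrative:

def lookup_id_sketch(matches: list, raise_on_missing: bool = True):
    # illustrative only: with raise_on_missing=False, a file that has not
    # been uploaded yet yields None instead of an exception
    if not matches:
        if raise_on_missing:
            raise ValueError("no file or folder found for that name and parent")
        return None
    return matches[0]["id"]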
View file

@@ -1,103 +1,11 @@
 from typing import Type

 import pytest
-from unittest.mock import MagicMock, patch, PropertyMock
+from unittest.mock import MagicMock, patch

 from auto_archiver.core import Media
-from auto_archiver.modules.hash_enricher import HashEnricher
 from auto_archiver.modules.s3_storage import s3_storage
-
-
-@patch('boto3.client')
-@pytest.fixture
-def s3_store(setup_module):
-    config: dict = {
-        "path_generator": "flat",
-        "filename_generator": "static",
-        "bucket": "test-bucket",
-        "region": "test-region",
-        "key": "test-key",
-        "secret": "test-secret",
-        "random_no_duplicate": False,
-        "endpoint_url": "https://{region}.example.com",
-        "cdn_url": "https://cdn.example.com/{key}",
-        "private": False,
-    }
-    s3_storage = setup_module("s3_storage", config)
-    return s3_storage
-
-
-def test_client_initialization(s3_store):
-    """Test that S3 client is initialized with correct parameters"""
-    assert s3_store.s3 is not None
-    assert s3_store.s3.meta.region_name == 'test-region'
-
-
-def test_get_cdn_url_generation(s3_store):
-    """Test CDN URL formatting"""
-    media = Media("test.txt")
-    media.key = "path/to/file.txt"
-    url = s3_store.get_cdn_url(media)
-    assert url == "https://cdn.example.com/path/to/file.txt"
-    media.key = "another/path.jpg"
-    assert s3_store.get_cdn_url(media) == "https://cdn.example.com/another/path.jpg"
-
-
-@patch.object(s3_storage.S3Storage, 'file_in_folder')
-def test_skips_upload_when_duplicate_exists(mock_file_in_folder, s3_store):
-    """Test that upload skips when file_in_folder finds existing object"""
-    # Setup test-specific configuration
-    s3_store.random_no_duplicate = True
-    mock_file_in_folder.return_value = "existing_folder/existing_file.txt"
-
-    # Create test media with calculated hash
-    media = Media("test.txt")
-    media.key = "original_path.txt"
-
-    # Mock hash calculation
-    with patch.object(s3_store, 'calculate_hash') as mock_calculate_hash:
-        mock_calculate_hash.return_value = "testhash123"
-
-        # Verify upload
-        assert s3_store.is_upload_needed(media) is False
-        assert media.key == "existing_folder/existing_file.txt"
-        assert media.get("previously archived") is True
-
-        with patch.object(s3_store.s3, 'upload_fileobj') as mock_upload:
-            result = s3_store.uploadf(None, media)
-            mock_upload.assert_not_called()
-            assert result is True
-
-
-@patch.object(s3_storage.S3Storage, 'is_upload_needed')
-def test_uploads_with_correct_parameters(mock_upload_needed, s3_store):
-    media = Media("test.txt")
-    mock_upload_needed.return_value = True
-    media.mimetype = 'image/png'
-    mock_file = MagicMock()
-
-    with patch.object(s3_store.s3, 'upload_fileobj') as mock_upload:
-        s3_store.uploadf(mock_file, media)
-
-    # Verify core upload parameters
-    mock_upload.assert_called_once_with(
-        mock_file,
-        Bucket='test-bucket',
-        # Key='original_key.txt',
-        Key=None,
-        ExtraArgs={
-            'ACL': 'public-read',
-            'ContentType': 'image/png'
-        }
-    )

 # ============================================================
 class TestGDriveStorage:
     """
     Test suite for GDriveStorage.
@@ -121,10 +29,9 @@ class TestGDriveStorage:
     @patch('boto3.client')
     @pytest.fixture(autouse=True)
     def setup_storage(self, setup_module):
-        he = HashEnricher()
         self.storage = setup_module(self.module_name, self.config)

-    def test_client_initialization(self, setup_storage):
+    def test_client_initialization(self):
         """Test that S3 client is initialized with correct parameters"""
         assert self.storage.s3 is not None
         assert self.storage.s3.meta.region_name == 'test-region'
@@ -138,37 +45,55 @@ class TestGDriveStorage:
         media.key = "another/path.jpg"
         assert self.storage.get_cdn_url(media) == "https://cdn.example.com/another/path.jpg"

+    def test_uploadf_sets_acl_public(self):
+        media = Media("test.txt")
+        mock_file = MagicMock()
+        with patch.object(self.storage.s3, 'upload_fileobj') as mock_s3_upload, \
+             patch.object(self.storage, 'is_upload_needed', return_value=True):
+            self.storage.uploadf(mock_file, media)
+            mock_s3_upload.assert_called_once_with(
+                mock_file,
+                Bucket='test-bucket',
+                Key=media.key,
+                ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/plain'}
+            )
+
     def test_upload_decision_logic(self):
         """Test is_upload_needed under different conditions"""
         media = Media("test.txt")
-        # Test random_no_duplicate disabled
+        # Test default state (random_no_duplicate=False)
         assert self.storage.is_upload_needed(media) is True

-        # Set duplicate checking config to true:
+        # Test duplicate exists
         self.storage.random_no_duplicate = True
-        with patch.object(self.storage, 'file_in_folder', return_value='existing.txt'):
+        with patch('auto_archiver.modules.hash_enricher.HashEnricher.calculate_hash') as mock_calc_hash, \
+             patch.object(self.storage, 'file_in_folder') as mock_file_in_folder:
+            mock_calc_hash.return_value = 'beepboop123beepboop123beepboop123'
+            mock_file_in_folder.return_value = 'existing_key.txt'
             # Test duplicate result
             assert self.storage.is_upload_needed(media) is False
-            assert media.key == 'existing.txt'
+            assert media.key == 'existing_key.txt'
+            mock_file_in_folder.assert_called_with(
+                # (first 24 chars of hash)
+                'no-dups/beepboop123beepboop123be'
+            )

     @patch.object(s3_storage.S3Storage, 'file_in_folder')
     def test_skips_upload_when_duplicate_exists(self, mock_file_in_folder):
         """Test that upload skips when file_in_folder finds existing object"""
         # Setup test-specific configuration
         self.storage.random_no_duplicate = True
         mock_file_in_folder.return_value = "existing_folder/existing_file.txt"

         # Create test media with calculated hash
         media = Media("test.txt")
         media.key = "original_path.txt"

         # Mock hash calculation
-        with patch.object(self.storage, 'calculate_hash') as mock_calculate_hash:
-            mock_calculate_hash.return_value = "testhash123"
+        with patch('auto_archiver.modules.hash_enricher.HashEnricher.calculate_hash') as mock_calculate_hash:
+            mock_calculate_hash.return_value = "beepboop123beepboop123beepboop123"
             # Verify upload
             assert self.storage.is_upload_needed(media) is False
             assert media.key == "existing_folder/existing_file.txt"
             assert media.get("previously archived") is True

             with patch.object(self.storage.s3, 'upload_fileobj') as mock_upload:
                 result = self.storage.uploadf(None, media)
                 mock_upload.assert_not_called()
@@ -177,21 +102,25 @@ class TestGDriveStorage:
     @patch.object(s3_storage.S3Storage, 'is_upload_needed')
     def test_uploads_with_correct_parameters(self, mock_upload_needed):
         media = Media("test.txt")
+        media.key = "original_key.txt"
         mock_upload_needed.return_value = True
         media.mimetype = 'image/png'
         mock_file = MagicMock()

         with patch.object(self.storage.s3, 'upload_fileobj') as mock_upload:
             self.storage.uploadf(mock_file, media)

-        # Verify core upload parameters
+        # verify the call occurred with these params
         mock_upload.assert_called_once_with(
             mock_file,
             Bucket='test-bucket',
-            # Key='original_key.txt',
-            Key=None,
+            Key='original_key.txt',
             ExtraArgs={
                 'ACL': 'public-read',
                 'ContentType': 'image/png'
             }
-            )
+        )

+    def test_file_in_folder_exists(self):
+        with patch.object(self.storage.s3, 'list_objects') as mock_list_objects:
+            mock_list_objects.return_value = {'Contents': [{'Key': 'path/to/file.txt'}]}
+            assert self.storage.file_in_folder('path/to/') == 'path/to/file.txt'
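A closing note on `test_upload_decision_logic` above: the expected `file_in_folder` argument encodes the dedupe-key rule the S3 storage is assumed to apply when `random_no_duplicate` is enabled, namely a `no-dups/` prefix plus the first 24 characters of the file hash. A minimal sketch of that derivation (the helper name is hypothetical; the rule is read directly off the test's expected value):

def no_dups_prefix(file_hash: str) -> str:
    # "no-dups/" + first 24 chars of the hash, as asserted in the test above
    return f"no-dups/{file_hash[:24]}"

assert no_dups_prefix("beepboop123beepboop123beepboop123") == "no-dups/beepboop123beepboop123be"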