kopia lustrzana https://github.com/bellingcat/auto-archiver
Pass media to storage cdn_call
rodzic
266c7a14e6
commit
e9ad1e1b85
|
@ -65,7 +65,7 @@ class Media:
|
|||
|
||||
def is_stored(self, in_storage) -> bool:
|
||||
# checks if the media is already stored in the given storage
|
||||
return len(self.urls) > 0 and any([u for u in self.urls if in_storage.get_cdn_url() in u])
|
||||
return len(self.urls) > 0 and any([u for u in self.urls if in_storage.get_cdn_url(self) in u])
|
||||
|
||||
def set(self, key: str, value: Any) -> Media:
|
||||
self.properties[key] = value
|
||||
|
|
|
@ -74,7 +74,8 @@ class GDriveStorage(Storage):
|
|||
parent_id = folder_id
|
||||
|
||||
# get id of file inside folder (or sub folder)
|
||||
file_id = self._get_id_from_parent_and_name(folder_id, filename)
|
||||
# TODO: supressing the error as being checked before first upload
|
||||
file_id = self._get_id_from_parent_and_name(folder_id, filename, raise_on_missing=False)
|
||||
return f"https://drive.google.com/file/d/{file_id}/view?usp=sharing"
|
||||
|
||||
def upload(self, media: Media, **kwargs) -> bool:
|
||||
|
@ -106,7 +107,13 @@ class GDriveStorage(Storage):
|
|||
# must be implemented even if unused
|
||||
def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool: pass
|
||||
|
||||
def _get_id_from_parent_and_name(self, parent_id: str, name: str, retries: int = 1, sleep_seconds: int = 10, use_mime_type: bool = False, raise_on_missing: bool = True, use_cache=False):
|
||||
def _get_id_from_parent_and_name(self, parent_id: str,
|
||||
name: str,
|
||||
retries: int = 1,
|
||||
sleep_seconds: int = 10,
|
||||
use_mime_type: bool = False,
|
||||
raise_on_missing: bool = True,
|
||||
use_cache=False):
|
||||
"""
|
||||
Retrieves the id of a folder or file from its @name and the @parent_id folder
|
||||
Optionally does multiple @retries and sleeps @sleep_seconds between them
|
||||
|
|
|
@ -1,103 +1,11 @@
|
|||
from typing import Type
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch, PropertyMock
|
||||
from unittest.mock import MagicMock, patch
|
||||
from auto_archiver.core import Media
|
||||
from auto_archiver.modules.hash_enricher import HashEnricher
|
||||
from auto_archiver.modules.s3_storage import s3_storage
|
||||
|
||||
|
||||
@patch('boto3.client')
|
||||
@pytest.fixture
|
||||
def s3_store(setup_module):
|
||||
config: dict = {
|
||||
"path_generator": "flat",
|
||||
"filename_generator": "static",
|
||||
"bucket": "test-bucket",
|
||||
"region": "test-region",
|
||||
"key": "test-key",
|
||||
"secret": "test-secret",
|
||||
"random_no_duplicate": False,
|
||||
"endpoint_url": "https://{region}.example.com",
|
||||
"cdn_url": "https://cdn.example.com/{key}",
|
||||
"private": False,
|
||||
}
|
||||
s3_storage = setup_module("s3_storage", config)
|
||||
return s3_storage
|
||||
|
||||
def test_client_initialization(s3_store):
|
||||
"""Test that S3 client is initialized with correct parameters"""
|
||||
assert s3_store.s3 is not None
|
||||
assert s3_store.s3.meta.region_name == 'test-region'
|
||||
|
||||
|
||||
def test_get_cdn_url_generation(s3_store):
|
||||
"""Test CDN URL formatting """
|
||||
media = Media("test.txt")
|
||||
media.key = "path/to/file.txt"
|
||||
url = s3_store.get_cdn_url(media)
|
||||
assert url == "https://cdn.example.com/path/to/file.txt"
|
||||
media.key = "another/path.jpg"
|
||||
assert s3_store.get_cdn_url(media) == "https://cdn.example.com/another/path.jpg"
|
||||
|
||||
|
||||
@patch.object(s3_storage.S3Storage, 'file_in_folder')
|
||||
def test_skips_upload_when_duplicate_exists(mock_file_in_folder, s3_store):
|
||||
"""Test that upload skips when file_in_folder finds existing object"""
|
||||
# Setup test-specific configuration
|
||||
s3_store.random_no_duplicate = True
|
||||
mock_file_in_folder.return_value = "existing_folder/existing_file.txt"
|
||||
# Create test media with calculated hash
|
||||
media = Media("test.txt")
|
||||
media.key = "original_path.txt"
|
||||
|
||||
# Mock hash calculation
|
||||
with patch.object(s3_store, 'calculate_hash') as mock_calculate_hash:
|
||||
mock_calculate_hash.return_value = "testhash123"
|
||||
# Verify upload
|
||||
assert s3_store.is_upload_needed(media) is False
|
||||
assert media.key == "existing_folder/existing_file.txt"
|
||||
assert media.get("previously archived") is True
|
||||
|
||||
with patch.object(s3_store.s3, 'upload_fileobj') as mock_upload:
|
||||
result = s3_store.uploadf(None, media)
|
||||
mock_upload.assert_not_called()
|
||||
assert result is True
|
||||
|
||||
@patch.object(s3_storage.S3Storage, 'is_upload_needed')
|
||||
def test_uploads_with_correct_parameters(mock_upload_needed, s3_store):
|
||||
media = Media("test.txt")
|
||||
mock_upload_needed.return_value = True
|
||||
media.mimetype = 'image/png'
|
||||
mock_file = MagicMock()
|
||||
|
||||
with patch.object(s3_store.s3, 'upload_fileobj') as mock_upload:
|
||||
s3_store.uploadf(mock_file, media)
|
||||
|
||||
# Verify core upload parameters
|
||||
mock_upload.assert_called_once_with(
|
||||
mock_file,
|
||||
Bucket='test-bucket',
|
||||
# Key='original_key.txt',
|
||||
Key=None,
|
||||
ExtraArgs={
|
||||
'ACL': 'public-read',
|
||||
'ContentType': 'image/png'
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# ============================================================
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class TestGDriveStorage:
|
||||
"""
|
||||
Test suite for GDriveStorage.
|
||||
|
@ -121,10 +29,9 @@ class TestGDriveStorage:
|
|||
@patch('boto3.client')
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup_storage(self, setup_module):
|
||||
he = HashEnricher()
|
||||
self.storage = setup_module(self.module_name, self.config)
|
||||
|
||||
def test_client_initialization(self, setup_storage):
|
||||
def test_client_initialization(self):
|
||||
"""Test that S3 client is initialized with correct parameters"""
|
||||
assert self.storage.s3 is not None
|
||||
assert self.storage.s3.meta.region_name == 'test-region'
|
||||
|
@ -138,37 +45,55 @@ class TestGDriveStorage:
|
|||
media.key = "another/path.jpg"
|
||||
assert self.storage.get_cdn_url(media) == "https://cdn.example.com/another/path.jpg"
|
||||
|
||||
def test_uploadf_sets_acl_public(self):
|
||||
media = Media("test.txt")
|
||||
mock_file = MagicMock()
|
||||
with patch.object(self.storage.s3, 'upload_fileobj') as mock_s3_upload, \
|
||||
patch.object(self.storage, 'is_upload_needed', return_value=True):
|
||||
self.storage.uploadf(mock_file, media)
|
||||
mock_s3_upload.assert_called_once_with(
|
||||
mock_file,
|
||||
Bucket='test-bucket',
|
||||
Key=media.key,
|
||||
ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/plain'}
|
||||
)
|
||||
|
||||
def test_upload_decision_logic(self):
|
||||
"""Test is_upload_needed under different conditions"""
|
||||
media = Media("test.txt")
|
||||
|
||||
# Test random_no_duplicate disabled
|
||||
# Test default state (random_no_duplicate=False)
|
||||
assert self.storage.is_upload_needed(media) is True
|
||||
# Set duplicate checking config to true:
|
||||
|
||||
# Test duplicate exists
|
||||
self.storage.random_no_duplicate = True
|
||||
with patch.object(self.storage, 'file_in_folder', return_value='existing.txt'):
|
||||
with patch('auto_archiver.modules.hash_enricher.HashEnricher.calculate_hash') as mock_calc_hash, \
|
||||
patch.object(self.storage, 'file_in_folder') as mock_file_in_folder:
|
||||
mock_calc_hash.return_value = 'beepboop123beepboop123beepboop123'
|
||||
mock_file_in_folder.return_value = 'existing_key.txt'
|
||||
# Test duplicate result
|
||||
assert self.storage.is_upload_needed(media) is False
|
||||
assert media.key == 'existing.txt'
|
||||
assert media.key == 'existing_key.txt'
|
||||
mock_file_in_folder.assert_called_with(
|
||||
# (first 24 chars of hash)
|
||||
'no-dups/beepboop123beepboop123be'
|
||||
)
|
||||
|
||||
|
||||
@patch.object(s3_storage.S3Storage, 'file_in_folder')
|
||||
def test_skips_upload_when_duplicate_exists(self, mock_file_in_folder):
|
||||
"""Test that upload skips when file_in_folder finds existing object"""
|
||||
# Setup test-specific configuration
|
||||
self.storage.random_no_duplicate = True
|
||||
mock_file_in_folder.return_value = "existing_folder/existing_file.txt"
|
||||
# Create test media with calculated hash
|
||||
media = Media("test.txt")
|
||||
media.key = "original_path.txt"
|
||||
|
||||
# Mock hash calculation
|
||||
with patch.object(self.storage, 'calculate_hash') as mock_calculate_hash:
|
||||
mock_calculate_hash.return_value = "testhash123"
|
||||
with patch('auto_archiver.modules.hash_enricher.HashEnricher.calculate_hash') as mock_calculate_hash:
|
||||
mock_calculate_hash.return_value = "beepboop123beepboop123beepboop123"
|
||||
# Verify upload
|
||||
assert self.storage.is_upload_needed(media) is False
|
||||
assert media.key == "existing_folder/existing_file.txt"
|
||||
assert media.get("previously archived") is True
|
||||
|
||||
with patch.object(self.storage.s3, 'upload_fileobj') as mock_upload:
|
||||
result = self.storage.uploadf(None, media)
|
||||
mock_upload.assert_not_called()
|
||||
|
@ -177,21 +102,25 @@ class TestGDriveStorage:
|
|||
@patch.object(s3_storage.S3Storage, 'is_upload_needed')
|
||||
def test_uploads_with_correct_parameters(self, mock_upload_needed):
|
||||
media = Media("test.txt")
|
||||
media.key = "original_key.txt"
|
||||
mock_upload_needed.return_value = True
|
||||
media.mimetype = 'image/png'
|
||||
mock_file = MagicMock()
|
||||
|
||||
with patch.object(self.storage.s3, 'upload_fileobj') as mock_upload:
|
||||
self.storage.uploadf(mock_file, media)
|
||||
|
||||
# Verify core upload parameters
|
||||
# verify call occured with these params
|
||||
mock_upload.assert_called_once_with(
|
||||
mock_file,
|
||||
Bucket='test-bucket',
|
||||
# Key='original_key.txt',
|
||||
Key=None,
|
||||
Key='original_key.txt',
|
||||
ExtraArgs={
|
||||
'ACL': 'public-read',
|
||||
'ContentType': 'image/png'
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
def test_file_in_folder_exists(self):
|
||||
with patch.object(self.storage.s3, 'list_objects') as mock_list_objects:
|
||||
mock_list_objects.return_value = {'Contents': [{'Key': 'path/to/file.txt'}]}
|
||||
assert self.storage.file_in_folder('path/to/') == 'path/to/file.txt'
|
Ładowanie…
Reference in New Issue