diff --git a/docs/other-commands.md b/docs/other-commands.md
index 7a643aa..ada25a9 100644
--- a/docs/other-commands.md
+++ b/docs/other-commands.md
@@ -369,6 +369,29 @@ This defaults to outputting the downloaded file to the terminal. You can instead
 
     s3-credentials get-object my-bucket hello.txt -o /path/to/hello.txt
 
+## get-objects
+
+`s3-credentials get-objects` can be used to download multiple files from a bucket at once.
+
+Without extra arguments, this downloads everything:
+
+    s3-credentials get-objects my-bucket
+
+Files will be written to the current directory by default, preserving their directory structure from the bucket.
+
+To write to a different directory use `--output` or `-o`:
+
+    s3-credentials get-objects my-bucket -o /path/to/output
+
+To download multiple specific files, add them as arguments to the command:
+
+    s3-credentials get-objects my-bucket one.txt two.txt path/to/three.txt
+
+You can pass one or more `--pattern` or `-p` options to download files matching a specific pattern:
+
+    s3-credentials get-objects my-bucket -p "*.txt" -p "static/*.css"
+
+Here the `*` wildcard will match any sequence of characters, including `/`. `?` will match a single character.
 
 ## set-cors-policy and get-cors-policy
 
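The "`*` matches `/`" behaviour described above falls out of the implementation in `cli.py` below, which filters keys with Python's `fnmatch` module; `fnmatch` treats S3 keys as flat strings rather than filesystem paths. A standalone sketch of that matching behaviour (illustration only, not part of the diff):

```python
import fnmatch

# To fnmatch an S3 key is just a string, so "*" happily crosses "/" boundaries,
# unlike shell globbing where "*" stops at directory separators.
keys = ["one.txt", "static/site.css", "static/js/app.js"]

print(fnmatch.filter(keys, "*.css"))     # ['static/site.css']
print(fnmatch.filter(keys, "static/*"))  # ['static/site.css', 'static/js/app.js']
print(fnmatch.filter(keys, "???.txt"))   # ['one.txt'] - "?" matches exactly one character
```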
diff --git a/s3_credentials/cli.py b/s3_credentials/cli.py
index 707bb29..1b3819a 100644
--- a/s3_credentials/cli.py
+++ b/s3_credentials/cli.py
@@ -4,11 +4,13 @@ import botocore
 import click
 import configparser
 from csv import DictWriter
+import fnmatch
 import io
 import itertools
 import json
 import mimetypes
 import os
+import pathlib
 import re
 import sys
 import textwrap
@@ -1029,6 +1031,84 @@ def get_object(bucket, key, output, **boto_options):
     s3.download_fileobj(bucket, key, fp)
 
 
+@cli.command()
+@click.argument("bucket")
+@click.argument(
+    "keys",
+    nargs=-1,
+    required=False,
+)
+@click.option(
+    "output",
+    "-o",
+    "--output",
+    type=click.Path(file_okay=False, dir_okay=True, writable=True, allow_dash=False),
+    help="Write to this directory instead of the current directory",
+)
+@click.option(
+    "patterns",
+    "-p",
+    "--pattern",
+    multiple=True,
+    help="Glob patterns for files to download, e.g. '*/*.js'",
+)
+@common_boto3_options
+def get_objects(bucket, keys, output, patterns, **boto_options):
+    """
+    Download multiple objects from an S3 bucket
+
+    To download everything, run:
+
+        s3-credentials get-objects my-bucket
+
+    Files will be saved to the current directory. Use -o dirname to save to a
+    different directory.
+
+    To download specific keys, list them:
+
+        s3-credentials get-objects my-bucket one.txt path/two.txt
+
+    To download files matching a glob-style pattern, use:
+
+        s3-credentials get-objects my-bucket --pattern '*/*.js'
+    """
+    s3 = make_client("s3", **boto_options)
+
+    # If the user specified keys and no patterns, use just those keys
+    keys_to_download = list(keys)
+
+    if (not keys) or patterns:
+        # Fetch all keys, then filter them if --pattern was used
+        all_keys = [
+            obj["Key"]
+            for obj in paginate(s3, "list_objects_v2", "Contents", Bucket=bucket)
+        ]
+        if patterns:
+            filtered = []
+            for pattern in patterns:
+                filtered.extend(fnmatch.filter(all_keys, pattern))
+            keys_to_download.extend(filtered)
+        else:
+            keys_to_download.extend(all_keys)
+
+    output_dir = pathlib.Path(output or ".")
+    if not output_dir.exists():
+        output_dir.mkdir(parents=True)
+
+    errors = []
+    for key in keys_to_download:
+        # Ensure the directory for this key exists
+        key_dir = (output_dir / key).parent
+        if not key_dir.exists():
+            key_dir.mkdir(parents=True)
+        try:
+            s3.download_file(bucket, key, str(output_dir / key))
+        except botocore.exceptions.ClientError:
+            errors.append("Not found: {}".format(key))
+    if errors:
+        raise click.ClickException("\n".join(errors))
+
+
 @cli.command()
 @click.argument("bucket")
 @click.option(
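The new command leans on `paginate(s3, "list_objects_v2", "Contents", Bucket=bucket)`, a helper that already exists in `cli.py` and is not shown in this diff. Presumably it wraps boto3's paginator API along these lines (a sketch of the assumed shape, not the project's actual code):

```python
def paginate(service, method, list_key, **kwargs):
    # Drive a boto3 paginator and flatten the items found under list_key
    # (e.g. "Contents") across every page, so callers see all keys even
    # when the bucket holds more than the 1,000-object page limit.
    paginator = service.get_paginator(method)
    for response in paginator.paginate(**kwargs):
        yield from response.get(list_key, [])
```

Whatever its exact body, the relevant point for `get-objects` is that the listing is paginated, so downloading "everything" works on buckets of any size.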
(["directory/two.txt"], None, {"directory/two.txt"}, None), + (["one.txt"], None, {"one.txt"}, None), + ( + ["directory/two.txt", "directory/three.json"], + None, + {"directory/two.txt", "directory/three.json"}, + None, + ), + # Invalid positional argument downloads file and shows error + ( + ["directory/two.txt", "directory/bad.json"], + None, + {"directory/two.txt"}, + "Not found: directory/bad.json", + ), + # --pattern returns files matching pattern + (None, ["*e.txt"], {"one.txt"}, None), + (None, ["*e.txt", "invalid-pattern"], {"one.txt"}, None), + (None, ["directory/*"], {"directory/two.txt", "directory/three.json"}, None), + # positional and patterns can be combined + (["one.txt"], ["directory/*.json"], {"one.txt", "directory/three.json"}, None), + ), +) +@pytest.mark.parametrize("output", (None, "out")) +def test_get_objects(moto_s3, output, files, patterns, expected, error): + runner = CliRunner() + with runner.isolated_filesystem(): + args = ["get-objects", "my-bucket"] + (files or []) + if patterns: + for pattern in patterns: + args.extend(["--pattern", pattern]) + if output: + args.extend(["--output", output]) + result = runner.invoke(cli, args, catch_exceptions=False) + if error: + assert result.exit_code != 0 + else: + assert result.exit_code == 0 + # Build list of all files in output directory using glob + output_dir = pathlib.Path(output or ".") + all_files = { + str(p.relative_to(output_dir)) + for p in output_dir.glob("**/*") + if p.is_file() + } + assert all_files == expected + if error: + assert error in result.output