Initial get-objects command, refs #78

2022-09-15 10:06:53 -07:00 · 2022-09-15 10:06:53 -07:00 · 1a2325e33b
commit 1a2325e33b
--- a/docs/other-commands.md
+++ b/docs/other-commands.md
@ -369,6 +369,29 @@ This defaults to outputting the downloaded file to the terminal. You can instead

    s3-credentials get-object my-bucket hello.txt -o /path/to/hello.txt

+## get-objects
+
+`s3-credentials get-objects` can be used to download multiple files from a bucket at once.
+
+Without extra arguments, this downloads everything:
+
+    s3-credentials get-objects my-bucket
+
+Files will be written to the current directory by default, preserving their directory structure from the bucket.
+
+To write to a different directory use `--output` or `-o`:
+
+    s3-credentials get-objects my-bucket -o /path/to/output
+
+To download multiple specific files, add them as arguments to the command:
+
+    s3-credentials get-objects my-bucket one.txt two.txt path/to/three.txt
+
+You can pass one or more `--pattern` or `-p` options to download files matching a specific pattern:
+
+    s3-credentials get-objects my-bucket -p "*.txt" -p "static/*.css"
+
+Here the `*` wildcard will match any sequence of characters, including `/`. `?` will match a single character.

 ## set-cors-policy and get-cors-policy

--- a/s3_credentials/cli.py
+++ b/s3_credentials/cli.py
@ -4,11 +4,13 @@ import botocore
 import click
 import configparser
 from csv import DictWriter
+import fnmatch
 import io
 import itertools
 import json
 import mimetypes
 import os
+import pathlib
 import re
 import sys
 import textwrap
@ -1029,6 +1031,84 @@ def get_object(bucket, key, output, **boto_options):
    s3.download_fileobj(bucket, key, fp)


+@cli.command()
+@click.argument("bucket")
+@click.argument(
+    "keys",
+    nargs=-1,
+    required=False,
+)
+@click.option(
+    "output",
+    "-o",
+    "--output",
+    type=click.Path(file_okay=False, dir_okay=True, writable=True, allow_dash=False),
+    help="Write to this directory instead of the current directory",
+)
+@click.option(
+    "patterns",
+    "-p",
+    "--pattern",
+    multiple=True,
+    help="Glob patterns for files to download, e.g. '*/*.js'",
+)
+@common_boto3_options
+def get_objects(bucket, keys, output, patterns, **boto_options):
+    """
+    Download multiple objects from an S3 bucket
+
+    To download everything, run:
+
+        s3-credentials get-objects my-bucket
+
+    Files will be saved to the current directory, preserving their directory
+    structure from the bucket. Use -o dirname to save to a different directory.
+
+    To download specific keys, list them:
+
+        s3-credentials get-objects my-bucket one.txt path/two.txt
+
+    To download files matching a glob-style pattern, use:
+
+        s3-credentials get-objects my-bucket --pattern '*/*.js'
+    """
+    # NOTE: the docstring above is shown to users as the --help text, so
+    # implementation notes live in comments rather than in the docstring.
+    s3 = make_client("s3", **boto_options)
+
+    # Keys named on the command line are always downloaded
+    keys_to_download = list(keys)
+
+    if (not keys) or patterns:
+        # Fetch all keys, then filter them if --pattern
+        all_keys = [
+            obj["Key"]
+            for obj in paginate(s3, "list_objects_v2", "Contents", Bucket=bucket)
+        ]
+        if patterns:
+            # fnmatchcase(), unlike fnmatch.filter(), does not run names through
+            # os.path.normcase(), so matching is case-sensitive on every platform
+            keys_to_download.extend(
+                key
+                for key in all_keys
+                if any(fnmatch.fnmatchcase(key, pattern) for pattern in patterns)
+            )
+        else:
+            keys_to_download.extend(all_keys)
+
+    # A key can be both listed explicitly and matched by a pattern; download it
+    # once. dict.fromkeys() keeps the first occurrence of each key, in order.
+    keys_to_download = list(dict.fromkeys(keys_to_download))
+
+    output_dir = pathlib.Path(output or ".")
+    output_dir.mkdir(parents=True, exist_ok=True)
+    # Trailing separator so "/out" is not treated as a prefix of "/out-other"
+    output_prefix = os.path.join(os.path.abspath(str(output_dir)), "")
+
+    errors = []
+    for key in keys_to_download:
+        destination = output_dir / key
+        # Key names come from the bucket, so refuse any (e.g. containing ".."
+        # or starting with "/") that would land outside the output directory
+        if not os.path.abspath(str(destination)).startswith(output_prefix):
+            errors.append(
+                "Refusing to write outside output directory: {}".format(key)
+            )
+            continue
+        if key.endswith("/"):
+            # "Folder" placeholder object: create the directory rather than
+            # downloading it as a file that would block its own children
+            destination.mkdir(parents=True, exist_ok=True)
+            continue
+        # Ensure directory for key exists
+        destination.parent.mkdir(parents=True, exist_ok=True)
+        try:
+            s3.download_file(bucket, key, str(destination))
+        except botocore.exceptions.ClientError as e:
+            code = e.response.get("Error", {}).get("Code")
+            if code in ("404", "NoSuchKey"):
+                errors.append("Not found: {}".format(key))
+            else:
+                # Do not mislabel e.g. permission failures as missing keys
+                errors.append("Error downloading {}: {}".format(key, e))
+    # Report every failure together, after all other keys have been attempted
+    if errors:
+        raise click.ClickException("\n".join(errors))
+
+
@cli.command()
@click.argument("bucket")
@click.option(
--- a/setup.py
+++ b/setup.py
@ -32,6 +32,6 @@ setup(
        s3-credentials=s3_credentials.cli:cli
    """,
    install_requires=["click", "boto3"],
-    extras_require={"test": ["pytest", "pytest-mock", "cogapp"]},
+    extras_require={"test": ["pytest", "pytest-mock", "cogapp", "moto[s3]"]},
    python_requires=">=3.6",
 )
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -1,6 +1,8 @@
 import boto3
 import logging
+import os
 import pytest
+from moto import mock_s3


 def pytest_addoption(parser):
@ -35,3 +37,23 @@ def pytest_collection_modifyitems(config, items):
    for item in items:
        if "integration" in item.keywords:
            item.add_marker(skip_slow)
+
+
+@pytest.fixture(scope="function")
+def aws_credentials(monkeypatch):
+    """Mocked AWS Credentials for moto.
+
+    The values are set with monkeypatch so that pytest restores the previous
+    environment when the test finishes, instead of leaving the fake
+    credentials in os.environ for every test that runs afterwards.
+    """
+    fake_environment = {
+        "AWS_ACCESS_KEY_ID": "testing",
+        "AWS_SECRET_ACCESS_KEY": "testing",
+        "AWS_SECURITY_TOKEN": "testing",
+        "AWS_SESSION_TOKEN": "testing",
+        "AWS_DEFAULT_REGION": "us-east-1",
+    }
+    for name, value in fake_environment.items():
+        monkeypatch.setenv(name, value)
+
+
+@pytest.fixture(scope="function")
+def moto_s3(aws_credentials):
+    """Yield a boto3 S3 client running inside moto's mock_s3 context.
+
+    The mocked "my-bucket" bucket is created and seeded with three objects,
+    each of whose body is its own key encoded as UTF-8.
+    """
+    seed_keys = ("one.txt", "directory/two.txt", "directory/three.json")
+    with mock_s3():
+        s3_client = boto3.client("s3", region_name="us-east-1")
+        s3_client.create_bucket(Bucket="my-bucket")
+        for object_key in seed_keys:
+            s3_client.put_object(
+                Bucket="my-bucket",
+                Key=object_key,
+                Body=object_key.encode("utf-8"),
+            )
+        # Yield inside the context manager so the mock is active during the test
+        yield s3_client
--- a/tests/test_s3_credentials.py
+++ b/tests/test_s3_credentials.py
@ -2,6 +2,8 @@ import botocore
 from click.testing import CliRunner
 from s3_credentials.cli import cli
 import json
+import os
+import pathlib
 import pytest
 from unittest.mock import call, Mock
 from botocore.stub import Stubber
@ -1102,3 +1104,60 @@ def test_list_roles_csv(stub_iam_for_list_roles):
        "  }\n"
        ']"\n'
    )
+
+
+@pytest.mark.parametrize(
+    "files,patterns,expected,error",
+    (
+        # Without arguments return everything
+        (None, None, {"one.txt", "directory/two.txt", "directory/three.json"}, None),
+        # Positional arguments returns files
+        (["one.txt"], None, {"one.txt"}, None),
+        (["directory/two.txt"], None, {"directory/two.txt"}, None),
+        (
+            ["directory/two.txt", "directory/three.json"],
+            None,
+            {"directory/two.txt", "directory/three.json"},
+            None,
+        ),
+        # Invalid positional argument downloads file and shows error
+        (
+            ["directory/two.txt", "directory/bad.json"],
+            None,
+            {"directory/two.txt"},
+            "Not found: directory/bad.json",
+        ),
+        # --pattern returns files matching pattern
+        (None, ["*e.txt"], {"one.txt"}, None),
+        (None, ["*e.txt", "invalid-pattern"], {"one.txt"}, None),
+        (None, ["directory/*"], {"directory/two.txt", "directory/three.json"}, None),
+        # positional and patterns can be combined
+        (["one.txt"], ["directory/*.json"], {"one.txt", "directory/three.json"}, None),
+        # a key that is both listed and matched by a pattern ("*" also matches "/")
+        (["one.txt"], ["*.txt"], {"one.txt", "directory/two.txt"}, None),
+    ),
+)
+@pytest.mark.parametrize("output", (None, "out"))
+def test_get_objects(moto_s3, output, files, patterns, expected, error):
+    """get-objects writes exactly the expected files and reports missing keys."""
+    runner = CliRunner()
+    with runner.isolated_filesystem():
+        args = ["get-objects", "my-bucket"] + (files or [])
+        for pattern in patterns or []:
+            args.extend(["--pattern", pattern])
+        if output:
+            args.extend(["--output", output])
+        result = runner.invoke(cli, args, catch_exceptions=False)
+        if error:
+            assert result.exit_code != 0
+        else:
+            assert result.exit_code == 0
+        # Build set of all files in output directory using glob; as_posix()
+        # keeps "/" separators so the comparison also holds on Windows
+        output_dir = pathlib.Path(output or ".")
+        all_files = {
+            p.relative_to(output_dir).as_posix()
+            for p in output_dir.glob("**/*")
+            if p.is_file()
+        }
+        assert all_files == expected
+        if error:
+            assert error in result.output