Initial get-objects command, refs #78

pull/84/head
Simon Willison 2022-09-15 10:06:53 -07:00
parent ee5523461d
commit 1a2325e33b
5 changed files with 185 additions and 1 deletion

View file

@@ -369,6 +369,29 @@ This defaults to outputting the downloaded file to the terminal. You can instead
    s3-credentials get-object my-bucket hello.txt -o /path/to/hello.txt
## get-objects

`s3-credentials get-objects` can be used to download multiple files from a bucket at once.

Without extra arguments, this downloads everything:

    s3-credentials get-objects my-bucket

Files will be written to the current directory by default, preserving their directory structure from the bucket.

To write to a different directory use `--output` or `-o`:

    s3-credentials get-objects my-bucket -o /path/to/output

To download multiple specific files, add them as arguments to the command:

    s3-credentials get-objects my-bucket one.txt two.txt path/to/three.txt

You can pass one or more `--pattern` or `-p` options to download files matching a specific pattern:

    s3-credentials get-objects my-bucket -p "*.txt" -p "static/*.css"

Here the `*` wildcard will match any sequence of characters, including `/`. `?` will match a single character.
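
The wildcard behaviour described above comes from Python's `fnmatch` module, which the implementation further down applies via `fnmatch.filter()`. A quick illustration (not part of this commit; the example keys are made up):

    import fnmatch

    keys = ["one.txt", "static/app.css", "static/js/app.js"]

    # "*" matches any sequence of characters, including "/"
    print(fnmatch.filter(keys, "*.css"))     # ['static/app.css']
    print(fnmatch.filter(keys, "static/*"))  # ['static/app.css', 'static/js/app.js']

    # "?" matches exactly one character
    print(fnmatch.fnmatch("one.txt", "on?.txt"))  # True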
## set-cors-policy and get-cors-policy

View file

@@ -4,11 +4,13 @@ import botocore
import click
import configparser
from csv import DictWriter
import fnmatch
import io
import itertools
import json
import mimetypes
import os
import pathlib
import re
import sys
import textwrap
@@ -1029,6 +1031,84 @@ def get_object(bucket, key, output, **boto_options):
        s3.download_fileobj(bucket, key, fp)
@cli.command()
@click.argument("bucket")
@click.argument(
    "keys",
    nargs=-1,
    required=False,
)
@click.option(
    "output",
    "-o",
    "--output",
    type=click.Path(file_okay=False, dir_okay=True, writable=True, allow_dash=False),
help="Write to this directory instead of one matching the bucket name",
)
@click.option(
    "patterns",
    "-p",
    "--pattern",
    multiple=True,
    help="Glob patterns for files to download, e.g. '*/*.js'",
)
@common_boto3_options
def get_objects(bucket, keys, output, patterns, **boto_options):
    """
    Download multiple objects from an S3 bucket

    To download everything, run:

        s3-credentials get-objects my-bucket

    Files will be saved to the current directory by default. Use -o dirname to save to a
    different directory.

    To download specific keys, list them:

        s3-credentials get-objects my-bucket one.txt path/two.txt

    To download files matching a glob-style pattern, use:

        s3-credentials get-objects my-bucket --pattern '*/*.js'
    """
    s3 = make_client("s3", **boto_options)
    # If user specified keys and no patterns, use the keys they specified
    keys_to_download = list(keys)
    if (not keys) or patterns:
        # Fetch all keys, then filter them if --pattern
        all_keys = [
            obj["Key"]
            for obj in paginate(s3, "list_objects_v2", "Contents", Bucket=bucket)
        ]
        if patterns:
            filtered = []
            for pattern in patterns:
                filtered.extend(fnmatch.filter(all_keys, pattern))
            keys_to_download.extend(filtered)
        else:
            keys_to_download.extend(all_keys)
    output_dir = pathlib.Path(output or ".")
    if not output_dir.exists():
        output_dir.mkdir(parents=True)
    errors = []
    for key in keys_to_download:
        # Ensure directory for key exists
        key_dir = (output_dir / key).parent
        if not key_dir.exists():
            key_dir.mkdir(parents=True)
        try:
            s3.download_file(bucket, key, str(output_dir / key))
        except botocore.exceptions.ClientError as e:
            errors.append("Not found: {}".format(key))
    if errors:
        raise click.ClickException("\n".join(errors))
@cli.command()
@click.argument("bucket")
@click.option(
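
For context, here is a rough standalone sketch (not the project's code) of what `get-objects` does with boto3 directly. It uses boto3's built-in paginator rather than the project's `paginate()` helper, and the bucket name, pattern, and output directory are placeholders:

    import fnmatch
    import pathlib

    import boto3

    s3 = boto3.client("s3")
    bucket = "my-bucket"  # placeholder bucket name

    # List every key in the bucket, paging through list_objects_v2
    paginator = s3.get_paginator("list_objects_v2")
    all_keys = [
        obj["Key"]
        for page in paginator.paginate(Bucket=bucket)
        for obj in page.get("Contents", [])
    ]

    # Keep only keys matching a glob-style pattern
    matched = fnmatch.filter(all_keys, "static/*.css")

    # Download each key, mirroring its path under a local output directory
    for key in matched:
        target = pathlib.Path("output") / key
        target.parent.mkdir(parents=True, exist_ok=True)
        s3.download_file(bucket, key, str(target))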

View file

@@ -32,6 +32,6 @@ setup(
        s3-credentials=s3_credentials.cli:cli
    """,
    install_requires=["click", "boto3"],
-    extras_require={"test": ["pytest", "pytest-mock", "cogapp"]},
+    extras_require={"test": ["pytest", "pytest-mock", "cogapp", "moto[s3]"]},
    python_requires=">=3.6",
)
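
With `moto[s3]` added to the test extras above, a local development install that pulls in the test dependencies would typically look like:

    pip install -e '.[test]'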

View file

@@ -1,6 +1,8 @@
import boto3
import logging
import os
import pytest
from moto import mock_s3


def pytest_addoption(parser):
@@ -35,3 +37,23 @@ def pytest_collection_modifyitems(config, items):
    for item in items:
        if "integration" in item.keywords:
            item.add_marker(skip_slow)
@pytest.fixture(scope="function")
def aws_credentials():
    """Mocked AWS Credentials for moto."""
    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
    os.environ["AWS_SECURITY_TOKEN"] = "testing"
    os.environ["AWS_SESSION_TOKEN"] = "testing"
    os.environ["AWS_DEFAULT_REGION"] = "us-east-1"


@pytest.fixture(scope="function")
def moto_s3(aws_credentials):
    with mock_s3():
        client = boto3.client("s3", region_name="us-east-1")
        client.create_bucket(Bucket="my-bucket")
        for key in ("one.txt", "directory/two.txt", "directory/three.json"):
            client.put_object(Bucket="my-bucket", Key=key, Body=key.encode("utf-8"))
        yield client
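
A hypothetical test (not part of this commit) showing the `moto_s3` fixture in use. `mock_s3()` intercepts the boto3 calls, so the client talks to an in-memory bucket rather than real AWS:

    def test_fixture_seeds_bucket(moto_s3):
        # The fixture pre-populates my-bucket with three objects
        keys = {
            obj["Key"]
            for obj in moto_s3.list_objects_v2(Bucket="my-bucket")["Contents"]
        }
        assert keys == {"one.txt", "directory/two.txt", "directory/three.json"}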

View file

@@ -2,6 +2,8 @@ import botocore
from click.testing import CliRunner
from s3_credentials.cli import cli
import json
import os
import pathlib
import pytest
from unittest.mock import call, Mock
from botocore.stub import Stubber
@@ -1102,3 +1104,60 @@ def test_list_roles_csv(stub_iam_for_list_roles):
" }\n" " }\n"
']"\n' ']"\n'
) )
@pytest.mark.parametrize(
    "files,patterns,expected,error",
    (
        # Without arguments, return everything
        (None, None, {"one.txt", "directory/two.txt", "directory/three.json"}, None),
        # Positional arguments return those files
        (["one.txt"], None, {"one.txt"}, None),
        (["directory/two.txt"], None, {"directory/two.txt"}, None),
        (["one.txt"], None, {"one.txt"}, None),
        (
            ["directory/two.txt", "directory/three.json"],
            None,
            {"directory/two.txt", "directory/three.json"},
            None,
        ),
        # Invalid positional argument downloads the valid file and shows an error
        (
            ["directory/two.txt", "directory/bad.json"],
            None,
            {"directory/two.txt"},
            "Not found: directory/bad.json",
        ),
        # --pattern returns files matching the pattern
        (None, ["*e.txt"], {"one.txt"}, None),
        (None, ["*e.txt", "invalid-pattern"], {"one.txt"}, None),
        (None, ["directory/*"], {"directory/two.txt", "directory/three.json"}, None),
        # Positional arguments and patterns can be combined
        (["one.txt"], ["directory/*.json"], {"one.txt", "directory/three.json"}, None),
    ),
)
@pytest.mark.parametrize("output", (None, "out"))
def test_get_objects(moto_s3, output, files, patterns, expected, error):
    runner = CliRunner()
    with runner.isolated_filesystem():
        args = ["get-objects", "my-bucket"] + (files or [])
        if patterns:
            for pattern in patterns:
                args.extend(["--pattern", pattern])
        if output:
            args.extend(["--output", output])
        result = runner.invoke(cli, args, catch_exceptions=False)
        if error:
            assert result.exit_code != 0
        else:
            assert result.exit_code == 0
        # Build the set of all files in the output directory using glob
        output_dir = pathlib.Path(output or ".")
        all_files = {
            str(p.relative_to(output_dir))
            for p in output_dir.glob("**/*")
            if p.is_file()
        }
        assert all_files == expected
        if error:
            assert error in result.output
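
Assuming the test extras are installed, these new tests can be run on their own with pytest's keyword filter, for example:

    pytest -k get_objects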