diff --git a/repo2docker/buildpacks/base.py b/repo2docker/buildpacks/base.py index bdb4290c..145d2964 100644 --- a/repo2docker/buildpacks/base.py +++ b/repo2docker/buildpacks/base.py @@ -9,9 +9,6 @@ import string import sys import hashlib import escapism -import xml.etree.ElementTree as ET - -from traitlets import Dict # Only use syntax features supported by Docker 17.09 TEMPLATE = r""" @@ -181,6 +178,8 @@ ENV R2D_ENTRYPOINT "{{ start_script }}" {% endif -%} # Add entrypoint +ENV PYTHONUNBUFFERED=1 +COPY /python3-login /usr/local/bin/python3-login COPY /repo2docker-entrypoint /usr/local/bin/repo2docker-entrypoint ENTRYPOINT ["/usr/local/bin/repo2docker-entrypoint"] @@ -193,9 +192,7 @@ CMD ["jupyter", "notebook", "--ip", "0.0.0.0"] {% endif %} """ -ENTRYPOINT_FILE = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "repo2docker-entrypoint" -) +HERE = os.path.dirname(os.path.abspath(__file__)) # Also used for the group DEFAULT_NB_UID = 1000 @@ -582,7 +579,8 @@ class BuildPack: dest_path, src_path = self.generate_build_context_filename(src) tar.add(src_path, dest_path, filter=_filter_tar) - tar.add(ENTRYPOINT_FILE, "repo2docker-entrypoint", filter=_filter_tar) + for fname in ("repo2docker-entrypoint", "python3-login"): + tar.add(os.path.join(HERE, fname), fname, filter=_filter_tar) tar.add(".", "src/", filter=_filter_tar) diff --git a/repo2docker/buildpacks/python3-login b/repo2docker/buildpacks/python3-login new file mode 100755 index 00000000..a7b9dd07 --- /dev/null +++ b/repo2docker/buildpacks/python3-login @@ -0,0 +1,11 @@ +#!/bin/bash -l +# This is an executable that launches Python in a login shell +# to ensure that full profile setup occurs. 
+# shebang on linux only allows 1 argument, +# so we couldn't pick a login shell in one shebang line +# for a Python script + +# -u means unbuffered, which one ~always wants in a container +# otherwise output can be mysteriously missing + +exec python3 -u "$@" diff --git a/repo2docker/buildpacks/repo2docker-entrypoint b/repo2docker/buildpacks/repo2docker-entrypoint index cc1404d5..9e8fba52 100755 --- a/repo2docker/buildpacks/repo2docker-entrypoint +++ b/repo2docker/buildpacks/repo2docker-entrypoint @@ -1,24 +1,97 @@ -#!/bin/bash -l -# lightest possible entrypoint that ensures that -# we use a login shell to get a fully configured shell environment -# (e.g. sourcing /etc/profile.d, ~/.bashrc, and friends) +#!/usr/local/bin/python3-login +# note: must run on Python >= 3.5, which mainly means no f-strings -# Setup a file descriptor (FD) that is connected to a tee process which -# writes its input to $REPO_DIR/.jupyter-server-log.txt -# We later use this FD as a place to redirect the output of the actual -# command to. We can't add `tee` to the command directly as that will prevent -# the container from exiting when `docker stop` is run. -# See https://stackoverflow.com/a/55678435 -exec {log_fd}> >(exec tee $REPO_DIR/.jupyter-server-log.txt) +# goals: +# - load environment variables from a login shell (bash -l) +# - preserve signal handling of subprocess (kill -TERM and friends) +# - tee output to a log file -if [[ ! -z "${R2D_ENTRYPOINT:-}" ]]; then - if [[ ! 
# note: must run on Python >= 3.5, which mainly means no f-strings

import fcntl
import os
import select
import signal
import subprocess
import sys
import tempfile

# output chunk size to read
CHUNK_SIZE = 1024

# signals to be forwarded to the child
# everything catchable, excluding SIGCHLD
SIGNALS = set(signal.Signals) - {signal.SIGKILL, signal.SIGSTOP, signal.SIGCHLD}

# name of the file that receives a copy of the child's output
LOG_FILE_NAME = ".jupyter-server-log.txt"


def _open_log_file():
    """Open the log file for appending, or return None if no location works.

    Directories are tried in order: $REPO_DIR (when set and non-empty),
    the current working directory, then the system temp directory.
    """
    candidates = []
    repo_dir = os.environ.get("REPO_DIR")
    if repo_dir:
        candidates.append(repo_dir)
    candidates.extend([".", tempfile.gettempdir()])

    for directory in candidates:
        try:
            return open(os.path.join(directory, LOG_FILE_NAME), "ab")
        except OSError:
            # missing or unwritable directory: try the next candidate
            continue
    return None


def _exit_status(returncode):
    """Translate a Popen returncode into a process exit status.

    Popen reports death-by-signal as ``-signum``; passing that straight to
    sys.exit() wraps modulo 256 (e.g. -15 becomes 241). Use the shell
    convention of ``128 + signum`` instead.
    """
    if returncode < 0:
        return 128 - returncode
    return returncode


def main():
    """Run the container command, relaying signals and teeing its output.

    The command is ``sys.argv[1:]``, prefixed with $R2D_ENTRYPOINT when that
    variable is set. The child's combined stdout/stderr is copied to our
    stdout and appended to the log file. Every catchable signal we receive
    is forwarded to the child. Exits with the child's exit status.
    """
    # build the command
    # like `exec "$@"`
    command = sys.argv[1:]
    # load entrypoint override from env
    r2d_entrypoint = os.environ.get("R2D_ENTRYPOINT")
    if r2d_entrypoint:
        command.insert(0, r2d_entrypoint)
    if not command:
        # Popen([]) would die with an opaque IndexError; exit status stays 1
        sys.exit("repo2docker-entrypoint: no command given")

    # logging is best-effort: an unwritable log location must not prevent
    # the container from starting
    log_file = _open_log_file()
    if log_file is None:
        sys.stderr.write(
            "repo2docker-entrypoint: could not open {}; "
            "output will not be logged to a file\n".format(LOG_FILE_NAME)
        )

    try:
        # launch the subprocess
        # default buffering: line buffering (bufsize=1) is not supported
        # for binary pipes and only triggers a RuntimeWarning
        child = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )

        # hook up ~all signals so that every signal the parent gets,
        # the children also get
        def relay_signal(sig, frame):
            """Relay a signal to children"""
            child.send_signal(sig)

        for signum in SIGNALS:
            signal.signal(signum, relay_signal)

        def tee(chunk):
            """Tee output from child to both our stdout and the log file"""
            sinks = [sys.stdout.buffer]
            if log_file is not None:
                sinks.append(log_file)
            for sink in sinks:
                sink.write(chunk)
                sink.flush()

        # make stdout pipe non-blocking
        # this means child.stdout.read(nbytes)
        # will always return immediately, even if there's nothing to read
        flags = fcntl.fcntl(child.stdout, fcntl.F_GETFL)
        fcntl.fcntl(child.stdout, fcntl.F_SETFL, flags | os.O_NONBLOCK)
        poller = select.poll()
        poller.register(child.stdout)

        # while child is running, constantly relay output
        while child.poll() is None:
            chunk = child.stdout.read(CHUNK_SIZE)
            if chunk:
                tee(chunk)
            elif chunk is None:
                # None means nothing to read right now
                # wait for output on the pipe
                # timeout is in milliseconds
                poller.poll(1000)
            else:
                # b"" means EOF: the child closed its output but is still
                # running. poll() would return immediately on the hung-up
                # pipe and make this loop spin, so just wait for exit.
                child.wait()

        # child has exited, continue relaying any remaining output
        # At this point, read() returns an empty value when it's done
        chunk = child.stdout.read()
        while chunk:
            tee(chunk)
            chunk = child.stdout.read()

        # make our exit status match the child's
        sys.exit(_exit_status(child.returncode))
    finally:
        if log_file is not None:
            log_file.close()


if __name__ == "__main__":
    main()