simplify python entrypoint

- remove redundant monitor sibling process
- use python3-login executable instead of login shell subprocess
  (same effect, but in more natural order)
- use non-blocking binary IO in tee instead of readline
  (switch to binary mode, as text wrappers don't support non-blocking mode
   see https://bugs.python.org/issue13322)
pull/1014/head
Min RK 2021-02-18 15:52:52 +01:00
rodzic 0f848f7855
commit b36a6a75f5
3 zmienionych plików z 52 dodań i 107 usunięć

Wyświetl plik

@ -9,9 +9,6 @@ import string
import sys
import hashlib
import escapism
import xml.etree.ElementTree as ET
from traitlets import Dict
# Only use syntax features supported by Docker 17.09
TEMPLATE = r"""
@ -182,6 +179,7 @@ ENV R2D_ENTRYPOINT "{{ start_script }}"
# Add entrypoint
ENV PYTHONUNBUFFERED=1
COPY /python3-login /usr/local/bin/python3-login
COPY /repo2docker-entrypoint /usr/local/bin/repo2docker-entrypoint
ENTRYPOINT ["/usr/local/bin/repo2docker-entrypoint"]
@ -194,9 +192,7 @@ CMD ["jupyter", "notebook", "--ip", "0.0.0.0"]
{% endif %}
"""
# Absolute path of the "repo2docker-entrypoint" file located in the same
# directory as this module.
ENTRYPOINT_FILE = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "repo2docker-entrypoint"
)
# Absolute path of the directory containing this module.
HERE = os.path.dirname(os.path.abspath(__file__))
# Default numeric user id.
# Also used for the group
DEFAULT_NB_UID = 1000
@ -583,7 +579,8 @@ class BuildPack:
dest_path, src_path = self.generate_build_context_filename(src)
tar.add(src_path, dest_path, filter=_filter_tar)
tar.add(ENTRYPOINT_FILE, "repo2docker-entrypoint", filter=_filter_tar)
for fname in ("repo2docker-entrypoint", "python3-login"):
tar.add(os.path.join(HERE, fname), fname, filter=_filter_tar)
tar.add(".", "src/", filter=_filter_tar)

Wyświetl plik

@ -0,0 +1,11 @@
#!/bin/bash -l
# Launch Python from a bash login shell (-l) so that full profile setup
# has occurred before the interpreter starts.
#
# This wrapper exists because a shebang line on Linux only allows one
# argument, so a Python script cannot request a login shell directly
# from its own shebang line; it can point its shebang at this file instead.
#
# -u means unbuffered, which one ~always wants in a container;
# otherwise output can be mysteriously missing.
# exec replaces this shell with python3, and "$@" forwards every argument
# unchanged.
exec python3 -u "$@"

Wyświetl plik

@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/local/bin/python3-login
# note: must run on Python >= 3.5, which mainly means no f-strings
# goals:
@ -6,153 +6,90 @@
# - preserve signal handling of subprocess (kill -TERM and friends)
# - tee output to a log file
import json
import fcntl
import os
import select
import signal
import subprocess
import sys
import time
def get_login_env():
    """Return the environment variables of a bash login shell.

    Runs ``bash -l`` and lets a Python child process dump ``os.environ`` as
    JSON, so that values containing quotes, newlines, etc. are escaped
    properly. Only the last line of the output is parsed, because profile
    scripts run by the login shell may print to stdout first.

    Returns:
        dict: variable names mapped to values, or an empty dict when the
        shell cannot be started, exits non-zero, prints nothing, or its
        last output line is not valid JSON. Every failure is reported on
        stderr.
    """
    try:
        p = subprocess.run(
            [
                "bash",
                "-l",
                "-c",
                "python3 -c 'import os, json; print(json.dumps(dict(os.environ)))'",
            ],
            stdout=subprocess.PIPE,
        )
    except OSError as e:
        # e.g. bash is not installed; degrade to "no extra environment"
        print("Error getting login env: {e}".format(e=e), file=sys.stderr)
        return {}

    if p.returncode:
        print("Error getting login env", file=sys.stderr)
        return {}

    lines = p.stdout.splitlines()
    if not lines:
        # nothing was printed, so there is no last line to parse
        print("Error getting login env: no output", file=sys.stderr)
        return {}

    try:
        # decode explicitly: json.loads() only accepts bytes on Python >= 3.6
        return json.loads(lines[-1].decode("utf8"))
    except ValueError as e:
        # covers both invalid UTF-8 and invalid JSON
        print("Error getting login env: {e}".format(e=e), file=sys.stderr)
        return {}
def monitor_parent(parent_pid, child_pgid, poll_interval=1):
    """Kill the process group child_pgid once parent_pid no longer exists.

    Blocks, probing the parent with signal 0 every ``poll_interval``
    seconds. When the probe reports that the parent is gone (likely by
    SIGKILL), SIGKILL is sent to the child process group and the function
    returns.

    Args:
        parent_pid: pid of the process to watch.
        child_pgid: process group id to kill when the parent disappears.
        poll_interval: seconds to sleep between probes (default 1).

    Returns:
        None, after the parent has gone away.
    """
    while True:
        try:
            # signal 0 performs the existence check without delivering anything
            os.kill(parent_pid, 0)
        except ProcessLookupError:
            # parent is gone
            break
        except PermissionError:
            # the process exists but we may not signal it: still alive
            pass
        time.sleep(poll_interval)

    try:
        os.killpg(child_pgid, signal.SIGKILL)
    except (ProcessLookupError, PermissionError):
        # ignore if the child is already gone
        pass
# output chunk size to read
# (the size passed to each child.stdout.read() call in main())
CHUNK_SIZE = 1024
# signals to be forwarded to the child
# NOTE(review): SIGNALS is assigned twice in this span; the set computed at
# the bottom replaces this explicit list, so only the set is in effect when
# main() installs handlers. Presumably the list is the older revision -
# confirm and drop one of the two.
SIGNALS = [
signal.SIGHUP,
signal.SIGINT,
# signal.SIGKILL,
signal.SIGQUIT,
signal.SIGTERM,
signal.SIGUSR1,
signal.SIGUSR2,
signal.SIGWINCH,
]
# everything catchable, excluding SIGCHLD
# (SIGKILL and SIGSTOP are removed as well, alongside SIGCHLD)
SIGNALS = set(signal.Signals) - {signal.SIGKILL, signal.SIGSTOP, signal.SIGCHLD}
# Entry point: run the requested command as a subprocess, relay signals to
# it, copy its output to both our stdout and a log file, and exit with the
# subprocess's return code.
#
# NOTE(review): this body appears to interleave two revisions of the
# function with indentation stripped - e.g. open() is given both "a" and
# "ab", tee() has two `for f in ...` headers, relay_signal() uses both
# os.killpg() and child.send_signal(), and a readline() loop is followed by
# a poll()-based loop. It is not runnable as shown; confirm against the real
# file which lines belong to the current revision.
def main():
# load login shell environment
login_env = get_login_env()
# start from our own environment; login-shell values win on conflicts
env = os.environ.copy()
env.update(login_env)
# open log file to send output
# (located in $REPO_DIR, or the current directory when REPO_DIR is unset)
log_file = open(
os.path.join(os.environ.get("REPO_DIR", "."), ".jupyter-server-log.txt"),
"a",
"ab",
)
# build the command
# like `exec "$@"`
command = sys.argv[1:]
# load entrypoint override from env
# (when set, it is prepended so the original arguments are passed to it)
r2d_entrypoint = os.environ.get("R2D_ENTRYPOINT")
if r2d_entrypoint:
command.insert(0, r2d_entrypoint)
# launch the subprocess
# stderr is merged into the stdout pipe, so a single stream is relayed
child = subprocess.Popen(
command,
bufsize=1,
env=env,
start_new_session=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=True,
)
child_pgid = os.getpgid(child.pid)
# if parent is forcefully shutdown,
# make sure child shuts down immediately as well
parent_pid = os.getpid()
# os.fork() returns 0 in the new process, which becomes the monitor
monitor_pid = os.fork()
if monitor_pid == 0:
# child process, sibling of 'real' command
# avoid receiving signals sent to parent
os.setpgrp()
# terminate child if parent goes away,
# e.g. in ungraceful KILL not relayed to children
monitor_parent(parent_pid, child_pgid)
return
# hook up ~all signals so that every signal the parent gets,
# the children also get
def relay_signal(sig, frame):
"""Relay a signal to children"""
print(
"Forwarding signal {sig} to {child_pgid}".format(
sig=sig, child_pgid=child_pgid
)
)
os.killpg(child_pgid, sig)
# DEBUG: show signal
child.send_signal(sig)
# question: maybe use all valid_signals() except a few, e.g. SIGCHLD?
# rather than opt-in list
for signum in SIGNALS:
signal.signal(signum, relay_signal)
# tee output from child to both our stdout and the log file
# each chunk is flushed right after it is written to each destination
def tee(chunk):
for f in [sys.stdout, log_file]:
"""Tee output from child to both our stdout and the log file"""
for f in [sys.stdout.buffer, log_file]:
f.write(chunk)
f.flush()
while child.poll() is None:
tee(child.stdout.readline())
# make stdout pipe non-blocking
# this means child.stdout.read(nbytes)
# will always return immediately, even if there's nothing to read
flags = fcntl.fcntl(child.stdout, fcntl.F_GETFL)
fcntl.fcntl(child.stdout, fcntl.F_SETFL, flags | os.O_NONBLOCK)
# the poller lets the loop below wait for output instead of spinning
poller = select.poll()
poller.register(child.stdout)
# flush the rest
# while child is running, constantly relay output
while child.poll() is None:
chunk = child.stdout.read(CHUNK_SIZE)
if chunk:
tee(chunk)
else:
# empty chunk means nothing to read
# wait for output on the pipe
# timeout is in milliseconds
poller.poll(1000)
# child has exited, continue relaying any remaining output
# At this point, read() will return an empty string when it's done
chunk = child.stdout.read()
while chunk:
tee(chunk)
chunk = child.stdout.read()
# child exited, cleanup monitor
# (the monitor may already have exited on its own, hence the except)
try:
os.kill(monitor_pid, signal.SIGKILL)
except ProcessLookupError:
pass
# preserve returncode
# make our returncode match the child's returncode
sys.exit(child.returncode)