2017-05-23 19:29:27 +00:00
|
|
|
"""repo2docker: convert git repositories into jupyter-suitable docker images
|
|
|
|
|
2017-10-23 22:39:01 +00:00
|
|
|
Images produced by repo2docker can be used with Jupyter notebooks standalone
|
|
|
|
or with BinderHub.
|
2017-05-23 19:29:27 +00:00
|
|
|
|
|
|
|
Usage:
|
|
|
|
|
|
|
|
python -m repo2docker https://github.com/you/your-repo
|
|
|
|
"""
|
2017-05-19 07:07:39 +00:00
|
|
|
import sys
|
2017-05-09 08:37:19 +00:00
|
|
|
import json
|
|
|
|
import os
|
2017-05-16 01:54:51 +00:00
|
|
|
import time
|
|
|
|
import logging
|
2017-05-22 21:41:52 +00:00
|
|
|
import uuid
|
|
|
|
import shutil
|
2017-07-29 06:46:04 +00:00
|
|
|
import argparse
|
2017-05-16 01:54:51 +00:00
|
|
|
from pythonjsonlogger import jsonlogger
|
2017-05-23 05:16:30 +00:00
|
|
|
import escapism
|
2017-05-09 08:37:19 +00:00
|
|
|
|
|
|
|
|
2017-05-22 21:41:52 +00:00
|
|
|
from traitlets.config import Application, LoggingConfigurable
|
2017-07-30 00:14:49 +00:00
|
|
|
from traitlets import Type, Bool, Unicode, Dict, List, default, Tuple
|
2017-05-09 08:37:19 +00:00
|
|
|
import docker
|
2017-05-22 17:29:48 +00:00
|
|
|
from docker.utils import kwargs_from_env
|
2017-05-09 08:37:19 +00:00
|
|
|
|
|
|
|
import subprocess
|
|
|
|
|
2017-05-25 22:15:00 +00:00
|
|
|
from .detectors import (
|
|
|
|
BuildPack, PythonBuildPack, DockerBuildPack, LegacyBinderDockerBuildPack,
|
2017-07-04 17:28:23 +00:00
|
|
|
CondaBuildPack, JuliaBuildPack, Python2BuildPack, BaseImage
|
2017-05-25 22:15:00 +00:00
|
|
|
)
|
2017-05-16 01:54:51 +00:00
|
|
|
from .utils import execute_cmd
|
2017-05-23 19:29:27 +00:00
|
|
|
from . import __version__
|
2017-05-09 08:37:19 +00:00
|
|
|
|
2017-07-04 17:28:23 +00:00
|
|
|
|
2017-09-07 12:30:14 +00:00
|
|
|
def compose(buildpacks, parent=None):
    """
    Fold a sequence of buildpack classes into a single composed image.

    The first class is instantiated directly; each subsequent class is
    instantiated and merged into the running result via ``compose_with``.
    """
    composed = buildpacks[0](parent=parent)
    remaining = buildpacks[1:]
    for pack_cls in remaining:
        composed = composed.compose_with(pack_cls(parent=parent))
    return composed
|
|
|
|
|
|
|
|
|
2017-05-22 23:22:36 +00:00
|
|
|
class Repo2Docker(Application):
    """Fetch a repository and build (then optionally push/run) a docker image from it.

    Lifecycle: ``initialize()`` parses command-line arguments and configures
    logging, then ``start()`` checks the repository out, picks a BuildPack via
    ``detect()``, builds the image, and honours the --push / --no-run flags.
    """
    name = 'jupyter-repo2docker'
    version = __version__
    # Reuse the module docstring as the description shown in --help output
    description = __doc__

    @default('log_level')
    def _default_log_level(self):
        """Default to INFO so build progress is visible without --debug."""
        return logging.INFO

    git_workdir = Unicode(
        "/tmp",
        config=True,
        help="""
        The directory to use to check out git repositories into.

        Should be somewhere ephemeral, such as /tmp
        """
    )

    buildpacks = List(
        # Ordered most-specific first: the first entry whose composed pack
        # detect()s the repository wins, so explicit Dockerfile-based packs
        # take precedence over the language heuristics further down.
        [
            (LegacyBinderDockerBuildPack, ),
            (DockerBuildPack, ),
            (BaseImage, CondaBuildPack, JuliaBuildPack),
            (BaseImage, CondaBuildPack),
            (BaseImage, PythonBuildPack, Python2BuildPack, JuliaBuildPack),
            (BaseImage, PythonBuildPack, JuliaBuildPack),
            (BaseImage, PythonBuildPack, Python2BuildPack),
            (BaseImage, PythonBuildPack),
        ],
        config=True,
        help="""
        Ordered list of BuildPacks to try to use to build a git repository.
        """
    )

    default_buildpack = Tuple(
        (BaseImage, PythonBuildPack),
        config=True,
        help="""
        The build pack to use when no buildpacks are found
        """
    )

    def fetch(self, url, ref, checkout_path):
        """Clone ``url`` into ``checkout_path``, then hard-reset to ``ref`` if given.

        Exits the process with status 1 if either git command fails.
        """
        try:
            for line in execute_cmd(['git', 'clone', '--depth', '50',
                                     url, checkout_path],
                                    capture=self.json_logs):
                self.log.info(line, extra=dict(phase='fetching'))
        except subprocess.CalledProcessError:
            self.log.error('Failed to clone repository!', extra=dict(phase='failed'))
            sys.exit(1)

        if ref:
            # NOTE(review): the clone above is shallow (--depth 50), so a ref
            # more than 50 commits behind the tip may not be present in the
            # checkout and this reset would fail — confirm intended behavior.
            try:
                for line in execute_cmd(['git', 'reset', '--hard', ref], cwd=checkout_path,
                                        capture=self.json_logs):
                    self.log.info(line, extra=dict(phase='fetching'))
            except subprocess.CalledProcessError:
                self.log.error('Failed to check out ref %s', ref, extra=dict(phase='failed'))
                sys.exit(1)

    def get_argparser(self):
        """Build and return the ArgumentParser for the command-line interface."""
        argparser = argparse.ArgumentParser()
        argparser.add_argument(
            '--config',
            default='repo2docker_config.py',
            help="Path to config file for repo2docker"
        )
        argparser.add_argument(
            '--json-logs',
            default=False,
            action='store_true',
            help='Emit JSON logs instead of human readable logs'
        )
        argparser.add_argument(
            'repo',
            help='Path to repository that should be built. Could be local path or a git URL.'
        )
        argparser.add_argument(
            '--image-name',
            help='Name of image to be built. If unspecified will be autogenerated'
        )
        argparser.add_argument(
            '--ref',
            help='If building a git url, which ref to check out'
        )
        argparser.add_argument(
            '--debug',
            help="Turn on debug logging",
            action='store_true',
        )
        argparser.add_argument(
            '--no-build',
            dest='build',
            action='store_false',
            help="Do not actually build the image. Useful in conjunction with --debug."
        )
        # REMAINDER collects everything after the positional args as the
        # custom command to run inside the built container.
        argparser.add_argument(
            'cmd',
            nargs=argparse.REMAINDER,
            help='Custom command to run after building container'
        )
        argparser.add_argument(
            '--no-run',
            dest='run',
            action='store_false',
            help='Do not run container after it has been built'
        )
        argparser.add_argument(
            '--no-clean',
            dest='clean',
            action='store_false',
            help="Don't clean up remote checkouts after we are done"
        )
        argparser.add_argument(
            '--push',
            dest='push',
            action='store_true',
            help='Push docker image to repository'
        )
        return argparser

    def json_excepthook(self, etype, evalue, traceback):
        """Called on an uncaught exception when using json logging

        Avoids non-JSON output on errors when using --json-logs
        """
        self.log.error("Error during build: %s", evalue,
                       exc_info=(etype, evalue, traceback),
                       extra=dict(phase='failed'),
                       )

    def initialize(self):
        """Parse CLI arguments, configure logging, and set up state for ``start()``."""
        args = self.get_argparser().parse_args()

        if args.debug:
            self.log_level = logging.DEBUG

        self.load_config_file(args.config)

        if os.path.exists(args.repo):
            # Let's treat this as a local directory we are building;
            # --ref and checkout cleanup do not apply to local paths.
            self.repo_type = 'local'
            self.repo = args.repo
            self.ref = None
            self.cleanup_checkout = False
        else:
            self.repo_type = 'remote'
            self.repo = args.repo
            self.ref = args.ref
            self.cleanup_checkout = args.clean

        if args.json_logs:
            # register JSON excepthook to avoid non-JSON output on errors
            sys.excepthook = self.json_excepthook
            # Need to reset existing handlers, or we repeat messages
            logHandler = logging.StreamHandler()
            formatter = jsonlogger.JsonFormatter()
            logHandler.setFormatter(formatter)
            self.log.handlers = []
            self.log.addHandler(logHandler)
            self.log.setLevel(logging.INFO)
        else:
            # due to json logger stuff above,
            # our log messages include carriage returns, newlines, etc.
            # remove the additional newline from the stream handler
            self.log.handlers[0].terminator = ''
            # We don't want a [Repo2Docker] on all messages
            self.log.handlers[0].formatter = logging.Formatter(fmt='%(message)s')

        if args.image_name:
            self.output_image_spec = args.image_name
        else:
            # Attempt to set a sane default!
            # HACK: Provide something more descriptive?
            self.output_image_spec = 'r2d' + escapism.escape(self.repo, escape_char='-').lower() + str(int(time.time()))

        self.push = args.push
        self.run = args.run
        self.json_logs = args.json_logs

        self.build = args.build
        if not self.build:
            # Can't push nor run if we aren't building
            self.run = False
            self.push = False

        self.run_cmd = args.cmd

    def push_image(self):
        """Push the built image, logging aggregated per-layer progress.

        Exits with status 1 if the docker daemon reports an error.
        """
        client = docker.APIClient(version='auto', **kwargs_from_env())
        # Build a progress setup for each layer, and only emit per-layer info every 1.5s
        layers = {}
        last_emit_time = time.time()
        for line in client.push(self.output_image_spec, stream=True):
            progress = json.loads(line.decode('utf-8'))
            if 'error' in progress:
                self.log.error(progress['error'], extra=dict(phase='failed'))
                sys.exit(1)
            if 'id' not in progress:
                # Status lines without a layer id (e.g. overall messages)
                continue
            if 'progressDetail' in progress and progress['progressDetail']:
                layers[progress['id']] = progress['progressDetail']
            else:
                layers[progress['id']] = progress['status']
            if time.time() - last_emit_time > 1.5:
                self.log.info('Pushing image\n', extra=dict(progress=layers, phase='pushing'))
                last_emit_time = time.time()

    def run_image(self):
        """Run the built image, streaming its logs; exit with its exit code.

        Without a custom command, runs a Jupyter notebook server on a free
        local port and publishes that port on the host.
        """
        client = docker.from_env(version='auto')
        # FIX: dropped a dead `port = self._get_free_port()` call here that
        # bound and released a socket only to be discarded immediately.
        if not self.run_cmd:
            port = str(self._get_free_port())
            run_cmd = ['jupyter', 'notebook', '--ip', '0.0.0.0', '--port', port]
            ports = {'%s/tcp' % port: port}
        else:
            # Custom commands get no published ports
            run_cmd = self.run_cmd
            ports = {}
        container = client.containers.run(
            self.output_image_spec,
            ports=ports,
            detach=True,
            command=run_cmd
        )
        # Wait for the container to leave the 'created' state before
        # attaching to its log stream
        while container.status == 'created':
            time.sleep(0.5)
            container.reload()

        try:
            for line in container.logs(stream=True):
                self.log.info(line.decode('utf-8'), extra=dict(phase='running'))
        finally:
            # Always stop/remove the container and propagate its exit code
            # as our own, even if log streaming is interrupted.
            container.reload()
            if container.status == 'running':
                self.log.info('Stopping container...\n', extra=dict(phase='running'))
                container.kill()
            exit_code = container.attrs['State']['ExitCode']
            container.remove()
            sys.exit(exit_code)

    def _get_free_port(self):
        """
        Hacky method to get a free random port on local host
        """
        import socket
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind(("", 0))
        port = s.getsockname()[1]
        s.close()
        return port

    def start(self):
        """Check out the repo, pick a buildpack, build, then push/run as requested."""
        if self.repo_type == 'local':
            checkout_path = self.repo
        else:
            # Remote repositories get a unique, throwaway checkout directory
            checkout_path = os.path.join(self.git_workdir, str(uuid.uuid4()))
            self.fetch(
                self.repo,
                self.ref,
                checkout_path
            )

        # BuildPack.detect() implementations inspect the current directory
        os.chdir(checkout_path)
        picked_buildpack = compose(self.default_buildpack, parent=self)

        for bp_spec in self.buildpacks:
            bp = compose(bp_spec, parent=self)
            if bp.detect():
                picked_buildpack = bp
                break

        self.log.debug(picked_buildpack.render(), extra=dict(phase='building'))

        if self.build:
            # FIX: log the pack actually selected. The old code used `bp`,
            # which is the *last tried* pack when no detect() matched (wrong
            # name logged) and a NameError if self.buildpacks is empty.
            self.log.info('Using %s builder\n', picked_buildpack.name,
                          extra=dict(phase='building'))
            for l in picked_buildpack.build(self.output_image_spec):
                if 'stream' in l:
                    self.log.info(l['stream'], extra=dict(phase='building'))
                elif 'error' in l:
                    self.log.info(l['error'], extra=dict(phase='failure'))
                    sys.exit(1)
                elif 'status' in l:
                    self.log.info('Fetching base image...\r', extra=dict(phase='building'))
                else:
                    self.log.info(json.dumps(l), extra=dict(phase='building'))

        if self.cleanup_checkout:
            shutil.rmtree(checkout_path, ignore_errors=True)

        if self.push:
            self.push_image()

        if self.run:
            self.run_image()
|