repo2docker/repo2docker/app.py

386 wiersze
12 KiB
Python
Czysty Zwykły widok Historia

"""repo2docker: convert git repositories into jupyter-suitable docker images
2017-10-23 22:39:01 +00:00
Images produced by repo2docker can be used with Jupyter notebooks standalone
or with BinderHub.
Usage:
python -m repo2docker https://github.com/you/your-repo
"""
import sys
import json
import os
import time
import logging
import argparse
import tempfile
from pythonjsonlogger import jsonlogger
import escapism
from traitlets.config import Application
from traitlets import Unicode, List, default, Tuple
import docker
from docker.utils import kwargs_from_env
import subprocess
from .buildpacks import (
PythonBuildPack, DockerBuildPack, LegacyBinderDockerBuildPack,
CondaBuildPack, JuliaBuildPack, Python2BuildPack, BaseImage
)
from .utils import execute_cmd, ByteSpecification, maybe_cleanup
from . import __version__
def compose(buildpacks, parent=None):
"""
Shortcut to compose many buildpacks together
"""
image = buildpacks[0](parent=parent)
for buildpack in buildpacks[1:]:
image = image.compose_with(buildpack(parent=parent))
return image
class Repo2Docker(Application):
name = 'jupyter-repo2docker'
version = __version__
description = __doc__
@default('log_level')
def _default_log_level(self):
return logging.INFO
git_workdir = Unicode(
None,
2017-05-23 03:10:59 +00:00
config=True,
2017-11-09 15:41:00 +00:00
allow_none=True,
2017-05-23 03:10:59 +00:00
help="""
Working directory to check out git repositories to.
2017-05-23 03:10:59 +00:00
The default is to use the system's temporary directory. Should be
somewhere ephemeral, such as /tmp.
2017-05-23 03:10:59 +00:00
"""
)
buildpacks = List(
[
(LegacyBinderDockerBuildPack, ),
(DockerBuildPack, ),
(BaseImage, CondaBuildPack, JuliaBuildPack),
(BaseImage, CondaBuildPack),
(BaseImage, PythonBuildPack, Python2BuildPack, JuliaBuildPack),
(BaseImage, PythonBuildPack, JuliaBuildPack),
(BaseImage, PythonBuildPack, Python2BuildPack),
(BaseImage, PythonBuildPack),
],
2017-05-23 03:10:59 +00:00
config=True,
help="""
Ordered list of BuildPacks to try to use to build a git repository.
"""
)
2017-07-30 00:14:49 +00:00
default_buildpack = Tuple(
(BaseImage, PythonBuildPack),
config=True,
help="""
The build pack to use when no buildpacks are found
"""
)
build_memory_limit = ByteSpecification(
0,
help="""
Total memory that can be used by the docker image building process.
Set to 0 for no limits.
""",
config=True
)
2017-05-23 03:26:27 +00:00
def fetch(self, url, ref, checkout_path):
try:
for line in execute_cmd(['git', 'clone', url, checkout_path],
capture=self.json_logs):
self.log.info(line, extra=dict(phase='fetching'))
except subprocess.CalledProcessError:
self.log.error('Failed to clone repository!', extra=dict(phase='failed'))
sys.exit(1)
if ref:
try:
for line in execute_cmd(['git', 'reset', '--hard', ref], cwd=checkout_path,
capture=self.json_logs):
self.log.info(line, extra=dict(phase='fetching'))
except subprocess.CalledProcessError:
self.log.error('Failed to check out ref %s', ref, extra=dict(phase='failed'))
sys.exit(1)
def get_argparser(self):
argparser = argparse.ArgumentParser()
argparser.add_argument(
'--config',
default='repo2docker_config.py',
help="Path to config file for repo2docker"
)
argparser.add_argument(
'--json-logs',
default=False,
action='store_true',
help='Emit JSON logs instead of human readable logs'
)
argparser.add_argument(
'repo',
help='Path to repository that should be built. Could be local path or a git URL.'
)
argparser.add_argument(
'--image-name',
help='Name of image to be built. If unspecified will be autogenerated'
)
argparser.add_argument(
'--ref',
help='If building a git url, which ref to check out'
)
argparser.add_argument(
'--debug',
help="Turn on debug logging",
action='store_true',
)
argparser.add_argument(
'--no-build',
dest='build',
action='store_false',
help="Do not actually build the image. Useful in conjunction with --debug."
)
argparser.add_argument(
2017-12-01 01:23:00 +00:00
'--build-memory-limit',
help='Total Memory that can be used by the docker build process'
)
argparser.add_argument(
'cmd',
nargs=argparse.REMAINDER,
help='Custom command to run after building container'
)
argparser.add_argument(
'--no-run',
dest='run',
action='store_false',
help='Do not run container after it has been built'
)
argparser.add_argument(
'--no-clean',
dest='clean',
action='store_false',
help="Don't clean up remote checkouts after we are done"
)
argparser.add_argument(
'--push',
dest='push',
action='store_true',
help='Push docker image to repository'
)
return argparser
def json_excepthook(self, etype, evalue, traceback):
"""Called on an uncaught exception when using json logging
Avoids non-JSON output on errors when using --json-logs
"""
self.log.error("Error during build: %s", evalue,
exc_info=(etype, evalue, traceback),
extra=dict(phase='failed'),
)
def initialize(self):
args = self.get_argparser().parse_args()
if args.debug:
self.log_level = logging.DEBUG
self.load_config_file(args.config)
if os.path.exists(args.repo):
# Let's treat this as a local directory we are building
self.repo_type = 'local'
self.repo = args.repo
self.ref = None
self.cleanup_checkout = False
else:
self.repo_type = 'remote'
self.repo = args.repo
self.ref = args.ref
self.cleanup_checkout = args.clean
if args.json_logs:
# register JSON excepthook to avoid non-JSON output on errors
sys.excepthook = self.json_excepthook
# Need to reset existing handlers, or we repeat messages
logHandler = logging.StreamHandler()
formatter = jsonlogger.JsonFormatter()
logHandler.setFormatter(formatter)
self.log.handlers = []
self.log.addHandler(logHandler)
self.log.setLevel(logging.INFO)
else:
# due to json logger stuff above,
# our log messages include carriage returns, newlines, etc.
# remove the additional newline from the stream handler
self.log.handlers[0].terminator = ''
# We don't want a [Repo2Docker] on all messages
self.log.handlers[0].formatter = logging.Formatter(fmt='%(message)s')
if args.image_name:
self.output_image_spec = args.image_name
else:
# Attempt to set a sane default!
# HACK: Provide something more descriptive?
self.output_image_spec = 'r2d' + escapism.escape(self.repo, escape_char='-').lower() + str(int(time.time()))
self.push = args.push
self.run = args.run
self.json_logs = args.json_logs
self.build = args.build
if not self.build:
# Can't push nor run if we aren't building
self.run = False
self.push = False
self.run_cmd = args.cmd
if args.build_memory_limit:
self.build_memory_limit = args.build_memory_limit
def push_image(self):
client = docker.APIClient(version='auto', **kwargs_from_env())
# Build a progress setup for each layer, and only emit per-layer info every 1.5s
layers = {}
last_emit_time = time.time()
for line in client.push(self.output_image_spec, stream=True):
progress = json.loads(line.decode('utf-8'))
if 'error' in progress:
self.log.error(progress['error'], extra=dict(phase='failed'))
sys.exit(1)
if 'id' not in progress:
continue
if 'progressDetail' in progress and progress['progressDetail']:
layers[progress['id']] = progress['progressDetail']
else:
layers[progress['id']] = progress['status']
if time.time() - last_emit_time > 1.5:
self.log.info('Pushing image\n', extra=dict(progress=layers, phase='pushing'))
last_emit_time = time.time()
def run_image(self):
client = docker.from_env(version='auto')
port = self._get_free_port()
if not self.run_cmd:
port = str(self._get_free_port())
run_cmd = ['jupyter', 'notebook', '--ip', '0.0.0.0', '--port', port]
ports={'%s/tcp' % port: port}
else:
run_cmd = self.run_cmd
ports = {}
container = client.containers.run(
self.output_image_spec,
2017-07-30 03:14:36 +00:00
ports=ports,
detach=True,
command=run_cmd
)
while container.status == 'created':
time.sleep(0.5)
container.reload()
try:
for line in container.logs(stream=True):
self.log.info(line.decode('utf-8'), extra=dict(phase='running'))
finally:
container.reload()
if container.status == 'running':
self.log.info('Stopping container...\n', extra=dict(phase='running'))
container.kill()
2017-07-29 21:31:23 +00:00
exit_code = container.attrs['State']['ExitCode']
container.remove()
2017-07-29 21:31:23 +00:00
sys.exit(exit_code)
def _get_free_port(self):
"""
Hacky method to get a free random port on local host
"""
import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(("",0))
port = s.getsockname()[1]
s.close()
return port
def start(self):
if self.repo_type == 'local':
checkout_path = self.repo
else:
if self.git_workdir is None:
checkout_path = tempfile.mkdtemp(prefix='repo2docker')
else:
checkout_path = self.git_workdir
# keep as much as possible in the context manager to make sure we
# cleanup if things go wrong
with maybe_cleanup(checkout_path, self.cleanup_checkout):
if self.repo_type == 'remote':
self.fetch(
self.repo,
self.ref,
checkout_path
)
os.chdir(checkout_path)
picked_buildpack = compose(self.default_buildpack, parent=self)
for bp_spec in self.buildpacks:
bp = compose(bp_spec, parent=self)
if bp.detect():
picked_buildpack = bp
break
self.log.debug(picked_buildpack.render(),
extra=dict(phase='building'))
if self.build:
self.log.info('Using %s builder\n', bp.name,
extra=dict(phase='building'))
for l in picked_buildpack.build(self.output_image_spec, self.build_memory_limit):
if 'stream' in l:
self.log.info(l['stream'],
extra=dict(phase='building'))
elif 'error' in l:
self.log.info(l['error'], extra=dict(phase='failure'))
sys.exit(1)
elif 'status' in l:
self.log.info('Fetching base image...\r',
extra=dict(phase='building'))
else:
self.log.info(json.dumps(l),
extra=dict(phase='building'))
if self.push:
self.push_image()
if self.run:
self.run_image()