kopia lustrzana https://github.com/jupyterhub/repo2docker
Merge pull request #461 from betatim/caching-builds
[MRG] Start reusing existing docker images if content hasn't changed
pull/481/head
commit
8c9f08cd1e
|
@ -13,7 +13,6 @@ import sys
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import pwd
|
import pwd
|
||||||
import subprocess
|
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
@ -220,6 +219,15 @@ class Repo2Docker(Application):
|
||||||
spec, checkout_path, yield_output=self.json_logs):
|
spec, checkout_path, yield_output=self.json_logs):
|
||||||
self.log.info(log_line, extra=dict(phase='fetching'))
|
self.log.info(log_line, extra=dict(phase='fetching'))
|
||||||
|
|
||||||
|
self.output_image_spec = (
|
||||||
|
'r2d' +
|
||||||
|
escapism.escape(self.repo, escape_char='-').lower()
|
||||||
|
)
|
||||||
|
if picked_content_provider.content_id is not None:
|
||||||
|
self.output_image_spec += picked_content_provider.content_id
|
||||||
|
else:
|
||||||
|
self.output_image_spec += str(int(time.time()))
|
||||||
|
|
||||||
def validate_image_name(self, image_name):
|
def validate_image_name(self, image_name):
|
||||||
"""
|
"""
|
||||||
Validate image_name read by argparse
|
Validate image_name read by argparse
|
||||||
|
@ -476,13 +484,8 @@ class Repo2Docker(Application):
|
||||||
if args.image_name:
|
if args.image_name:
|
||||||
self.output_image_spec = args.image_name
|
self.output_image_spec = args.image_name
|
||||||
else:
|
else:
|
||||||
# Attempt to set a sane default!
|
# we will pick a name after fetching the repository
|
||||||
# HACK: Provide something more descriptive?
|
self.output_image_spec = None
|
||||||
self.output_image_spec = (
|
|
||||||
'r2d' +
|
|
||||||
escapism.escape(self.repo, escape_char='-').lower() +
|
|
||||||
str(int(time.time()))
|
|
||||||
)
|
|
||||||
|
|
||||||
self.push = args.push
|
self.push = args.push
|
||||||
self.run = args.run
|
self.run = args.run
|
||||||
|
@ -674,40 +677,18 @@ class Repo2Docker(Application):
|
||||||
s.close()
|
s.close()
|
||||||
return port
|
return port
|
||||||
|
|
||||||
def start(self):
|
def find_image(self):
|
||||||
"""Start execution of repo2docker"""
|
# check if we already have an image for this content
|
||||||
# Check if r2d can connect to docker daemon
|
client = docker.APIClient(version='auto', **kwargs_from_env())
|
||||||
if self.build:
|
for image in client.images():
|
||||||
try:
|
if image['RepoTags'] is not None:
|
||||||
client = docker.APIClient(version='auto',
|
for tags in image['RepoTags']:
|
||||||
**kwargs_from_env())
|
if tags == self.output_image_spec + ":latest":
|
||||||
del client
|
return True
|
||||||
except DockerException as e:
|
|
||||||
print("Docker client initialization error. Check if docker is"
|
|
||||||
" running on the host.")
|
|
||||||
print(e)
|
|
||||||
if self.log_level == logging.DEBUG:
|
|
||||||
raise e
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# If the source to be executed is a directory, continue using the
|
return False
|
||||||
# directory. In the case of a local directory, it is used as both the
|
|
||||||
# source and target. Reusing a local directory seems better than
|
|
||||||
# making a copy of it as it might contain large files that would be
|
|
||||||
# expensive to copy.
|
|
||||||
if os.path.isdir(self.repo):
|
|
||||||
checkout_path = self.repo
|
|
||||||
else:
|
|
||||||
if self.git_workdir is None:
|
|
||||||
checkout_path = tempfile.mkdtemp(prefix='repo2docker')
|
|
||||||
else:
|
|
||||||
checkout_path = self.git_workdir
|
|
||||||
|
|
||||||
# keep as much as possible in the context manager to make sure we
|
|
||||||
# cleanup if things go wrong
|
|
||||||
with maybe_cleanup(checkout_path, self.cleanup_checkout):
|
|
||||||
self.fetch(self.repo, self.ref, checkout_path)
|
|
||||||
|
|
||||||
|
def _build_image(self, checkout_path):
|
||||||
if self.subdir:
|
if self.subdir:
|
||||||
checkout_path = os.path.join(checkout_path, self.subdir)
|
checkout_path = os.path.join(checkout_path, self.subdir)
|
||||||
if not os.path.isdir(checkout_path):
|
if not os.path.isdir(checkout_path):
|
||||||
|
@ -752,6 +733,45 @@ class Repo2Docker(Application):
|
||||||
self.log.info(json.dumps(l),
|
self.log.info(json.dumps(l),
|
||||||
extra=dict(phase='building'))
|
extra=dict(phase='building'))
|
||||||
|
|
||||||
|
def start(self):
|
||||||
|
"""Start execution of repo2docker"""
|
||||||
|
# Check if r2d can connect to docker daemon
|
||||||
|
if self.build:
|
||||||
|
try:
|
||||||
|
docker.APIClient(version='auto', **kwargs_from_env())
|
||||||
|
except DockerException as e:
|
||||||
|
print("Docker client initialization error. Check if docker is"
|
||||||
|
" running on the host.")
|
||||||
|
print(e)
|
||||||
|
if self.log_level == logging.DEBUG:
|
||||||
|
raise e
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# If the source to be executed is a directory, continue using the
|
||||||
|
# directory. In the case of a local directory, it is used as both the
|
||||||
|
# source and target. Reusing a local directory seems better than
|
||||||
|
# making a copy of it as it might contain large files that would be
|
||||||
|
# expensive to copy.
|
||||||
|
if os.path.isdir(self.repo):
|
||||||
|
checkout_path = self.repo
|
||||||
|
else:
|
||||||
|
if self.git_workdir is None:
|
||||||
|
checkout_path = tempfile.mkdtemp(prefix='repo2docker')
|
||||||
|
else:
|
||||||
|
checkout_path = self.git_workdir
|
||||||
|
|
||||||
|
# keep as much as possible in the context manager to make sure we
|
||||||
|
# cleanup if things go wrong
|
||||||
|
with maybe_cleanup(checkout_path, self.cleanup_checkout):
|
||||||
|
self.fetch(self.repo, self.ref, checkout_path)
|
||||||
|
|
||||||
|
if self.find_image():
|
||||||
|
self.log.info("Reusing existing image ({}), not "
|
||||||
|
"building.".format(self.output_image_spec))
|
||||||
|
|
||||||
|
else:
|
||||||
|
self._build_image(checkout_path)
|
||||||
|
|
||||||
if self.push:
|
if self.push:
|
||||||
self.push_image()
|
self.push_image()
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,24 @@ class ContentProvider:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.log = logging.getLogger("repo2docker")
|
self.log = logging.getLogger("repo2docker")
|
||||||
|
|
||||||
|
@property
def content_id(self):
    """Identifier summarising the exact version of the fetched content.

    The value is used when naming the image that gets built. When two
    runs of repo2docker produce the same ID, the image from the earlier
    run is reused (provided it still exists) instead of building again.

    An ID that captures the content, such as the revision of a Git
    repository or a hash computed over all of the content, lets existing
    images be reused and so shortens build times.

    The content ID can be any string.

    Returning `None` switches this behaviour off, in which case a fresh
    image is always built. This base implementation returns `None`.
    """
    return None
|
||||||
|
|
||||||
def detect(self, repo, ref=None, extra_args=None):
|
def detect(self, repo, ref=None, extra_args=None):
|
||||||
"""Determine compatibility between source and this provider.
|
"""Determine compatibility between source and this provider.
|
||||||
|
|
||||||
|
|
|
@ -44,3 +44,15 @@ class Git(ContentProvider):
|
||||||
cwd=output_dir,
|
cwd=output_dir,
|
||||||
capture=yield_output):
|
capture=yield_output):
|
||||||
yield line
|
yield line
|
||||||
|
|
||||||
|
cmd = ['git', 'rev-parse', 'HEAD']
|
||||||
|
sha1 = subprocess.Popen(cmd, stdout=subprocess.PIPE, cwd=output_dir)
|
||||||
|
self._sha1 = sha1.stdout.read().decode().strip()
|
||||||
|
|
||||||
|
@property
def content_id(self):
    """A unique ID to represent the version of the content.

    Uses the first seven characters of the git commit ID of the repository.

    Returns `None` when no commit ID is available, i.e. when `_sha1` has
    not been assigned yet or is empty. `None` is the documented way of
    saying "no usable ID", so the caller falls back to building a fresh,
    uniquely named image rather than reusing one under an ambiguous name.
    """
    # `_sha1` is filled in from the output of `git rev-parse HEAD`; it may
    # not exist yet, and it is an empty string if that command printed
    # nothing (for example because it failed).
    sha1 = getattr(self, '_sha1', None)
    if not sha1:
        # An empty ID would silently yield an image name without any
        # version suffix, which could match a stale image.
        return None
    return sha1[:7]
|
||||||
|
|
Ładowanie…
Reference in New Issue