From 91e41fca90846fbcf51e41a6a64835a8be4dd440 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Mon, 15 May 2017 18:54:51 -0700 Subject: [PATCH] Revamp logging to provide structured JSON logs - Filter out useless info from docker build / push steps - Use JSON logging everywhere. User readable messages go into message, extra useful info goes into extra. - Capture output from all spawned processes and stuff them into an appropriate JSON structure --- builder/app.py | 57 +++++++++++++++++++++++++++++--------------- builder/detectors.py | 26 +++++++++++++++++--- builder/utils.py | 9 +++++++ setup.py | 3 ++- 4 files changed, 72 insertions(+), 23 deletions(-) create mode 100644 builder/utils.py diff --git a/builder/app.py b/builder/app.py index 8b096e61..d2f77bdb 100644 --- a/builder/app.py +++ b/builder/app.py @@ -1,5 +1,8 @@ import json import os +import time +import logging +from pythonjsonlogger import jsonlogger from traitlets.config import Application, LoggingConfigurable, Unicode, Dict, List @@ -8,8 +11,9 @@ import docker import subprocess - from .detectors import BuildPack, PythonBuildPack, DockerBuildPack +from .utils import execute_cmd + class Builder(Application): config_file = Unicode( @@ -55,15 +59,18 @@ class Builder(Application): def fetch(self, url, ref, output_path): - subprocess.check_call([ - "git", "clone", "--depth", "1", - url, output_path - ]) - + for line in execute_cmd(['git', 'clone', '--depth', '1', url, output_path]): + self.log.info(line, extra=dict(phase='fetching')) def initialize(self, *args, **kwargs): super().initialize(*args, **kwargs) - + logHandler = logging.StreamHandler() + formatter = jsonlogger.JsonFormatter() + logHandler.setFormatter(formatter) + # Need to reset existing handlers, or we repeat messages + self.log.handlers = [] + self.log.addHandler(logHandler) + self.log.setLevel(logging.INFO) self.load_config_file(self.config_file) def run(self): @@ -72,17 +79,17 @@ class Builder(Application): # WHAT WE REALLY WANT IS TO NOT DO ANY WORK IF THE IMAGE EXISTS client = docker.APIClient(base_url='unix://var/run/docker.sock', version='auto') - try: - repo, tag = self.output_image_spec.split(':') - for line in client.pull( - repository=repo, - tag=tag, - stream=True, - ): - print(json.loads(line.decode('utf-8'))) + repo, tag = self.output_image_spec.split(':') + for line in client.pull( + repository=repo, + tag=tag, + stream=True, + ): + progress = json.loads(line.decode('utf-8')) + if 'error' in progress: + break + else: return - except docker.errors.ImageNotFound: - pass output_path = os.path.join(self.git_workdir, self.build_name) self.fetch( @@ -93,15 +100,27 @@ class Builder(Application): for bp_class in self.buildpacks: bp = bp_class() if bp.detect(output_path): + self.log.info('Using %s builder', bp.name, extra=dict(phase='building')) bp.build(output_path, self.output_image_spec) break else: raise Exception("No compatible builders found") - + # Build a progress setup for each layer, and only emit per-layer info every 1.5s + layers = {} + last_emit_time = time.time() for line in client.push(self.output_image_spec, stream=True): progress = json.loads(line.decode('utf-8')) - print(progress) + if 'id' not in progress: + continue + if 'progressDetail' in progress and progress['progressDetail']: + layers[progress['id']] = progress['progressDetail'] + else: + layers[progress['id']] = progress['status'] + if time.time() - last_emit_time > 1.5: + self.log.info('Pushing image', extra=dict(progress=layers, phase='pushing')) + last_emit_time = time.time() + if __name__ == '__main__': f = Builder() diff --git a/builder/detectors.py b/builder/detectors.py index 093fc52a..b70a3023 100644 --- a/builder/detectors.py +++ b/builder/detectors.py @@ -6,8 +6,25 @@ import docker from traitlets import Unicode, Dict from traitlets.config import LoggingConfigurable +import logging +from pythonjsonlogger import jsonlogger + +from .utils import execute_cmd + class BuildPack(LoggingConfigurable): + name = Unicode() + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # FIXME: Not sure why this needs to be repeated - shouldn't configuring Application be enough? + logHandler = logging.StreamHandler() + formatter = jsonlogger.JsonFormatter() + logHandler.setFormatter(formatter) + # Need to reset existing handlers, or we repeat messages + self.log.handlers = [] + self.log.addHandler(logHandler) + self.log.setLevel(logging.INFO) + def detect(self, workdir): """ Return True if app in workdir can be built with this buildpack @@ -23,6 +40,7 @@ class BuildPack(LoggingConfigurable): class DockerBuildPack(BuildPack): + name = Unicode('Dockerfile') def detect(self, workdir): return os.path.exists(os.path.join(workdir, 'Dockerfile')) @@ -33,11 +51,12 @@ class DockerBuildPack(BuildPack): tag=output_image_spec, decode=True ): - # FIXME: Properly stream back useful information only - pass + if 'stream' in progress: + self.log.info(progress['stream'].rstrip(), extra=dict(phase='building')) class PythonBuildPack(BuildPack): + name = Unicode('python-pip') runtime_builder_map = Dict({ 'python-2.7': 'jupyterhub/singleuser-builder-venv-2.7:v0.1.5', 'python-3.5': 'jupyterhub/singleuser-builder-venv-3.5:v0.1.5', @@ -65,4 +84,5 @@ class PythonBuildPack(BuildPack): self.runtime_builder_map[self.runtime], output_image_spec ] - subprocess.check_call(cmd) + for line in execute_cmd(cmd): + self.log.info(line, extra=dict(phase='building', builder=self.name)) diff --git a/builder/utils.py b/builder/utils.py new file mode 100644 index 00000000..947e6c78 --- /dev/null +++ b/builder/utils.py @@ -0,0 +1,9 @@ +import subprocess + +def execute_cmd(cmd): + """ + Call given command, yielding output line by line + """ + with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True) as proc: + for line in iter(proc.stdout.readline, ''): + yield line.rstrip() diff --git a/setup.py b/setup.py index c54cf2fa..bde00ffd 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,8 @@ setup( version='0.1', install_requires=[ 'docker', - 'traitlets' + 'traitlets', + 'python-json-logger' ], author='Yuvi Panda', author_email='yuvipanda@gmail.com',