From e2099dbc97e807ce9935e428e148ac2dd08f93c6 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Thu, 1 Feb 2018 22:08:08 -0800 Subject: [PATCH 1/7] Add basic R + IRKernel + RStudio support This sets up R + RStudio + IRKernel for a repository that contains: 1. A `runtime.txt` file with the text: r-<year>-<month>-<date> Where 'year', 'month' and 'date' refer to a specific date snapshot of https://mran.microsoft.com/timemachine from which libraries are to be installed. 2. An optional `install.R` file that will be executed at build time, and can be used for installing packages from both MRAN and GitHub. It currently sets up R from the Ubuntu repository being used. This is not ideal, and we should investigate other solutions! Fixes #24 --- .travis.yml | 1 + repo2docker/app.py | 4 +- repo2docker/buildpacks/__init__.py | 1 + repo2docker/buildpacks/r.py | 158 +++++++++++++++++++++++++++++ tests/r/simple/install.R | 1 + tests/r/simple/runtime.txt | 1 + tests/r/simple/verify | 2 + 7 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 repo2docker/buildpacks/r.py create mode 100644 tests/r/simple/install.R create mode 100644 tests/r/simple/runtime.txt create mode 100755 tests/r/simple/verify diff --git a/.travis.yml b/.travis.yml index d691fa2f..01c92c9b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,6 +29,7 @@ env: - REPO_TYPE=conda - REPO_TYPE=venv - REPO_TYPE=julia + - REPO_TYPE=r - REPO_TYPE=dockerfile - REPO_TYPE=external/* - REPO_TYPE=*.py diff --git a/repo2docker/app.py b/repo2docker/app.py index 21e909c7..cab2e2d5 100644 --- a/repo2docker/app.py +++ b/repo2docker/app.py @@ -29,7 +29,8 @@ import subprocess from .buildpacks import ( PythonBuildPack, DockerBuildPack, LegacyBinderDockerBuildPack, - CondaBuildPack, JuliaBuildPack, Python2BuildPack, BaseImage + CondaBuildPack, JuliaBuildPack, Python2BuildPack, BaseImage, + RBuildPack ) from .utils import execute_cmd, ByteSpecification, maybe_cleanup, is_valid_docker_image_name, validate_and_generate_port_mapping from . 
import __version__ @@ -64,6 +65,7 @@ class Repo2Docker(Application): JuliaBuildPack(), CondaBuildPack(), Python2BuildPack(), + RBuildPack(), PythonBuildPack() ], config=True, diff --git a/repo2docker/buildpacks/__init__.py b/repo2docker/buildpacks/__init__.py index 0f54f390..ae60c05a 100644 --- a/repo2docker/buildpacks/__init__.py +++ b/repo2docker/buildpacks/__init__.py @@ -4,3 +4,4 @@ from .conda import CondaBuildPack from .julia import JuliaBuildPack from .docker import DockerBuildPack from .legacy import LegacyBinderDockerBuildPack +from .r import RBuildPack diff --git a/repo2docker/buildpacks/r.py b/repo2docker/buildpacks/r.py new file mode 100644 index 00000000..b18241d7 --- /dev/null +++ b/repo2docker/buildpacks/r.py @@ -0,0 +1,158 @@ +import re +import os +import datetime + +from .python import PythonBuildPack + +class RBuildPack(PythonBuildPack): + """ + Setup R for use with a repository + + This sets up R + RStudio + IRKernel for a repository that contains: + + 1. A `runtime.txt` file with the text: + + r--- + + Where 'year', 'month' and 'date' refer to a specific + date snapshot of https://mran.microsoft.com/timemachine + from which libraries are to be installed. + + 2. An optional `install.R` file that will be executed at build time, + and can be used for installing packages from both MRAN and GitHub. + + It currently sets up R from the ubuntu repository being used. This + is unideal, and we should investigate other solutions! 
+ """ + @property + def runtime(self): + """ + Return contents of runtime.txt if it exists, '' otherwise + """ + if not hasattr(self, '_runtime'): + runtime_path = self.binder_path('runtime.txt') + try: + with open(runtime_path) as f: + self._runtime = f.read().strip() + except FileNotFoundError: + self._runtime = '' + + return self._runtime + + @property + def checkpoint_date(self): + """ + Return the date of MRAN checkpoint to use for this repo + + Returns '' if no date is specified + """ + if not hasattr(self, '_checkpoint_date'): + match = re.match(r'r-(\d\d\d\d)-(\d\d)-(\d\d)', self.runtime) + if not match: + self._checkpoint_date = False + + year, month, day = [int(s) for s in match.groups()] + + self._checkpoint_date = datetime.date(year, month, day) + + return self._checkpoint_date + + def detect(self): + """ + Check if current repo should be built with the R Build pack + + Note that we explicitly do *not* check if a requirements.txt + is present here (by calling super().detect()). + """ + return bool(self.checkpoint_date) + + def get_path(self): + return super().get_path() + [ + '/usr/lib/rstudio-server/bin/' + ] + + def get_env(self): + return super().get_env() + [ + # This is the path where user libraries are installed + ('R_LIBS_USER', '${APP_BASE}/rlibs') + ] + + def get_packages(self): + return super().get_packages().union([ + 'r-base', + # For rstudio + 'psmisc', + 'libapparmor1', + 'sudo', + 'lsb-release' + ]) + + def get_build_scripts(self): + mran_url = 'https://mran.microsoft.com/snapshot/{}'.format( + self.checkpoint_date.isoformat() + ) + rstudio_url = 'https://download2.rstudio.org/rstudio-server-1.1.419-amd64.deb' + # This is MD5, because that is what RStudio download page provides! 
+ rstudio_checksum = '24cd11f0405d8372b4168fc9956e0386' + return super().get_build_scripts() + [ + ( + "root", + r""" + mkdir -p ${R_LIBS_USER} && \ + chown -R ${NB_USER}:${NB_USER} ${R_LIBS_USER} + """ + ), + ( + "root", + # We set the default CRAN repo to the MRAN one at given date + # We set download method to be curl so we get HTTPS support + r""" + echo "options(repos = c(CRAN='{mran_url}'), download.file.method = 'libcurl')" > /etc/R/Rprofile.site + """.format(mran_url=mran_url) + ), + ( + "root", + # Install RStudio! + r""" + curl -L --fail {rstudio_url} > /tmp/rstudio.deb && \ + echo '{rstudio_checksum} /tmp/rstudio.deb' | md5sum -c - && \ + dpkg -i /tmp/rstudio.deb && \ + rm /tmp/rstudio.deb + """.format( + rstudio_url=rstudio_url, + rstudio_checksum=rstudio_checksum + ) + ), + ( + "${NB_USER}", + # Install a pinned version of IRKernel and set it up for use! + r""" + R --quiet -e "install.packages('devtools')" && \ + R --quiet -e "devtools::install_github('IRkernel/IRkernel', ref='0.8.11')" && \ + R --quiet -e "IRkernel::installspec(prefix='${NB_PYTHON_PREFIX}')" + """ + ), + ( + "${NB_USER}", + # Install nbrsessionproxy + r""" + pip install --no-cache-dir nbrsessionproxy==0.6.1 && \ + jupyter serverextension enable nbrsessionproxy --sys-prefix && \ + jupyter nbextension install --py nbrsessionproxy --sys-prefix && \ + jupyter nbextension enable --py nbrsessionproxy --sys-prefix + """ + ) + + ] + + def get_assemble_scripts(self): + assemble_scripts = super().get_assemble_scripts() + if os.path.exists('install.R'): + assemble_scripts += [ + ( + "${NB_USER}", + "Rscript install.R" + ) + ] + + return assemble_scripts diff --git a/tests/r/simple/install.R b/tests/r/simple/install.R new file mode 100644 index 00000000..df6d6a83 --- /dev/null +++ b/tests/r/simple/install.R @@ -0,0 +1 @@ +install.packages("ggplot2") diff --git a/tests/r/simple/runtime.txt b/tests/r/simple/runtime.txt new file mode 100644 index 00000000..c7960351 --- /dev/null +++ 
b/tests/r/simple/runtime.txt @@ -0,0 +1 @@ +r-2017-10-24 diff --git a/tests/r/simple/verify b/tests/r/simple/verify new file mode 100755 index 00000000..eec6690f --- /dev/null +++ b/tests/r/simple/verify @@ -0,0 +1,2 @@ +#!/usr/bin/env Rscript +library('ggplot2') From 25611f0230013b0da63d5090088da53c1a4b61e2 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Fri, 2 Feb 2018 09:37:56 -0800 Subject: [PATCH 2/7] Fix venv test failure caused by r buildpack addition --- repo2docker/buildpacks/r.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/repo2docker/buildpacks/r.py b/repo2docker/buildpacks/r.py index b18241d7..90b5fe83 100644 --- a/repo2docker/buildpacks/r.py +++ b/repo2docker/buildpacks/r.py @@ -50,10 +50,8 @@ class RBuildPack(PythonBuildPack): match = re.match(r'r-(\d\d\d\d)-(\d\d)-(\d\d)', self.runtime) if not match: self._checkpoint_date = False - - year, month, day = [int(s) for s in match.groups()] - - self._checkpoint_date = datetime.date(year, month, day) + else: + self._checkpoint_date = datetime.date(*[int(s) for s in match.groups()]) return self._checkpoint_date From 39d3e6a8f777a0e18da1bc8b60db68b2f765f1b3 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Fri, 2 Feb 2018 17:38:36 -0800 Subject: [PATCH 3/7] Add more docstrings in R BuildPack - Version-pin the devtools package installation independent of the current version - Make sure get_build_scripts is independent of contents of current repository --- repo2docker/buildpacks/r.py | 123 ++++++++++++++++++++++++++---------- 1 file changed, 89 insertions(+), 34 deletions(-) diff --git a/repo2docker/buildpacks/r.py b/repo2docker/buildpacks/r.py index 90b5fe83..07f8f39b 100644 --- a/repo2docker/buildpacks/r.py +++ b/repo2docker/buildpacks/r.py @@ -10,19 +10,19 @@ class RBuildPack(PythonBuildPack): This sets up R + RStudio + IRKernel for a repository that contains: - 1. A `runtime.txt` file with the text: + 1. 
A `runtime.txt` file with the text: - r--- + r-<year>-<month>-<date> - Where 'year', 'month' and 'date' refer to a specific - date snapshot of https://mran.microsoft.com/timemachine - from which libraries are to be installed. + Where 'year', 'month' and 'date' refer to a specific + date snapshot of https://mran.microsoft.com/timemachine + from which libraries are to be installed. - 2. An optional `install.R` file that will be executed at build time, - and can be used for installing packages from both MRAN and GitHub. + 2. An optional `install.R` file that will be executed at build time, + and can be used for installing packages from both MRAN and GitHub. - It currently sets up R from the ubuntu repository being used. This - is unideal, and we should investigate other solutions! + The `r-base` package from Ubuntu apt repositories is used to install + R itself, rather than any of the methods from https://cran.r-project.org/. """ @property def runtime(self): @@ -59,23 +59,48 @@ class RBuildPack(PythonBuildPack): """ Check if current repo should be built with the R Build pack - Note that we explicitly do *not* check if a requirements.txt - is present here (by calling super().detect()). + super().detect() is not called in this function - it would return false + unless a `requirements.txt` is present and we do not want to require the + presence of a `requirements.txt` to use R. + + Instead we just check if runtime.txt contains a string of the form + `r-<YYYY>-<MM>-<DD>
` """ + # If no date is found, then self.checkpoint_date will be False + # Otherwise, it'll be a date object, which will evaluate to True return bool(self.checkpoint_date) def get_path(self): + """ + Return paths to be added to the PATH environment variable. + + The RStudio package installs its binaries in a non-standard path, + so we explicitly add that path to PATH. + """ return super().get_path() + [ '/usr/lib/rstudio-server/bin/' ] def get_env(self): + """ + Return environment variables to be set. + + We want libraries to be installed in a path that users can write to + without needing root. This is set via the `R_LIBS_USER` environment + variable, so we set that here. + """ return super().get_env() + [ # This is the path where user libraries are installed ('R_LIBS_USER', '${APP_BASE}/rlibs') ] def get_packages(self): + """ + Return list of packages to be installed. + + We install a base version of R, and packages required for RStudio to + be installed. + """ return super().get_packages().union([ 'r-base', # For rstudio @@ -86,12 +111,29 @@ class RBuildPack(PythonBuildPack): ]) def get_build_scripts(self): - mran_url = 'https://mran.microsoft.com/snapshot/{}'.format( - self.checkpoint_date.isoformat() - ) + """ + Return series of build-steps common to all R repositories + + All scripts here should be independent of contents of the repository. + + This sets up: + + - A directory owned by non-root in ${R_LIBS_USER} for installing R packages into + - RStudio + - R's devtools package, at a particular frozen version (determined by MRAN) + - IRKernel + - nbrsessionproxy (to access RStudio via Jupyter Notebook) + """ rstudio_url = 'https://download2.rstudio.org/rstudio-server-1.1.419-amd64.deb' # This is MD5, because that is what RStudio download page provides! rstudio_checksum = '24cd11f0405d8372b4168fc9956e0386' + + # Version of MRAN to pull devtools from. 
+ devtools_version = '2018-02-01' + + # IRKernel version - specified as a tag in the IRKernel repository + irkernel_version = '0.8.11' + return super().get_build_scripts() + [ ( "root", @@ -100,19 +142,11 @@ class RBuildPack(PythonBuildPack): chown -R ${NB_USER}:${NB_USER} ${R_LIBS_USER} """ ), - ( - "root", - # We set the default CRAN repo to the MRAN one at given date - # We set download method to be curl so we get HTTPS support - r""" - echo "options(repos = c(CRAN='{mran_url}'), download.file.method = 'libcurl')" > /etc/R/Rprofile.site - """.format(mran_url=mran_url) - ), ( "root", # Install RStudio! r""" - curl -L --fail {rstudio_url} > /tmp/rstudio.deb && \ + curl --quiet -L --fail {rstudio_url} > /tmp/rstudio.deb && \ echo '{rstudio_checksum} /tmp/rstudio.deb' | md5sum -c - && \ dpkg -i /tmp/rstudio.deb && \ rm /tmp/rstudio.deb @@ -121,15 +155,6 @@ class RBuildPack(PythonBuildPack): rstudio_checksum=rstudio_checksum ) ), - ( - "${NB_USER}", - # Install a pinned version of IRKernel and set it up for use! - r""" - R --quiet -e "install.packages('devtools')" && \ - R --quiet -e "devtools::install_github('IRkernel/IRkernel', ref='0.8.11')" && \ - R --quiet -e "IRkernel::installspec(prefix='${NB_PYTHON_PREFIX}')" - """ - ), ( "${NB_USER}", # Install nbrsessionproxy @@ -139,12 +164,42 @@ class RBuildPack(PythonBuildPack): jupyter nbextension install --py nbrsessionproxy --sys-prefix && \ jupyter nbextension enable --py nbrsessionproxy --sys-prefix """ + ), + ( + "${NB_USER}", + # Install a pinned version of IRKernel and set it up for use! 
+ r""" + R --quiet -e "install.packages('devtools', repos='https://mran.microsoft.com/snapshot/{devtools_version}', method='libcurl')" && \ + R --quiet -e "devtools::install_github('IRkernel/IRkernel', ref='{irkernel_version}')" && \ + R --quiet -e "IRkernel::installspec(prefix='$NB_PYTHON_PREFIX')" + """.format( + devtools_version=devtools_version, + irkernel_version=irkernel_version + ) ) - ] def get_assemble_scripts(self): - assemble_scripts = super().get_assemble_scripts() + """ + Return series of build-steps specific to this repository + + We set the snapshot date used to install R libraries from based on the + contents of runtime.txt, and run the `install.R` script if it exists. + """ + mran_url = 'https://mran.microsoft.com/snapshot/{}'.format( + self.checkpoint_date.isoformat() + ) + assemble_scripts = super().get_assemble_scripts() + [ + ( + "root", + # We set the default CRAN repo to the MRAN one at given date + # We set download method to be curl so we get HTTPS support + r""" + echo "options(repos = c(CRAN='{mran_url}'), download.file.method = 'libcurl')" > /etc/R/Rprofile.site + """.format(mran_url=mran_url) + ), + ] + if os.path.exists('install.R'): assemble_scripts += [ ( From 957acbcd2bff9be04b60fed0eec98dd9ad1a5ed4 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Fri, 2 Feb 2018 17:40:18 -0800 Subject: [PATCH 4/7] Fix curl commandline parameters --- repo2docker/buildpacks/r.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repo2docker/buildpacks/r.py b/repo2docker/buildpacks/r.py index 07f8f39b..20adb6f8 100644 --- a/repo2docker/buildpacks/r.py +++ b/repo2docker/buildpacks/r.py @@ -146,7 +146,7 @@ class RBuildPack(PythonBuildPack): "root", # Install RStudio! 
r""" - curl --quiet -L --fail {rstudio_url} > /tmp/rstudio.deb && \ + curl --silent --location --fail {rstudio_url} > /tmp/rstudio.deb && \ echo '{rstudio_checksum} /tmp/rstudio.deb' | md5sum -c - && \ dpkg -i /tmp/rstudio.deb && \ rm /tmp/rstudio.deb From 0e2fec2302799b00fd84291528cb2b8b904aa170 Mon Sep 17 00:00:00 2001 From: Chris Holdgraf Date: Tue, 6 Feb 2018 10:48:05 -0800 Subject: [PATCH 5/7] adding R README file --- tests/r/simple/README.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 tests/r/simple/README.rst diff --git a/tests/r/simple/README.rst b/tests/r/simple/README.rst new file mode 100644 index 00000000..ff3a58bc --- /dev/null +++ b/tests/r/simple/README.rst @@ -0,0 +1,10 @@ +R environment - install.R +------------------------- + +You can install an R environment with the following two files: + +* ``install.R``: a script that will be run from an R installation. This is + generally used to install and set up packages. +* ``runtime.txt``: include a line that specifies a date for the appropriate + MRAN repository version for packages. It should have the structure + ``r-YYYY-MM-DD``. From 2a03420f35221bb8f22398809d14c9202252b278 Mon Sep 17 00:00:00 2001 From: Chris Holdgraf Date: Tue, 6 Feb 2018 10:54:44 -0800 Subject: [PATCH 6/7] improving readme --- tests/r/simple/README.rst | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/r/simple/README.rst b/tests/r/simple/README.rst index ff3a58bc..d8241a82 100644 --- a/tests/r/simple/README.rst +++ b/tests/r/simple/README.rst @@ -1,10 +1,19 @@ R environment - install.R ------------------------- -You can install an R environment with the following two files: +You can install an R, RStudio, and IRKernel environment with the following +two files: -* ``install.R``: a script that will be run from an R installation. This is - generally used to install and set up packages. 
-* ``runtime.txt``: include a line that specifies a date for the appropriate - MRAN repository version for packages. It should have the structure - ``r-YYYY-MM-DD``. +* A ``runtime.txt`` file with the text:: + + r-YYYY-MM-DD + + Where 'YYYY', 'MM' and 'DD' refer to a specific + date snapshot of https://mran.microsoft.com/timemachine + from which libraries will be installed. +* An optional ``install.R`` file that will be executed at build time and can + be used for installing packages from both MRAN and GitHub. + +The presence of ``runtime.txt`` is enough to set up R, RStudio, and IRKernel. It +uses the ``r-base`` package from the Ubuntu apt repositories to install +R itself. From decff2e7c7e3be11b908e07006c829e27c0abe9a Mon Sep 17 00:00:00 2001 From: Chris Holdgraf Date: Tue, 6 Feb 2018 11:19:40 -0800 Subject: [PATCH 7/7] willingc comment on r readme --- tests/r/simple/README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/r/simple/README.rst b/tests/r/simple/README.rst index d8241a82..d4f81aa1 100644 --- a/tests/r/simple/README.rst +++ b/tests/r/simple/README.rst @@ -11,8 +11,8 @@ two files: Where 'YYYY', 'MM' and 'DD' refer to a specific date snapshot of https://mran.microsoft.com/timemachine from which libraries will be installed. -* An optional ``install.R`` file that will be executed at build time and can - be used for installing packages from both MRAN and GitHub. +* An optional ``install.R`` file that will be executed by an R installation + at build time. It can be used for installing packages from MRAN or GitHub. The presence of ``runtime.txt`` is enough to set up R, RStudio, and IRKernel. It uses the ``r-base`` package from the Ubuntu apt repositories to install