Merge branch 'jupyterhub:main' into test_R_conda

pull/1108/head
Andrei V. Plamada 2022-01-08 18:33:59 +01:00 zatwierdzone przez GitHub
commit b2b0f758ff
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
21 zmienionych plików z 274 dodań i 168 usunięć

Wyświetl plik

@ -3,65 +3,63 @@ Base information for using R in BuildPacks.
Keeping this in r.py would lead to cyclic imports.
"""
# Via https://rstudio.com/products/rstudio/download-server/debian-ubuntu/
RSTUDIO_URL = "https://download2.rstudio.org/server/bionic/amd64/rstudio-server-1.2.5001-amd64.deb"
# This is MD5, because that is what RStudio download page provides!
RSTUDIO_CHECKSUM = "d33881b9ab786c09556c410e7dc477de"
# Via https://www.rstudio.com/products/shiny/download-server/
SHINY_URL = "https://download3.rstudio.org/ubuntu-14.04/x86_64/shiny-server-1.5.12.933-amd64.deb"
SHINY_CHECKSUM = "9aeef6613e7f58f21c97a4600921340e"
# Version of MRAN to pull devtools from.
DEVTOOLS_VERSION = "2018-02-01"
# IRKernel version - specified as a tag in the IRKernel repository
IRKERNEL_VERSION = "1.1"
from distutils.version import LooseVersion as V
def rstudio_base_scripts():
def rstudio_base_scripts(r_version):
"""Base steps to install RStudio and shiny-server."""
# Shiny server (not the package!) seems to be the same version for all R versions
shiny_server_url = "https://download3.rstudio.org/ubuntu-14.04/x86_64/shiny-server-1.5.17.973-amd64.deb"
shiny_proxy_version = "1.1"
shiny_sha256sum = "80f1e48f6c824be7ef9c843bb7911d4981ac7e8a963e0eff823936a8b28476ee"
if V(r_version) <= V("4.1"):
# Older RStudio and jupyter-rsession-proxy for v4.1 and below
rstudio_url = "https://download2.rstudio.org/server/bionic/amd64/rstudio-server-1.3.959-amd64.deb"
rstudio_sha256sum = (
"187af05cab1221282487fdc33f4b161484c3228eaade3d6697b1d41c206ee6d9"
)
rsession_proxy_version = "1.4"
else:
rstudio_url = "https://download2.rstudio.org/server/bionic/amd64/rstudio-server-2021.09.1-372-amd64.deb"
rstudio_sha256sum = (
"c58df09468870b89f1796445853dce2dacaa0fc5b7bb1f92b036fa8da1d1f8a3"
)
rsession_proxy_version = "2.0.1"
return [
(
"root",
# Install RStudio!
r"""
curl --silent --location --fail {rstudio_url} > /tmp/rstudio.deb && \
echo '{rstudio_checksum} /tmp/rstudio.deb' | md5sum -c - && \
apt-get update > /dev/null && \
apt install -y /tmp/rstudio.deb > /dev/null && \
rm /tmp/rstudio.deb && \
apt-get -qq purge && \
apt-get -qq clean && \
rm -rf /var/lib/apt/lists/*
""".format(
rstudio_url=RSTUDIO_URL, rstudio_checksum=RSTUDIO_CHECKSUM
),
),
(
"root",
# Install Shiny Server!
r"""
curl --silent --location --fail {url} > {deb} && \
echo '{checksum} {deb}' | md5sum -c - && \
dpkg -i {deb} > /dev/null && \
rm {deb}
""".format(
url=SHINY_URL, checksum=SHINY_CHECKSUM, deb="/tmp/shiny.deb"
curl --silent --location --fail {rstudio_url} > /tmp/rstudio.deb && \
curl --silent --location --fail {shiny_server_url} > /tmp/shiny.deb && \
echo '{rstudio_sha256sum} /tmp/rstudio.deb' | sha256sum -c - && \
echo '{shiny_sha256sum} /tmp/shiny.deb' | sha256sum -c - && \
apt-get update > /dev/null && \
apt install -y /tmp/rstudio.deb /tmp/shiny.deb > /dev/null && \
rm /tmp/rstudio.deb && \
apt-get -qq purge && \
apt-get -qq clean && \
rm -rf /var/lib/apt/lists/*
""".format(
rstudio_url=rstudio_url,
rstudio_sha256sum=rstudio_sha256sum,
shiny_server_url=shiny_server_url,
shiny_sha256sum=shiny_sha256sum,
),
),
(
"${NB_USER}",
# Install nbrsessionproxy
# Install jupyter-rsession-proxy
r"""
pip install --no-cache-dir jupyter-server-proxy==1.4.0 && \
pip install --no-cache-dir https://github.com/jupyterhub/jupyter-rsession-proxy/archive/d5efed5455870556fc414f30871d0feca675a4b4.zip && \
pip install --no-cache-dir https://github.com/ryanlovett/jupyter-shiny-proxy/archive/47557dc47e2aeeab490eb5f3eeae414cdde4a6a9.zip && \
jupyter serverextension enable jupyter_server_proxy --sys-prefix && \
jupyter nbextension install --py jupyter_server_proxy --sys-prefix && \
jupyter nbextension enable --py jupyter_server_proxy --sys-prefix
""",
pip install --no-cache \
jupyter-rsession-proxy=={rsession_proxy_version} \
jupyter-shiny-proxy=={shiny_proxy_version}
""".format(
rsession_proxy_version=rsession_proxy_version,
shiny_proxy_version=shiny_proxy_version,
),
),
(
# Not all of these locations are configurable; so we make sure

Wyświetl plik

@ -6,7 +6,7 @@ from collections.abc import Mapping
from ruamel.yaml import YAML
from ..base import BaseImage
from .._r_base import rstudio_base_scripts, IRKERNEL_VERSION
from .._r_base import rstudio_base_scripts
from ...utils import is_local_pip_requirement
# pattern for parsing conda dependency line
@ -356,15 +356,15 @@ class CondaBuildPack(BaseImage):
(
"${NB_USER}",
r"""
mamba install -p {0} r-base{1} r-irkernel={2} r-devtools -y && \
mamba install -p {0} r-base{1} r-irkernel=1.2 r-devtools -y && \
mamba clean --all -f -y && \
mamba list -p {0}
""".format(
env_prefix, r_pin, IRKERNEL_VERSION
env_prefix, r_pin
),
)
)
scripts += rstudio_base_scripts()
scripts += rstudio_base_scripts(self.r_version)
scripts += [
(
"root",

Wyświetl plik

@ -6,7 +6,7 @@ import requests
from distutils.version import LooseVersion as V
from .python import PythonBuildPack
from ._r_base import rstudio_base_scripts, DEVTOOLS_VERSION, IRKERNEL_VERSION
from ._r_base import rstudio_base_scripts
class RBuildPack(PythonBuildPack):
@ -20,18 +20,19 @@ class RBuildPack(PythonBuildPack):
r-<year>-<month>-<date>
Where 'year', 'month' and 'date' refer to a specific
date snapshot of https://mran.microsoft.com/timemachine
from which libraries are to be installed.
date whose CRAN snapshot we will use to fetch packages.
Uses https://packagemanager.rstudio.com, or MRAN if no snapshot
is found on packagemanager.rstudio.com
2. A `DESCRIPTION` file signaling an R package
If there is no `runtime.txt`, then the MRAN snapshot is set to latest
If there is no `runtime.txt`, then the CRAN snapshot is set to latest
date that is guaranteed to exist across timezones.
Additional R packages are installed if specified either
- in a file `install.R`, that will be executed at build time,
and can be used for installing packages from both MRAN and GitHub
and can be used for installing packages from both CRAN and GitHub
- as dependencies in a `DESCRIPTION` file
@ -72,15 +73,15 @@ class RBuildPack(PythonBuildPack):
"3.5.1": "3.5.1-2bionic",
"3.5.2": "3.5.2-1bionic",
"3.5.3": "3.5.3-1bionic",
"3.6": "3.6.1-3bionic",
"3.6": "3.6.3-1bionic",
"3.6.0": "3.6.0-2bionic",
"3.6.1": "3.6.1-3bionic",
"4.0": "4.0.2-1.1804.0",
"4.0": "4.0.5-1.1804.0",
"4.0.2": "4.0.2-1.1804.0",
"4.1": "4.1.2-1.1804.0",
}
# the default if nothing is specified
r_version = "3.6"
r_version = "4.1"
if not hasattr(self, "_r_version"):
parts = self.runtime.split("-")
@ -99,7 +100,7 @@ class RBuildPack(PythonBuildPack):
@property
def checkpoint_date(self):
"""
Return the date of MRAN checkpoint to use for this repo
Return the date of CRAN checkpoint to use for this repo
Returns '' if no date is specified
"""
@ -132,11 +133,9 @@ class RBuildPack(PythonBuildPack):
if not self.binder_dir and os.path.exists(description_R):
if not self.checkpoint_date:
# no R snapshot date set through runtime.txt
# set the R runtime to the latest date that is guaranteed to
# be on MRAN across timezones
two_days_ago = datetime.date.today() - datetime.timedelta(days=2)
self._checkpoint_date = self._get_latest_working_mran_date(
two_days_ago, 3
# Set it to two days ago from today
self._checkpoint_date = datetime.date.today() - datetime.timedelta(
days=2
)
self._runtime = "r-{}".format(str(self._checkpoint_date))
return True
@ -186,28 +185,65 @@ class RBuildPack(PythonBuildPack):
return super().get_packages().union(packages)
def _get_latest_working_mran_date(self, startdate, max_prior):
"""
Look for a working MRAN snapshot
def get_rspm_snapshot_url(self, snapshot_date, max_days_prior=7):
    """
    Return a packagemanager.rstudio.com snapshot URL for `snapshot_date`.

    Asks the packagemanager.rstudio.com API for the midnight UTC snapshot
    of `snapshot_date`. If the response carries no "upsi" entry, the
    previous day is tried, and so on. At most `max_days_prior` dates are
    tried in total (the requested date plus `max_days_prior - 1` earlier
    days).

    Returns the URL built from the Ubuntu Bionic binary repository prefix
    and the value the API returned under "upsi".

    Raises `ValueError` if none of the tried dates yields a snapshot.
    Raises a `requests` timeout error if the API does not answer in time.
    """
    for days_back in range(max_days_prior):
        try_date = snapshot_date - datetime.timedelta(days=days_back)
        snapshots = requests.post(
            "https://packagemanager.rstudio.com/__api__/url",
            # Ask for midnight UTC snapshot
            json={
                "repo": "all",
                "snapshot": try_date.strftime("%Y-%m-%dT00:00:00Z"),
            },
            # requests never times out by default; without this a stalled
            # connection would block the build indefinitely.
            timeout=30,
        ).json()

        # Construct a snapshot URL that will give us binary packages for Ubuntu Bionic (18.04)
        if "upsi" in snapshots:
            return (
                "https://packagemanager.rstudio.com/all/__linux__/bionic/"
                + snapshots["upsi"]
            )

    raise ValueError(
        "No snapshot found for {} or {} days prior in packagemanager.rstudio.com".format(
            snapshot_date.strftime("%Y-%m-%d"), max_days_prior
        )
    )
Starts from `startdate` and tries up to `max_prior` previous days.
Raises `requests.HTTPError` with the last tried URL if no working snapshot found.
"""
for days in range(max_prior + 1):
test_date = startdate - datetime.timedelta(days=days)
mran_url = "https://mran.microsoft.com/snapshot/{}".format(
test_date.isoformat()
)
r = requests.head(mran_url)
def get_mran_snapshot_url(self, snapshot_date, max_days_prior=7):
for i in range(max_days_prior):
try_date = snapshot_date - datetime.timedelta(days=i)
# Fall back to MRAN if packagemanager.rstudio.com doesn't have it
url = "https://mran.microsoft.com/snapshot/{}".format(try_date.isoformat())
r = requests.head(url)
if r.ok:
return test_date
self.log.warning(
"Failed to get MRAN snapshot URL %s: %s %s",
mran_url,
r.status_code,
r.reason,
return url
raise ValueError(
"No snapshot found for {} or {} days prior in mran.microsoft.com".format(
snapshot_date.strftime("%Y-%m-%d"), max_days_prior
)
r.raise_for_status()
)
def get_cran_mirror_url(self, snapshot_date):
    """
    Return the CRAN mirror URL to use for `snapshot_date`.

    packagemanager.rstudio.com (binary packages) is used when the snapshot
    date is on or after the cutoff date, or when the R version is 4.1 or
    newer. Otherwise MRAN (source packages) is used.

    Any exception raised by `get_rspm_snapshot_url` or
    `get_mran_snapshot_url` propagates to the caller.
    """
    # Date after which we will use rspm + binary packages instead of MRAN + source packages
    rspm_cutoff_date = datetime.date(2022, 1, 1)

    # self.r_version is a plain string, so wrap it in V() like the other
    # version checks in this file instead of relying on a str-vs-version
    # reflected comparison.
    if snapshot_date >= rspm_cutoff_date or V(self.r_version) >= V("4.1"):
        return self.get_rspm_snapshot_url(snapshot_date)
    return self.get_mran_snapshot_url(snapshot_date)
def get_devtools_snapshot_url(self):
    """
    Give the snapshot URL from which devtools is installed.

    devtools belongs to our 'core' base install, so its version is pinned
    here rather than following the repository's own snapshot date.
    """
    # Snapshot picked by hand from https://packagemanager.rstudio.com/client/#/repos/1/overview
    # It is hardcoded, not looked up from a date, to avoid extra API calls.
    # packagemanager.rstudio.com is always usable here because the
    # necessary apt packages are always installed.
    devtools_snapshot_url = "https://packagemanager.rstudio.com/all/__linux__/bionic/2022-01-04+Y3JhbiwyOjQ1MjYyMTU7NzlBRkJEMzg"
    return devtools_snapshot_url
def get_build_scripts(self):
"""
@ -221,7 +257,7 @@ class RBuildPack(PythonBuildPack):
for installing R packages into
- RStudio
- R's devtools package, at a particular frozen version
(determined by MRAN)
(determined by CRAN)
- IRKernel
- nbrsessionproxy (to access RStudio via Jupyter Notebook)
@ -229,61 +265,58 @@ class RBuildPack(PythonBuildPack):
contents of runtime.txt.
"""
mran_url = "https://mran.microsoft.com/snapshot/{}".format(
self.checkpoint_date.isoformat()
)
cran_mirror_url = self.get_cran_mirror_url(self.checkpoint_date)
scripts = []
# For R 3.4 we want to use the default Ubuntu package but otherwise
# we use the packages from R's own repo
# Determine which R apt repository should be enabled
if V(self.r_version) >= V("3.5"):
if V(self.r_version) >= V("4"):
vs = "40"
else:
vs = "35"
scripts += [
(
"root",
rf"""
echo "deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran{vs}/" > /etc/apt/sources.list.d/r-ubuntu.list
""",
),
# Dont use apt-key directly, as gpg does not always respect *_proxy vars. This increase the chances
# of being able to reach it from behind a firewall
(
"root",
r"""
wget --quiet -O - 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xe298a3a825c0d65dfd57cbb651716619e084dab9' | apt-key add -
""",
),
(
"root",
r"""
apt-get update > /dev/null && \
apt-get install --yes r-base={R_version} \
r-base-dev={R_version} \
r-recommended={R_version} \
libclang-dev > /dev/null && \
apt-get -qq purge && \
apt-get -qq clean && \
rm -rf /var/lib/apt/lists/*
""".format(
R_version=self.r_version
),
),
]
scripts.append(
scripts = [
(
"root",
rf"""
echo "deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran{vs}/" > /etc/apt/sources.list.d/r-ubuntu.list
""",
),
# Dont use apt-key directly, as gpg does not always respect *_proxy vars. This increase the chances
# of being able to reach it from behind a firewall
(
"root",
r"""
wget --quiet -O - 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xe298a3a825c0d65dfd57cbb651716619e084dab9' | apt-key add -
""",
),
(
"root",
r"""
apt-get update > /dev/null && \
apt-get install --yes r-base={R_version} r-base-core={R_version} \
r-base-dev={R_version} \
r-recommended={R_version} \
libclang-dev \
libzmq3-dev > /dev/null && \
apt-get -qq purge && \
apt-get -qq clean && \
rm -rf /var/lib/apt/lists/*
""".format(
R_version=self.r_version
),
),
]
scripts += rstudio_base_scripts(self.r_version)
scripts += [
(
"root",
r"""
mkdir -p ${R_LIBS_USER} && \
chown -R ${NB_USER}:${NB_USER} ${R_LIBS_USER}
""",
)
)
scripts += rstudio_base_scripts()
scripts += [
),
(
"root",
# Set paths so that RStudio shares libraries with base R
@ -295,33 +328,30 @@ class RBuildPack(PythonBuildPack):
""",
),
(
"${NB_USER}",
# Install a pinned version of IRKernel and set it up for use!
"root",
# RStudio's CRAN mirror needs this to figure out which binary package to serve.
# If not set properly, it will just serve up source packages
# Quite hilarious, IMO.
# See https://docs.rstudio.com/rspm/1.0.12/admin/binaries.html
# Set mirror for RStudio too, by modifying rsession.conf
r"""
R --quiet -e "install.packages('devtools', repos='https://mran.microsoft.com/snapshot/{devtools_version}', method='libcurl')" && \
R --quiet -e "devtools::install_github('IRkernel/IRkernel', ref='{irkernel_version}')" && \
R RHOME && \
mkdir -p /usr/lib/R/etc /etc/rstudio && \
echo 'options(repos = c(CRAN = "{cran_mirror_url}"))' > /usr/lib/R/etc/Rprofile.site && \
echo 'options(HTTPUserAgent = sprintf("R/%s R (%s)", getRversion(), paste(getRversion(), R.version$platform, R.version$arch, R.version$os)))' >> /usr/lib/R/etc/Rprofile.site && \
echo 'r-cran-repos={cran_mirror_url}' > /etc/rstudio/rsession.conf
""".format(
cran_mirror_url=cran_mirror_url
),
),
(
"${NB_USER}",
# Install a pinned version of devtools, IRKernel and shiny
r"""
R --quiet -e "install.packages(c('devtools', 'IRkernel', 'shiny'), repos='{devtools_cran_mirror_url}')" && \
R --quiet -e "IRkernel::installspec(prefix='$NB_PYTHON_PREFIX')"
""".format(
devtools_version=DEVTOOLS_VERSION, irkernel_version=IRKERNEL_VERSION
),
),
(
"${NB_USER}",
# Install shiny library
r"""
R --quiet -e "install.packages('shiny', repos='{}', method='libcurl')"
""".format(
mran_url
),
),
(
"root",
# We set the default CRAN repo to the MRAN one at given date
# We set download method to be curl so we get HTTPS support
r"""
echo "options(repos = c(CRAN='{mran_url}'), download.file.method = 'libcurl')" > /etc/R/Rprofile.site
""".format(
mran_url=mran_url
devtools_cran_mirror_url=self.get_devtools_snapshot_url()
),
),
]

Wyświetl plik

@ -0,0 +1 @@
r-3.6-2016-01-03

Wyświetl plik

@ -0,0 +1,9 @@
#!/usr/bin/env Rscript
# Verification script: loads ggplot2, prints the R version and checks that
# the configured CRAN mirror is an MRAN snapshot.
library('ggplot2')
print(version)
# Fail if MRAN isn't the configured CRAN mirror
# quit(save, status): the second argument is the process exit status, so a
# mismatch ends the script with status 1.
if (!(startsWith(options()$repos["CRAN"], "https://mran.microsoft.com"))) {
quit("yes", 1)
}

Wyświetl plik

@ -0,0 +1 @@
install.packages("ggplot2")

Wyświetl plik

@ -0,0 +1 @@
r-3.6-2018-07-07

Wyświetl plik

@ -0,0 +1,8 @@
#!/usr/bin/env Rscript
# Verification script: loads ggplot2, prints the R version and checks that
# the running R is a 3.6 release.
library('ggplot2')
print(version)
# Fail if version is not 3.6
# version$minor is converted to a number and must fall in [6, 7); R reports
# it as "<minor>.<patch>", so any 3.6 patch release passes.
# quit(save, status): exits with status 1 on mismatch.
if (!(version$major == "3" && as.double(version$minor) >= 6 && as.double(version$minor) < 7)) {
quit("yes", 1)
}

Wyświetl plik

@ -0,0 +1 @@
install.packages("ggplot2")

Wyświetl plik

@ -0,0 +1 @@
r-4.0-2022-01-01

Wyświetl plik

@ -0,0 +1,13 @@
#!/usr/bin/env Rscript
# Verification script: loads ggplot2, prints the R version, then checks both
# the R version (4.0) and the configured CRAN mirror (packagemanager.rstudio.com).
library('ggplot2')
print(version)
# Fail if version is not 4.0
# version$minor is converted to a number and must fall in [0, 1); R reports
# it as "<minor>.<patch>", so any 4.0 patch release passes.
# quit(save, status): exits with status 1 on mismatch.
if (!(version$major == "4" && as.double(version$minor) >= 0 && as.double(version$minor) < 1)) {
quit("yes", 1)
}
# The date we have chosen should give us an rspm mirror
# Exits with status 1 if the CRAN repo URL does not start with the rspm host.
if (!(startsWith(options()$repos["CRAN"], "https://packagemanager.rstudio.com"))) {
quit("yes", 1)
}

Wyświetl plik

@ -0,0 +1 @@
install.packages("ggplot2")

Wyświetl plik

@ -0,0 +1 @@
r-4.0-2021-07-07

Wyświetl plik

@ -0,0 +1,13 @@
#!/usr/bin/env Rscript
# Verification script: loads ggplot2, prints the R version, then checks both
# the R version (4.0) and the configured CRAN mirror (MRAN).
library('ggplot2')
print(version)
# Fail if version is not 4.0
# version$minor is converted to a number and must fall in [0, 1); R reports
# it as "<minor>.<patch>", so any 4.0 patch release passes.
# quit(save, status): exits with status 1 on mismatch.
if (!(version$major == "4" && as.double(version$minor) >= 0 && as.double(version$minor) < 1)) {
quit("yes", 1)
}
# The date we have chosen should give us an MRAN mirror
# Exits with status 1 if the CRAN repo URL does not start with the MRAN host.
if (!(startsWith(options()$repos["CRAN"], "https://mran.microsoft.com"))) {
quit("yes", 1)
}

Wyświetl plik

@ -0,0 +1 @@
install.packages("ggplot2")

Wyświetl plik

@ -0,0 +1 @@
r-4.1-2021-10-22

Wyświetl plik

@ -1,8 +1,12 @@
#!/usr/bin/env Rscript
# Verification script: loads ggplot2, prints the R version, then checks both
# the R version (4.1) and the configured CRAN mirror (packagemanager.rstudio.com).
library('ggplot2')
print(version)
# Fail if version is not 4.1
# version$minor is converted to a number and must fall in [1, 2); R reports
# it as "<minor>.<patch>", so any 4.1 patch release passes.
# quit(save, status): exits with status 1 on mismatch.
if (!(version$major == "4" && as.double(version$minor) >= 1 && as.double(version$minor) < 2)) {
quit("yes", 1)
}
# R 4.1 always uses rspm snapshots
# Exits with status 1 if the CRAN repo URL does not start with the rspm host.
if (!(startsWith(options()$repos["CRAN"], "https://packagemanager.rstudio.com"))) {
quit("yes", 1)
}

Wyświetl plik

@ -1 +0,0 @@
r-4.1-2021-07-07

Wyświetl plik

@ -1,2 +1,7 @@
#!/usr/bin/env Rscript
# Verification script: loads ggplot2 and checks that the running R is a 4.1 release.
library('ggplot2')
# Fail if version is not 4.1
# version$minor is converted to a number and must fall in [1, 2); R reports
# it as "<minor>.<patch>", so any 4.1 patch release passes.
# quit(save, status): exits with status 1 on mismatch.
if (!(version$major == "4" && as.double(version$minor) >= 1 && as.double(version$minor) < 2)) {
quit("yes", 1)
}

Wyświetl plik

@ -22,7 +22,7 @@ def test_unsupported_version(tmpdir):
@pytest.mark.parametrize(
"runtime_version, expected", [("", "3.6"), ("3.6", "3.6"), ("3.5.1", "3.5")]
"runtime_version, expected", [("", "4.1"), ("3.6", "3.6"), ("3.5.1", "3.5")]
)
def test_version_specification(tmpdir, runtime_version, expected):
tmpdir.chdir()
@ -43,7 +43,7 @@ def test_version_completion(tmpdir):
f.write(f"r-3.6-2019-01-01")
r = buildpacks.RBuildPack()
assert r.r_version == "3.6.1-3bionic"
assert r.r_version == "3.6.3-1bionic"
@pytest.mark.parametrize(
@ -64,28 +64,46 @@ def test_mran_date(tmpdir, runtime, expected):
assert r.checkpoint_date == date(*expected)
@pytest.mark.parametrize("expected", ["2019-12-29", "2019-12-26"])
def test_mran_latestdate(tmpdir, expected):
def test_snapshot_rspm_date():
    """
    Check the snapshot URLs returned by `get_rspm_snapshot_url`.

    Each returned URL must start with the bionic binary repository prefix
    followed by the expected snapshot date. A date with no snapshot in
    range must raise `ValueError`.
    """
    url_prefix = "https://packagemanager.rstudio.com/all/__linux__/bionic/"
    # (requested date, date expected in the returned URL)
    cases = [
        # No snapshot is listed for 2021 Oct 22 in the interface at
        # https://packagemanager.rstudio.com/client/#/repos/1/overview
        # Even so, the API returns a valid URL that one can install packages
        # from - probably some server side magic that repeats our client
        # side logic.
        (date(2021, 10, 22), date(2021, 10, 22)),
        # Snapshot exists for this date
        (date(2022, 1, 1), date(2022, 1, 1)),
    ]

    buildpack = buildpacks.RBuildPack()
    for requested, expected in cases:
        url = buildpack.get_rspm_snapshot_url(requested)
        assert url.startswith(url_prefix + expected.strftime("%Y-%m-%d"))

    with pytest.raises(ValueError):
        buildpack.get_rspm_snapshot_url(date(1691, 9, 5))
@pytest.mark.parametrize("expected", [date(2019, 12, 29), date(2019, 12, 26)])
@pytest.mark.parametrize("requested", [date(2019, 12, 31)])
def test_snapshot_mran_date(requested, expected):
def mock_request_head(url):
r = Response()
if url == "https://mran.microsoft.com/snapshot/" + expected:
if url == "https://mran.microsoft.com/snapshot/" + expected.isoformat():
r.status_code = 200
else:
r.status_code = 404
r.reason = "Mock MRAN no snapshot"
return r
tmpdir.chdir()
with open("DESCRIPTION", "w") as f:
f.write("")
with patch("requests.head", side_effect=mock_request_head):
with patch("datetime.date") as mockdate:
mockdate.today.return_value = date(2019, 12, 31)
r = buildpacks.RBuildPack()
r.detect()
assert r.checkpoint_date.isoformat() == expected
r = buildpacks.RBuildPack()
assert r.get_mran_snapshot_url(
requested
) == "https://mran.microsoft.com/snapshot/{}".format(expected.isoformat())
def test_install_from_base(tmpdir):