Get binary R packages from packagemanager.rstudio.com

packagemanager.rstudio.com is a CRAN mirror provided
by RStudio, with *binary packages* prebuilt for many Linux
distributions! https://www.rstudio.com/blog/announcing-public-package-manager/
has more excellent detail. It cuts down install times for R packages
by almost 90% in some cases!

Like MRAN (which we use now), it also provides daily snapshots
of CRAN, one for each date
(https://docs.rstudio.com/rspm/news/#rstudio-package-manager-2021090).
The URL of the CRAN snapshot for a particular date can be fetched via
an API call. We call that API, and if there is no snapshot for that date
(anything before Oct 2017), we fall back to MRAN. This commit also adds
a test for that fallback.

One possible issue with changing existing binder repos to use binary
builds rather than source builds is that the binary builds sometimes
require an apt package to be installed, and will fail if it is
not. For example, we had to install the zmq library apt package -
source installs compile zmq from source, and skipping that compilation
is where the speedup comes from. But unlike Python wheels or conda
packages, these binary builds are not self-contained - they are linked
to apt packages from the specific distros. So some repos that worked
before might fail now. We can choose a more recent cut-off date to
prevent this from happening.
pull/1104/head
YuviPanda 2021-12-17 14:00:12 +05:30
rodzic 1146d4fbde
commit 476a25998b
4 zmienionych plików z 61 dodań i 45 usunięć

Wyświetl plik

@ -132,11 +132,9 @@ class RBuildPack(PythonBuildPack):
if not self.binder_dir and os.path.exists(description_R): if not self.binder_dir and os.path.exists(description_R):
if not self.checkpoint_date: if not self.checkpoint_date:
# no R snapshot date set through runtime.txt # no R snapshot date set through runtime.txt
# set the R runtime to the latest date that is guaranteed to # Set it to two days ago from today
# be on MRAN across timezones self._checkpoint_date = datetime.date.today() - datetime.timedelta(
two_days_ago = datetime.date.today() - datetime.timedelta(days=2) days=2
self._checkpoint_date = self._get_latest_working_mran_date(
two_days_ago, 3
) )
self._runtime = "r-{}".format(str(self._checkpoint_date)) self._runtime = "r-{}".format(str(self._checkpoint_date))
return True return True
@ -186,28 +184,29 @@ class RBuildPack(PythonBuildPack):
return super().get_packages().union(packages) return super().get_packages().union(packages)
def _get_latest_working_mran_date(self, startdate, max_prior): def get_cran_mirror_url(self, snapshot_date):
"""
Look for a working MRAN snapshot
Starts from `startdate` and tries up to `max_prior` previous days. # Call the API to find out if we have a snapshot available for the given date.
Raises `requests.HTTPError` with the last tried URL if no working snapshot found. # If so, use the URL for that snapshot. If not, fall back to MRAN.
""" snapshots = requests.post(
for days in range(max_prior + 1): "https://packagemanager.rstudio.com/__api__/url",
test_date = startdate - datetime.timedelta(days=days) # Ask for midnight UTC snapshot
mran_url = "https://mran.microsoft.com/snapshot/{}".format( json={
test_date.isoformat() "repo": "all",
"snapshot": snapshot_date.strftime("%Y-%m-%dT00:00:00Z"),
},
).json()
# Construct a snapshot URL that will give us binary packages for Ubuntu Bionic (18.04)
if "upsi" in snapshots:
return (
"https://packagemanager.rstudio.com/all/__linux__/bionic/"
+ snapshots["upsi"]
) )
r = requests.head(mran_url)
if r.ok: # Fall back to MRAN if packagemanager.rstudio.com doesn't have it
return test_date return "https://mran.microsoft.com/snapshot/{}".format(
self.log.warning( snapshot_date.isoformat()
"Failed to get MRAN snapshot URL %s: %s %s", )
mran_url,
r.status_code,
r.reason,
)
r.raise_for_status()
def get_build_scripts(self): def get_build_scripts(self):
""" """
@ -229,9 +228,7 @@ class RBuildPack(PythonBuildPack):
contents of runtime.txt. contents of runtime.txt.
""" """
mran_url = "https://mran.microsoft.com/snapshot/{}".format( cran_mirror_url = self.get_cran_mirror_url(self.checkpoint_date)
self.checkpoint_date.isoformat()
)
scripts = [] scripts = []
# For R 3.4 we want to use the default Ubuntu package but otherwise # For R 3.4 we want to use the default Ubuntu package but otherwise
@ -263,7 +260,8 @@ class RBuildPack(PythonBuildPack):
apt-get install --yes r-base={R_version} \ apt-get install --yes r-base={R_version} \
r-base-dev={R_version} \ r-base-dev={R_version} \
r-recommended={R_version} \ r-recommended={R_version} \
libclang-dev > /dev/null && \ libclang-dev \
libzmq3-dev > /dev/null && \
apt-get -qq purge && \ apt-get -qq purge && \
apt-get -qq clean && \ apt-get -qq clean && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
@ -294,35 +292,42 @@ class RBuildPack(PythonBuildPack):
echo "R_LIBS_USER=${R_LIBS_USER}" >> /etc/R/Renviron echo "R_LIBS_USER=${R_LIBS_USER}" >> /etc/R/Renviron
""", """,
), ),
(
"root",
# RStudio's CRAN mirror needs this to figure out which binary package to serve.
# If not set properly, it will just serve up source packages
# Quite hilarious, IMO.
# See https://docs.rstudio.com/rspm/admin/binaries.html
# Set mirror for RStudio too
r"""
R RHOME && \
mkdir -p /usr/lib/R/etc /etc/rstudio && \
echo 'options(repos = c(CRAN = "{cran_mirror_url}"))' > /usr/lib/R/etc/Rprofile.site && \
echo 'options(HTTPUserAgent = sprintf("R/%s R (%s)", getRversion(), paste(getRversion(), R.version$platform, R.version$arch, R.version$os)))' >> /usr/lib/R/etc/Rprofile.site && \
echo 'r-cran-repos={cran_mirror_url}' > /etc/rstudio/rsession.conf
""".format(
cran_mirror_url=cran_mirror_url
),
),
( (
"${NB_USER}", "${NB_USER}",
# Install a pinned version of IRKernel and set it up for use! # Install a pinned version of IRKernel and set it up for use!
r""" r"""
R --quiet -e "install.packages('devtools', repos='https://mran.microsoft.com/snapshot/{devtools_version}', method='libcurl')" && \ R --quiet -e "install.packages('devtools')" && \
R --quiet -e "devtools::install_github('IRkernel/IRkernel', ref='{irkernel_version}')" && \ R --quiet -e "devtools::install_github('IRkernel/IRkernel', ref='{irkernel_version}')" && \
R --quiet -e "IRkernel::installspec(prefix='$NB_PYTHON_PREFIX')" R --quiet -e "IRkernel::installspec(prefix='$NB_PYTHON_PREFIX')"
""".format( """.format(
devtools_version=DEVTOOLS_VERSION, irkernel_version=IRKERNEL_VERSION cran_mirror_url=cran_mirror_url,
devtools_version=DEVTOOLS_VERSION,
irkernel_version=IRKERNEL_VERSION,
), ),
), ),
( (
"${NB_USER}", "${NB_USER}",
# Install shiny library # Install shiny library
r""" r"""
R --quiet -e "install.packages('shiny', repos='{}', method='libcurl')" R --quiet -e "install.packages('shiny')"
""".format( """,
mran_url
),
),
(
"root",
# We set the default CRAN repo to the MRAN one at given date
# We set download method to be curl so we get HTTPS support
r"""
echo "options(repos = c(CRAN='{mran_url}'), download.file.method = 'libcurl')" > /etc/R/Rprofile.site
""".format(
mran_url=mran_url
),
), ),
] ]

Wyświetl plik

@ -0,0 +1 @@
install.packages("ggplot2")

Wyświetl plik

@ -0,0 +1 @@
r-3.6-2016-01-03

Wyświetl plik

@ -0,0 +1,9 @@
#!/usr/bin/env Rscript
# Verify script for the MRAN fallback: the requested snapshot date predates
# the snapshots available from packagemanager.rstudio.com, so the configured
# CRAN mirror is expected to be MRAN.

# Loading ggplot2 confirms that the package install succeeded.
library('ggplot2')
print(version)

# The CRAN mirror is stored in the "repos" option as a named character
# vector; there is no top-level "CRAN" option, so it must be read from
# there. unname() keeps the later isTRUE() check independent of names.
cran_mirror <- unname(getOption("repos")["CRAN"])

# Fail if MRAN isn't the configured CRAN mirror.
# isTRUE() makes a missing or NA mirror count as a failure instead of
# aborting the `if` with "argument is of length zero".
if (!isTRUE(grepl("https://mran.microsoft.com", cran_mirror, fixed=TRUE))) {
    quit("yes", 1)
}