From fd6314d07d3c225c5e86aeb9bffa6f6e5ac134be Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Tue, 4 Jan 2022 18:30:39 +0530 Subject: [PATCH] Solidify logic for picking rspm vs mran - MRAN doesn't seem to have R 4.1 specific snapshots, so let's default to RSPM for anything 4.1+. - Otherwise, snapshot dates in 2022 will result in using rspm --- repo2docker/buildpacks/r.py | 64 +++++++++++++++++++++++++------------ tests/r/r4.1/runtime.txt | 2 +- tests/r/r4.1/verify | 6 +++- tests/unit/test_r.py | 44 +++++++++++++++++-------- 4 files changed, 80 insertions(+), 36 deletions(-) diff --git a/repo2docker/buildpacks/r.py b/repo2docker/buildpacks/r.py index ce9c5072..59f3a052 100644 --- a/repo2docker/buildpacks/r.py +++ b/repo2docker/buildpacks/r.py @@ -185,28 +185,50 @@ class RBuildPack(PythonBuildPack): return super().get_packages().union(packages) - def get_cran_mirror_url(self, snapshot_date): - # Call the API to find out if we have a snapshot available for the given date. - # If so, use the URL for that snapshot. If not, fall back to MRAN. 
- snapshots = requests.post( - "https://packagemanager.rstudio.com/__api__/url", - # Ask for midnight UTC snapshot - json={ - "repo": "all", - "snapshot": snapshot_date.strftime("%Y-%m-%dT00:00:00Z"), - }, - ).json() - # Construct a snapshot URL that will give us binary packages for Ubuntu Bionic (18.04) - if "upsi" in snapshots: - return ( - "https://packagemanager.rstudio.com/all/__linux__/bionic/" - + snapshots["upsi"] - ) + def get_rspm_snapshot_url(self, snapshot_date, max_days_prior=7): + for i in range(max_days_prior): + snapshots = requests.post( + "https://packagemanager.rstudio.com/__api__/url", + # Ask for midnight UTC snapshot + json={ + "repo": "all", + "snapshot": (snapshot_date - datetime.timedelta(days=i)).strftime("%Y-%m-%dT00:00:00Z"), + }, + ).json() + # Construct a snapshot URL that will give us binary packages for Ubuntu Bionic (18.04) + if "upsi" in snapshots: + return ( + "https://packagemanager.rstudio.com/all/__linux__/bionic/" + + snapshots["upsi"] + ) + raise ValueError('No snapshot found for {} or {} days prior in packagemanager.rstudio.com'.format( + snapshot_date.strftime("%Y-%m-%d"), max_days_prior + )) + + def get_mran_snapshot_url(self, snapshot_date, max_days_prior=7): + for i in range(max_days_prior): + try_date = snapshot_date - datetime.timedelta(days=i) + # Build the MRAN snapshot URL for this candidate date and probe it with a HEAD request + url = "https://mran.microsoft.com/snapshot/{}".format( + try_date.isoformat() + ) + r = requests.head(url) + if r.ok: + return url + else: + raise ValueError('No snapshot found for {} or {} days prior in mran.microsoft.com'.format( + snapshot_date.strftime("%Y-%m-%d"), max_days_prior + )) + + def get_cran_mirror_url(self, snapshot_date): + # Date after which we will use rspm + binary packages instead of MRAN + source packages + rspm_cutoff_date = datetime.date(2022, 1, 1) + + if snapshot_date >= rspm_cutoff_date or self.r_version >= V('4.1'): + return self.get_rspm_snapshot_url(snapshot_date) + else: + return 
self.get_mran_snapshot_url(snapshot_date) - # Fall back to MRAN if packagemanager.rstudio.com doesn't have it - return "https://mran.microsoft.com/snapshot/{}".format( - snapshot_date.isoformat() - ) def get_devtools_snapshot_date(self): """ diff --git a/tests/r/r4.1/runtime.txt b/tests/r/r4.1/runtime.txt index 2beef50f..a05a6121 100644 --- a/tests/r/r4.1/runtime.txt +++ b/tests/r/r4.1/runtime.txt @@ -1 +1 @@ -r-4.1-2021-07-07 +r-4.1-2021-10-22 diff --git a/tests/r/r4.1/verify b/tests/r/r4.1/verify index 508b6fb0..2989bbf4 100755 --- a/tests/r/r4.1/verify +++ b/tests/r/r4.1/verify @@ -1,8 +1,12 @@ #!/usr/bin/env Rscript library('ggplot2') -print(version) # Fail if version is not 4.1 if (!(version$major == "4" && as.double(version$minor) >= 1 && as.double(version$minor) < 2)) { quit("yes", 1) } + +# R 4.1 always uses rspm snapshots +if (!(startsWith(options()$repos["CRAN"], "https://packagemanager.rstudio.com"))) { + quit("yes", 1) +} diff --git a/tests/unit/test_r.py b/tests/unit/test_r.py index dda984af..c9806942 100644 --- a/tests/unit/test_r.py +++ b/tests/unit/test_r.py @@ -64,28 +64,46 @@ def test_mran_date(tmpdir, runtime, expected): assert r.checkpoint_date == date(*expected) -@pytest.mark.parametrize("expected", ["2019-12-29", "2019-12-26"]) -def test_mran_latestdate(tmpdir, expected): +def test_snapshot_rspm_date(): + test_dates = { + # Even though there is no snapshot specified in the interface at https://packagemanager.rstudio.com/client/#/repos/1/overview + # For 2021 Oct 22, the API still returns a valid URL that one can install + # packages from - probably some server side magic that repeats our client side logic. 
+ # No snapshot is listed for this date in the RSPM web interface, but the API still resolves one + date(2021, 10, 22): date(2021, 10, 22), + # Snapshot exists for this date + date(2022, 1, 1): date(2022, 1, 1), + } + + r = buildpacks.RBuildPack() + for requested, expected in test_dates.items(): + snapshot_url = r.get_rspm_snapshot_url(requested) + print(snapshot_url) + assert snapshot_url.startswith( + "https://packagemanager.rstudio.com/all/__linux__/bionic/" + expected.strftime("%Y-%m-%d") + ) + + with pytest.raises(ValueError): + r.get_rspm_snapshot_url(date(1691, 9, 5)) + + +@pytest.mark.parametrize("expected", [date(2019, 12, 29), date(2019, 12, 26)]) +@pytest.mark.parametrize("requested", [date(2019, 12, 31)]) +def test_snapshot_mran_date(requested, expected): def mock_request_head(url): r = Response() - if url == "https://mran.microsoft.com/snapshot/" + expected: + if url == "https://mran.microsoft.com/snapshot/" + expected.isoformat(): r.status_code = 200 else: r.status_code = 404 r.reason = "Mock MRAN no snapshot" return r - tmpdir.chdir() - - with open("DESCRIPTION", "w") as f: - f.write("") - with patch("requests.head", side_effect=mock_request_head): - with patch("datetime.date") as mockdate: - mockdate.today.return_value = date(2019, 12, 31) - r = buildpacks.RBuildPack() - r.detect() - assert r.checkpoint_date.isoformat() == expected + r = buildpacks.RBuildPack() + assert r.get_mran_snapshot_url( + requested + ) == "https://mran.microsoft.com/snapshot/{}".format(expected.isoformat()) def test_install_from_base(tmpdir):