From 4bb0bb24f5a5938dec6e3d8f46a3e06101746fdd Mon Sep 17 00:00:00 2001 From: Matt Westcott Date: Fri, 9 May 2025 21:14:41 +0100 Subject: [PATCH] Update oembed finder to use requests instead of urllib.request (#13102) --- CHANGELOG.txt | 1 + docs/releases/7.1.md | 1 + pyproject.toml | 1 + wagtail/embeds/finders/oembed.py | 16 +-- wagtail/embeds/tests/test_embeds.py | 202 ++++++++++++++++------------ 5 files changed, 125 insertions(+), 96 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 69c0c41118..8bd3004f3e 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -11,6 +11,7 @@ Changelog * Render listing buttons as template components (Sage Abdullah) * Define default `GenericRelations` for `RevisionMixin` and `WorkflowMixin`, to avoid issues with deletion cascades (Sage Abdullah) * Document and relocate the `init_new_page` signal (Maciek Baron) + * Use `requests` to access oEmbed endpoints, for more robust SSL certificate handling (Matt Westcott) * Fix: Handle lazy translation strings as `preview_value` for `RichTextBlock` (Seb Corbin) * Fix: Fix handling of newline-separated choices in form builder when using non-windows newline characters (Baptiste Mispelon) * Fix: Ensure `WAGTAILADMIN_LOGIN_URL` is respected when logging out of the admin (Antoine Rodriguez, Ramon de Jezus) diff --git a/docs/releases/7.1.md b/docs/releases/7.1.md index b6c59aa44d..09603ae1e2 100644 --- a/docs/releases/7.1.md +++ b/docs/releases/7.1.md @@ -20,6 +20,7 @@ depth: 1 * Define default `GenericRelations` for `RevisionMixin` and `WorkflowMixin`, to avoid issues with deletion cascades (Sage Abdullah) * Update Twitter oembed provider to recognise x.com links (manu) * Document and relocate the `init_new_page` signal (Maciek Baron) + * Use `requests` to access oEmbed endpoints, for more robust SSL certificate handling (Matt Westcott) ### Bug fixes diff --git a/pyproject.toml b/pyproject.toml index 8839b442c7..0e6ae47bb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,7 @@ testing = [ "azure-mgmt-cdn>=12.0,<13.0", "azure-mgmt-frontdoor>=1.0,<1.1", "django-pattern-library>=0.7", + "responses>=0.25,<1", # For coverage and PEP8 linting "coverage>=3.7.0", "doc8==1.1.2", diff --git a/wagtail/embeds/finders/oembed.py b/wagtail/embeds/finders/oembed.py index 79fa699f78..7a704b3b9a 100644 --- a/wagtail/embeds/finders/oembed.py +++ b/wagtail/embeds/finders/oembed.py @@ -1,11 +1,7 @@ -import json import re from datetime import timedelta -from urllib import request as urllib_request -from urllib.error import URLError -from urllib.parse import urlencode -from urllib.request import Request +import requests from django.utils import timezone from wagtail.embeds.exceptions import EmbedNotFoundException @@ -60,12 +56,12 @@ class OEmbedFinder(EmbedFinder): params["maxheight"] = max_height # Perform request - request = Request(endpoint + "?" + urlencode(params)) - request.add_header("User-agent", "Mozilla/5.0") try: - r = urllib_request.urlopen(request) - oembed = json.loads(r.read().decode("utf-8")) - except (URLError, json.decoder.JSONDecodeError): + r = requests.get( + endpoint, params=params, headers={"User-agent": "Mozilla/5.0"} + ) + oembed = r.json() + except requests.RequestException: raise EmbedNotFoundException # Convert photos into HTML diff --git a/wagtail/embeds/tests/test_embeds.py b/wagtail/embeds/tests/test_embeds.py index 80e8304a01..6173811c5b 100644 --- a/wagtail/embeds/tests/test_embeds.py +++ b/wagtail/embeds/tests/test_embeds.py @@ -5,11 +5,13 @@ import urllib.request from unittest.mock import patch from urllib.error import HTTPError, URLError +import responses from django import template from django.core.exceptions import ValidationError from django.test import TestCase, override_settings from django.urls import reverse from django.utils.timezone import make_aware, now +from responses import matchers from wagtail import blocks from wagtail.embeds import oembed_providers @@ -477,60 +479,75 @@ class TestEmbedly(TestCase): class TestOembed(TestCase): - def setUp(self): - class DummyResponse: - def read(self): - return b"foo" - - self.dummy_response = DummyResponse() - def test_oembed_invalid_provider(self): self.assertRaises(EmbedNotFoundException, OEmbedFinder().find_embed, "foo") + @responses.activate def test_oembed_invalid_request(self): - config = {"side_effect": URLError("foo")} - with patch.object(urllib.request, "urlopen", **config): - self.assertRaises( - EmbedNotFoundException, - OEmbedFinder().find_embed, - "http://www.youtube.com/watch/", - ) + # no response set up, so responses will raise a ConnectionError + self.assertRaises( + EmbedNotFoundException, + OEmbedFinder().find_embed, + "https://www.youtube.com/watch/", + ) - @patch("urllib.request.urlopen") - def test_oembed_non_json_response(self, urlopen): - urlopen.return_value = self.dummy_response + @responses.activate + def test_oembed_non_json_response(self): + responses.get( + url="https://www.youtube.com/oembed", + match=[ + matchers.query_param_matcher( + { + "url": "https://www.youtube.com/watch?v=ReblZ7o7lu4", + "format": "json", + } + ), + ], + body="foo", + ) self.assertRaises( EmbedNotFoundException, OEmbedFinder().find_embed, "https://www.youtube.com/watch?v=ReblZ7o7lu4", ) - @patch("urllib.request.urlopen") - @patch("json.loads") - def test_oembed_photo_request(self, loads, urlopen): - urlopen.return_value = self.dummy_response - loads.return_value = {"type": "photo", "url": "http://www.example.com"} - result = OEmbedFinder().find_embed("http://www.youtube.com/watch/") + @responses.activate + def test_oembed_photo_request(self): + responses.get( + url="https://www.youtube.com/oembed", + match=[ + matchers.query_param_matcher( + {"url": "https://www.youtube.com/watch/", "format": "json"} + ), + ], + json={"type": "photo", "url": "http://www.example.com"}, + ) + result = OEmbedFinder().find_embed("https://www.youtube.com/watch/") self.assertEqual(result["type"], "photo") self.assertEqual(result["html"], '') - loads.assert_called_with("foo") - @patch("urllib.request.urlopen") - @patch("json.loads") - def test_oembed_return_values(self, loads, urlopen): - urlopen.return_value = self.dummy_response - loads.return_value = { - "type": "something", - "url": "http://www.example.com", - "title": "test_title", - "author_name": "test_author", - "provider_name": "test_provider_name", - "thumbnail_url": "test_thumbail_url", - "width": "test_width", - "height": "test_height", - "html": "test_html", - } - result = OEmbedFinder().find_embed("http://www.youtube.com/watch/") + @responses.activate + def test_oembed_return_values(self): + responses.get( + url="https://www.youtube.com/oembed", + match=[ + matchers.query_param_matcher( + {"url": "https://www.youtube.com/watch/", "format": "json"} + ), + ], + json={ + "type": "something", + "url": "http://www.example.com", + "title": "test_title", + "author_name": "test_author", + "provider_name": "test_provider_name", + "thumbnail_url": "test_thumbail_url", + "width": "test_width", + "height": "test_height", + "html": "test_html", + }, + ) + result = OEmbedFinder().find_embed("https://www.youtube.com/watch/") self.assertEqual( result, { @@ -546,24 +563,30 @@ class TestOembed(TestCase): ) @patch("django.utils.timezone.now") - @patch("urllib.request.urlopen") - @patch("json.loads") - def test_oembed_cache_until(self, loads, urlopen, now): - urlopen.return_value = self.dummy_response - loads.return_value = { - "type": "something", - "url": "http://www.example.com", - "title": "test_title", - "author_name": "test_author", - "provider_name": "test_provider_name", - "thumbnail_url": "test_thumbail_url", - "width": "test_width", - "height": "test_height", - "html": "test_html", - "cache_age": 3600, - } + @responses.activate + def test_oembed_cache_until(self, now): + responses.get( + url="https://www.youtube.com/oembed", + match=[ + matchers.query_param_matcher( + {"url": "https://www.youtube.com/watch/", "format": "json"} + ), + ], + json={ + "type": "something", + "url": "http://www.example.com", + "title": "test_title", + "author_name": "test_author", + "provider_name": "test_provider_name", + "thumbnail_url": "test_thumbail_url", + "width": "test_width", + "height": "test_height", + "html": "test_html", + "cache_age": 3600, + }, + ) now.return_value = make_aware(datetime.datetime(2001, 2, 3)) - result = OEmbedFinder().find_embed("http://www.youtube.com/watch/") + result = OEmbedFinder().find_embed("https://www.youtube.com/watch/") self.assertEqual( result, { @@ -580,24 +603,30 @@ class TestOembed(TestCase): ) @patch("django.utils.timezone.now") - @patch("urllib.request.urlopen") - @patch("json.loads") - def test_oembed_cache_until_as_string(self, loads, urlopen, now): - urlopen.return_value = self.dummy_response - loads.return_value = { - "type": "something", - "url": "http://www.example.com", - "title": "test_title", - "author_name": "test_author", - "provider_name": "test_provider_name", - "thumbnail_url": "test_thumbail_url", - "width": "test_width", - "height": "test_height", - "html": "test_html", - "cache_age": "3600", - } + @responses.activate + def test_oembed_cache_until_as_string(self, now): + responses.get( + url="https://www.youtube.com/oembed", + match=[ + matchers.query_param_matcher( + {"url": "https://www.youtube.com/watch/", "format": "json"} + ), + ], + json={ + "type": "something", + "url": "http://www.example.com", + "title": "test_title", + "author_name": "test_author", + "provider_name": "test_provider_name", + "thumbnail_url": "test_thumbail_url", + "width": "test_width", + "height": "test_height", + "html": "test_html", + "cache_age": "3600", + }, + ) now.return_value = make_aware(datetime.datetime(2001, 2, 3)) - result = OEmbedFinder().find_embed("http://www.youtube.com/watch/") + result = OEmbedFinder().find_embed("https://www.youtube.com/watch/") self.assertEqual( result, { @@ -615,24 +644,25 @@ class TestOembed(TestCase): def test_oembed_accepts_known_provider(self): finder = OEmbedFinder(providers=[oembed_providers.youtube]) - self.assertTrue(finder.accept("http://www.youtube.com/watch/")) + self.assertTrue(finder.accept("https://www.youtube.com/watch/")) def test_oembed_doesnt_accept_unknown_provider(self): finder = OEmbedFinder(providers=[oembed_providers.twitter]) - self.assertFalse(finder.accept("http://www.youtube.com/watch/")) + self.assertFalse(finder.accept("https://www.youtube.com/watch/")) - @patch("urllib.request.urlopen") - @patch("json.loads") - def test_endpoint_with_format_param(self, loads, urlopen): - urlopen.return_value = self.dummy_response - loads.return_value = {"type": "video", "url": "http://www.example.com"} + @responses.activate + def test_endpoint_with_format_param(self): + responses.get( + url="https://www.vimeo.com/api/oembed.json", + match=[ + matchers.query_param_matcher( + {"url": "https://vimeo.com/217403396", "format": "json"} + ), + ], + json={"type": "video", "url": "http://www.example.com"}, + ) result = OEmbedFinder().find_embed("https://vimeo.com/217403396") self.assertEqual(result["type"], "video") - request = urlopen.call_args[0][0] - self.assertEqual( - request.get_full_url().split("?")[0], - "https://www.vimeo.com/api/oembed.json", - ) class TestInstagramOEmbed(TestCase):