From 40fa3956f59f44ab3b669e0f5b1c62c671a86706 Mon Sep 17 00:00:00 2001 From: Tobias McNulty Date: Thu, 17 Aug 2017 14:58:28 -0700 Subject: [PATCH] add WAGTAILDOCS_SERVE_METHOD setting to allow serving files from underlying storage --- docs/advanced_topics/settings.rst | 18 ++++++++ wagtail/documents/models.py | 7 ++++ wagtail/documents/tests/test_views.py | 60 +++++++++++++++++++++++++++ wagtail/documents/views/serve.py | 28 ++++++++++++- 4 files changed, 112 insertions(+), 1 deletion(-) diff --git a/docs/advanced_topics/settings.rst b/docs/advanced_topics/settings.rst index 0c49395eb5..0862de5391 100644 --- a/docs/advanced_topics/settings.rst +++ b/docs/advanced_topics/settings.rst @@ -268,6 +268,24 @@ This setting lets you override the maximum number of pixels an image can have. I This setting enables feature detection once OpenCV is installed, see all details on the :ref:`image_feature_detection` documentation. +Documents +--------- + +.. code-block:: python + + WAGTAILDOCS_SERVE_METHOD = 'redirect' + +Determines how document downloads will be served. Normally, requests for documents are sent through a Django view, to perform permission checks (see :ref:`image_document_permissions`) and potentially other housekeeping tasks such as hit counting. To fully protect against users bypassing this check, it needs to happen in the same request where the document is served; however, this incurs a performance hit as the document then needs to be served by the Django server. In particular, this cancels out much of the benefit of hosting documents on external storage, such as S3 or a CDN. + +For this reason, Wagtail provides a number of serving methods which trade some of the strictness of the permission check for performance: + + * ``'direct'`` - links to documents point directly to the URL provided by the underlying storage, bypassing the permission check. This is most useful when deploying sites as fully static HTML (e.g. using `wagtail-bakery `_ or `Gatsby `_). + * ``'redirect'`` - links to documents point to a Django view which will check the user's permission; if successful, it will redirect to the URL provided by the underlying storage to allow the document to be downloaded. This is most suitable for remote storage backends such as S3, as it allows the document to be served independently of the Django server. Note that if a user is able to guess the latter URL, they will be able to bypass the permission check; some storage backends may provide configuration options to generate a random or short-lived URL to mitigate this. + * ``'serve_view'`` - links to documents point to a Django view which both checks the user's permission, and serves the document. Serving will be handled by `django-sendfile `_, if this is installed and supported by your server configuration, or as a streaming response from Django if not. When using this method, it is recommended that you configure your webserver to *disallow* serving documents directly from their location under ``MEDIA_ROOT``, as this would provide a way to bypass the permission check. + +If ``WAGTAILDOCS_SERVE_METHOD`` is unspecified or set to ``None``, the default method is ``'redirect'`` when a remote storage backend is in use (i.e. one that exposes a URL but not a local filesystem path), and ``'serve_view'`` otherwise. Finally, some storage backends may not expose a URL at all; in this case, serving will proceed as for ``'serve_view'``. + + Password Management ------------------- diff --git a/wagtail/documents/models.py b/wagtail/documents/models.py index ad74b43323..9d4c862189 100644 --- a/wagtail/documents/models.py +++ b/wagtail/documents/models.py @@ -127,6 +127,13 @@ class AbstractDocument(CollectionMember, index.Indexed, models.Model): @property def url(self): + if getattr(settings, 'WAGTAILDOCS_SERVE_METHOD', None) == 'direct': + try: + return self.file.url + except NotImplementedError: + # backend does not provide a url, so fall back on the serve view + pass + return reverse('wagtaildocs_serve', args=[self.id, self.filename]) def get_usage(self): diff --git a/wagtail/documents/tests/test_views.py b/wagtail/documents/tests/test_views.py index 5711166538..2dac815eb2 100644 --- a/wagtail/documents/tests/test_views.py +++ b/wagtail/documents/tests/test_views.py @@ -11,6 +11,7 @@ from django.urls import reverse from wagtail.documents import models +@override_settings(WAGTAILDOCS_SERVE_METHOD=None) class TestServeView(TestCase): def setUp(self): self.document = models.Document(title="Test document") @@ -67,6 +68,64 @@ class TestServeView(TestCase): _get_sendfile.clear() +@override_settings(WAGTAILDOCS_SERVE_METHOD='redirect') +class TestServeViewWithRedirect(TestCase): + def setUp(self): + self.document = models.Document(title="Test document") + self.document.file.save('example.doc', ContentFile("A boring example document")) + + def tearDown(self): + self.document.delete() + + def get(self): + return self.client.get(reverse('wagtaildocs_serve', args=(self.document.id, self.document.filename))) + + def test_redirect(self): + response = self.get() + self.assertRedirects(response, self.document.file.url, fetch_redirect_response=False) + + +@override_settings(WAGTAILDOCS_SERVE_METHOD='direct') +class TestDirectDocumentUrls(TestCase): + def setUp(self): + self.document = models.Document(title="Test document") + self.document.file.save('example.doc', ContentFile("A boring example document")) + + def tearDown(self): + self.document.delete() + + def test_url(self): + self.assertEqual(self.document.url, self.document.file.url) + + +@override_settings( + WAGTAILDOCS_SERVE_METHOD=None, + DEFAULT_FILE_STORAGE='wagtail.tests.dummy_external_storage.DummyExternalStorage' +) +class TestServeWithExternalStorage(TestCase): + """ + Test the behaviour of the default serve method when used with a remote storage backend + (i.e. one that throws NotImplementedError for the path() method). + """ + def setUp(self): + self.document = models.Document(title="Test document") + self.document.file.save('example.doc', ContentFile("A boring example document")) + self.serve_view_url = reverse('wagtaildocs_serve', args=(self.document.id, self.document.filename)) + + def tearDown(self): + self.document.delete() + + def test_url(self): + # document.url should point to the serve view + self.assertEqual(self.document.url, self.serve_view_url) + + def test_redirect(self): + # serve view should redirect to the remote URL + response = self.client.get(self.serve_view_url) + self.assertRedirects(response, self.document.file.url, fetch_redirect_response=False) + + +@override_settings(WAGTAILDOCS_SERVE_METHOD=None) class TestServeViewWithSendfile(TestCase): def setUp(self): # Import using a try-catch block to prevent crashes if the @@ -124,6 +183,7 @@ class TestServeViewWithSendfile(TestCase): self.assertEqual(response['X-Accel-Redirect'], os.path.join(settings.MEDIA_URL, self.document.file.name)) +@override_settings(WAGTAILDOCS_SERVE_METHOD=None) class TestServeWithUnicodeFilename(TestCase): def setUp(self): self.document = models.Document(title="Test document") diff --git a/wagtail/documents/views/serve.py b/wagtail/documents/views/serve.py index 82d899b198..7f4a299681 100644 --- a/wagtail/documents/views/serve.py +++ b/wagtail/documents/views/serve.py @@ -37,6 +37,31 @@ def serve(request, document_id, document_filename): except NotImplementedError: local_path = None + try: + direct_url = doc.file.url + except NotImplementedError: + direct_url = None + + serve_method = getattr(settings, 'WAGTAILDOCS_SERVE_METHOD', None) + + # If no serve method has been specified, select an appropriate default for the storage backend: + # redirect for remote storages (i.e. ones that provide a url but not a local path) and + # serve_view for all other cases + if serve_method is None: + if direct_url and not local_path: + serve_method = 'redirect' + else: + serve_method = 'serve_view' + + if serve_method in ('redirect', 'direct') and direct_url: + # Serve the file by redirecting to the URL provided by the underlying storage; + # this saves the cost of delivering the file via Python. + # For serve_method == 'direct', this view should not normally be reached + # (the document URL as used in links should point directly to the storage URL instead) + # but we handle it as a redirect to provide sensible fallback / + # backwards compatibility behaviour. + return redirect(direct_url) + if local_path: # Use wagtail.utils.sendfile to serve the file; @@ -57,7 +82,8 @@ def serve(request, document_id, document_filename): else: # We are using a storage backend which does not expose filesystem paths - # (e.g. storages.backends.s3boto.S3BotoStorage). + # (e.g. storages.backends.s3boto.S3BotoStorage) AND the developer has not allowed + # redirecting to the file url directly. # Fall back on pre-sendfile behaviour of reading the file content and serving it # as a StreamingHttpResponse