From 6bd7515663aa1b767483a02dbfe025f9c39a19ba Mon Sep 17 00:00:00 2001 From: Levi Adler Date: Wed, 19 Jul 2017 13:53:01 -0400 Subject: [PATCH] Reduce queries and speed up sitemap generation with PageQuerySet.specific (#3727) --- CHANGELOG.txt | 1 + CONTRIBUTORS.rst | 1 + docs/releases/1.12.rst | 1 + .../wagtailsitemaps/sitemap_generator.py | 9 +++-- wagtail/contrib/wagtailsitemaps/tests.py | 34 +++++++++++++++++-- 5 files changed, 38 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 76f14f7838..c1201fa85e 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -7,6 +7,7 @@ Changelog * New class-based configuration for media embeds (Karl Hobley) * The admin interface now displays a title of the latest draft (Mikalai Radchuk) * Added multi-select form field to the form builder (dwasyl) + * Improved performance of sitemap generation (Levi Adler) * Fix: FieldBlocks in StreamField now call the field's `prepare_value` method (Tim Heap) * Fix: Initial disabled state of InlinePanel add button is now set correctly on non-default tabs (Matthew Downey) * Fix: Redirects with unicode characters now work (Rich Brennan) diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst index fefcfe55bf..b26ce8e09e 100644 --- a/CONTRIBUTORS.rst +++ b/CONTRIBUTORS.rst @@ -246,6 +246,7 @@ Contributors * Paul Kamp * dwasyl * Eugene Morozov +* Levi Adler Translators =========== diff --git a/docs/releases/1.12.rst b/docs/releases/1.12.rst index 0fea4b807f..80a0327002 100644 --- a/docs/releases/1.12.rst +++ b/docs/releases/1.12.rst @@ -23,6 +23,7 @@ Other features * The admin interface now displays a title of the latest draft (Mikalai Radchuk) * Added multi-select form field to the form builder (dwasyl) + * Improved performance of sitemap generation (Levi Adler) Bug fixes ~~~~~~~~~ diff --git a/wagtail/contrib/wagtailsitemaps/sitemap_generator.py b/wagtail/contrib/wagtailsitemaps/sitemap_generator.py index f903a1aa82..c2cde386f0 100644 --- a/wagtail/contrib/wagtailsitemaps/sitemap_generator.py +++ b/wagtail/contrib/wagtailsitemaps/sitemap_generator.py @@ -9,11 +9,9 @@ class Sitemap(DjangoSitemap): self.site = site def location(self, obj): - return obj.specific.url + return obj.url def lastmod(self, obj): - obj = obj.specific - # fall back on latest_revision_created_at if last_published_at is null # (for backwards compatibility from before last_published_at was added) return (obj.last_published_at or obj.latest_revision_created_at) @@ -25,14 +23,15 @@ class Sitemap(DjangoSitemap): .get_descendants(inclusive=True) .live() .public() - .order_by('path')) + .order_by('path') + .specific()) def _urls(self, page, protocol, domain): urls = [] last_mods = set() for item in self.paginator.page(page).object_list: - for url_info in item.specific.get_sitemap_urls(): + for url_info in item.get_sitemap_urls(): urls.append(url_info) last_mods.add(url_info.get('lastmod')) diff --git a/wagtail/contrib/wagtailsitemaps/tests.py b/wagtail/contrib/wagtailsitemaps/tests.py index 9fa2147b82..4855280eb8 100644 --- a/wagtail/contrib/wagtailsitemaps/tests.py +++ b/wagtail/contrib/wagtailsitemaps/tests.py @@ -54,9 +54,9 @@ class TestSitemapGenerator(TestCase): sitemap = Sitemap(self.site) pages = sitemap.items() - self.assertIn(self.child_page.page_ptr, pages) - self.assertNotIn(self.unpublished_child_page.page_ptr, pages) - self.assertNotIn(self.protected_child_page.page_ptr, pages) + self.assertIn(self.child_page.page_ptr.specific, pages) + self.assertNotIn(self.unpublished_child_page.page_ptr.specific, pages) + self.assertNotIn(self.protected_child_page.page_ptr.specific, pages) def test_get_urls(self): request = RequestFactory().get('/sitemap.xml') @@ -108,6 +108,34 @@ class TestSitemapGenerator(TestCase): ][0] self.assertEqual(child_page_lastmod, datetime.datetime(2017, 2, 1, 12, 0, 0, tzinfo=pytz.utc)) + def test_latest_lastmod(self): + # give the homepage a lastmod + self.home_page.last_published_at = datetime.datetime(2017, 3, 1, 12, 0, 0, tzinfo=pytz.utc) + self.home_page.save() + + request = RequestFactory().get('/sitemap.xml') + req_protocol = request.scheme + req_site = get_current_site(request) + + sitemap = Sitemap(self.site) + sitemap.get_urls(1, req_site, req_protocol) + + self.assertEqual(sitemap.latest_lastmod, datetime.datetime(2017, 3, 1, 12, 0, 0, tzinfo=pytz.utc)) + + def test_latest_lastmod_missing(self): + # ensure homepage does not have lastmod + self.home_page.last_published_at = None + self.home_page.save() + + request = RequestFactory().get('/sitemap.xml') + req_protocol = request.scheme + req_site = get_current_site(request) + + sitemap = Sitemap(self.site) + sitemap.get_urls(1, req_site, req_protocol) + + self.assertFalse(hasattr(sitemap, 'latest_lastmod')) + class TestIndexView(TestCase): def test_index_view(self):