Refactor the wagtail sitemap generator.

This new version is based on the Django sitemaps and therefore can be
easily mixed with other sitemaps in one index. It should be completely
backwards compatible.

This also implements support for pagination (see #1698)
pull/3554/head
Michael van Tellingen 2017-03-24 14:08:24 +01:00 zatwierdzone przez Michael van Tellingen
rodzic ef1cd3d51a
commit 2f79e42948
7 zmienionych plików z 80 dodań i 86 usunięć

Wyświetl plik

@ -1 +1,4 @@
from .sitemap_generator import Sitemap # noqa
default_app_config = 'wagtail.contrib.wagtailsitemaps.apps.WagtailSitemapsAppConfig'

Wyświetl plik

@ -1,23 +1,37 @@
from __future__ import absolute_import, unicode_literals
from django.template.loader import render_to_string
from django.contrib.sitemaps import Sitemap as DjangoSitemap
class Sitemap(object):
template = 'wagtailsitemaps/sitemap.xml'
class Sitemap(DjangoSitemap):
def __init__(self, site):
def __init__(self, site=None):
self.site = site
def get_pages(self):
return self.site.root_page.get_descendants(inclusive=True).live().public().order_by('path')
def location(self, obj):
    """Return the ``url`` attribute of ``obj.specific``."""
    specific_page = obj.specific
    return specific_page.url
def get_urls(self):
for page in self.get_pages():
for url in page.specific.get_sitemap_urls():
yield url
def lastmod(self, obj):
    """Return ``latest_revision_created_at`` read from ``obj.specific``."""
    specific_page = obj.specific
    return specific_page.latest_revision_created_at
def render(self):
return render_to_string(self.template, {
'urlset': self.get_urls()
})
def items(self):
    """Return the pages to list in the sitemap.

    Starts from ``self.site.root_page``, takes it and all of its
    descendants, narrows the result with ``live()`` and ``public()``,
    and orders it by ``path``.
    """
    root = self.site.root_page
    candidates = root.get_descendants(inclusive=True)
    visible = candidates.live().public()
    return visible.order_by('path')
def _urls(self, page, protocol, domain):
    """Collect the sitemap URL entries for one page of the paginator.

    Every item on paginator page ``page`` contributes the entries returned
    by ``item.specific.get_sitemap_urls()``. When at least one entry was
    collected and every entry carries a ``lastmod`` value, the newest one
    is stored on ``self.latest_lastmod``.

    ``protocol`` and ``domain`` are accepted for signature compatibility
    but are not used here.

    Returns the list of collected URL info entries (possibly empty).
    """
    urls = []
    for item in self.paginator.page(page).object_list:
        urls.extend(item.specific.get_sitemap_urls())

    last_mods = {url_info.get('lastmod') for url_info in urls}

    # last_mods is empty when the page yields no URLs at all (for example
    # when nothing is live and public); max() on an empty set would raise
    # ValueError, so only compute it when there is something to compare.
    if last_mods and None not in last_mods:
        self.latest_lastmod = max(last_mods)
    return urls

Wyświetl plik

@ -1,13 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{% spaceless %}
{% for url in urlset %}
<url>
<loc>{{ url.location }}</loc>
{% if url.lastmod %}<lastmod>{{ url.lastmod|date:"Y-m-d" }}</lastmod>{% endif %}
{% if url.changefreq %}<changefreq>{{ url.changefreq }}</changefreq>{% endif %}
{% if url.priority %}<priority>{{ url.priority }}</priority>{% endif %}
</url>
{% endfor %}
{% endspaceless %}
</urlset>

Wyświetl plik

@ -1,7 +1,8 @@
from __future__ import absolute_import, unicode_literals
from django.contrib.sites.shortcuts import get_current_site
from django.core.cache import cache
from django.test import TestCase
from django.test import RequestFactory, TestCase
from wagtail.tests.testapp.models import EventIndex, SimplePage
from wagtail.wagtailcore.models import Page, PageViewRestriction, Site
@ -37,22 +38,30 @@ class TestSitemapGenerator(TestCase):
self.site = Site.objects.get(is_default_site=True)
def test_get_pages(self):
def test_items(self):
sitemap = Sitemap(self.site)
pages = sitemap.get_pages()
pages = sitemap.items()
self.assertIn(self.child_page.page_ptr, pages)
self.assertNotIn(self.unpublished_child_page.page_ptr, pages)
self.assertNotIn(self.protected_child_page.page_ptr, pages)
def test_get_urls(self):
request = RequestFactory().get('/sitemap.xml')
req_protocol = request.scheme
req_site = get_current_site(request)
sitemap = Sitemap(self.site)
urls = [url['location'] for url in sitemap.get_urls()]
urls = [url['location'] for url in sitemap.get_urls(1, req_site, req_protocol)]
self.assertIn('http://localhost/', urls) # Homepage
self.assertIn('http://localhost/hello-world/', urls) # Child page
def test_get_urls_uses_specific(self):
request = RequestFactory().get('/sitemap.xml')
req_protocol = request.scheme
req_site = get_current_site(request)
# Add an event page which has an extra url in the sitemap
self.home_page.add_child(instance=EventIndex(
title="Events",
@ -61,23 +70,18 @@ class TestSitemapGenerator(TestCase):
))
sitemap = Sitemap(self.site)
urls = [url['location'] for url in sitemap.get_urls()]
urls = [url['location'] for url in sitemap.get_urls(1, req_site, req_protocol)]
self.assertIn('http://localhost/events/', urls) # Main view
self.assertIn('http://localhost/events/past/', urls) # Sub view
def test_render(self):
sitemap = Sitemap(self.site)
xml = sitemap.render()
# Check that a URL has made it into the xml
self.assertIn('http://localhost/hello-world/', xml)
class TestIndexView(TestCase):
def test_index_view(self):
response = self.client.get('/sitemap-index.xml')
# Make sure the unpublished page didn't make it into the xml
self.assertNotIn('http://localhost/unpublished/', xml)
# Make sure the protected page didn't make it into the xml
self.assertNotIn('http://localhost/protected/', xml)
self.assertEqual(response.status_code, 200)
self.assertEqual(response['Content-Type'], 'application/xml')
class TestSitemapView(TestCase):
@ -85,29 +89,4 @@ class TestSitemapView(TestCase):
response = self.client.get('/sitemap.xml')
self.assertEqual(response.status_code, 200)
self.assertTemplateUsed(response, 'wagtailsitemaps/sitemap.xml')
self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')
def test_sitemap_view_cache(self):
cache_key = 'wagtail-sitemap:%d' % Site.objects.get(is_default_site=True).id
# Check that the key is not in the cache
self.assertNotIn(cache_key, cache)
# Hit the view
first_response = self.client.get('/sitemap.xml')
self.assertEqual(first_response.status_code, 200)
self.assertTemplateUsed(first_response, 'wagtailsitemaps/sitemap.xml')
# Check that the key is in the cache
self.assertIn(cache_key, cache)
# Hit the view again. Should come from the cache this time
second_response = self.client.get('/sitemap.xml')
self.assertEqual(second_response.status_code, 200)
self.assertTemplateNotUsed(second_response, 'wagtailsitemaps/sitemap.xml') # Sitemap should not be re rendered
# Check that the content is the same
self.assertEqual(first_response.content, second_response.content)
self.assertEqual(response['Content-Type'], 'application/xml')

Wyświetl plik

@ -1,25 +1,29 @@
from __future__ import absolute_import, unicode_literals
from django.conf import settings
from django.core.cache import cache
from django.http import HttpResponse
from django.contrib.sitemaps import views as sitemap_views
from .sitemap_generator import Sitemap
def sitemap(request):
cache_key = 'wagtail-sitemap:' + str(request.site.id)
sitemap_xml = cache.get(cache_key)
def index(request, sitemaps, **kwargs):
    """Render the sitemap index.

    The ``sitemaps`` mapping is first passed through ``prepare_sitemaps``
    together with the request; the result is then handed to
    ``sitemap_views.index`` along with any extra keyword arguments.
    """
    return sitemap_views.index(
        request, prepare_sitemaps(request, sitemaps), **kwargs)
if not sitemap_xml:
# Rerender sitemap
sitemap = Sitemap(request.site)
sitemap_xml = sitemap.render()
cache.set(cache_key, sitemap_xml, getattr(settings, 'WAGTAILSITEMAPS_CACHE_TIMEOUT', 6000))
def sitemap(request, sitemaps=None, **kwargs):
    """Render a sitemap.

    When ``sitemaps`` is empty or not given, a single wagtail ``Sitemap``
    built from ``request.site`` is used under the key ``'wagtail'``;
    otherwise the given mapping is passed through ``prepare_sitemaps``.
    The result is delegated to ``sitemap_views.sitemap`` together with any
    extra keyword arguments.
    """
    if not sitemaps:
        prepared = {'wagtail': Sitemap(request.site)}
    else:
        prepared = prepare_sitemaps(request, sitemaps)
    return sitemap_views.sitemap(request, prepared, **kwargs)
# Build response
response = HttpResponse(sitemap_xml)
response['Content-Type'] = "text/xml; charset=utf-8"
return response
def prepare_sitemaps(request, sitemaps):
    """Initialise the wagtail Sitemap classes by passing the request.site value.

    ``sitemaps`` maps section names to sitemap classes or to already-built
    sitemap instances. Classes that subclass the wagtail ``Sitemap`` are
    instantiated with ``request.site``; every other value is passed through
    unchanged.

    Returns a new dict; the ``sitemaps`` argument is not modified.
    """
    initialised_sitemaps = {}
    for name, sitemap_cls in sitemaps.items():
        # issubclass() raises TypeError when handed an instance, so make
        # sure the value is a class before asking about its ancestry.
        if isinstance(sitemap_cls, type) and issubclass(sitemap_cls, Sitemap):
            initialised_sitemaps[name] = sitemap_cls(request.site)
        else:
            initialised_sitemaps[name] = sitemap_cls
    return initialised_sitemaps

Wyświetl plik

@ -123,7 +123,6 @@ INSTALLED_APPS = (
'wagtail.tests.search',
'wagtail.tests.modeladmintest',
'wagtail.contrib.wagtailstyleguide',
'wagtail.contrib.wagtailsitemaps',
'wagtail.contrib.wagtailroutablepage',
'wagtail.contrib.wagtailfrontendcache',
'wagtail.contrib.wagtailapi',
@ -151,6 +150,7 @@ INSTALLED_APPS = (
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.sitemaps',
'django.contrib.staticfiles',
)

Wyświetl plik

@ -5,7 +5,8 @@ from django.conf.urls import include, url
from wagtail.api.v2.endpoints import PagesAPIEndpoint
from wagtail.api.v2.router import WagtailAPIRouter
from wagtail.contrib.wagtailapi import urls as wagtailapi_urls
from wagtail.contrib.wagtailsitemaps.views import sitemap
from wagtail.contrib.wagtailsitemaps import views as sitemaps_views
from wagtail.contrib.wagtailsitemaps import Sitemap
from wagtail.tests.testapp import urls as testapp_urls
from wagtail.wagtailadmin import urls as wagtailadmin_urls
from wagtail.wagtailcore import urls as wagtail_urls
@ -32,7 +33,13 @@ urlpatterns = [
url(r'^api/', include(wagtailapi_urls)),
url(r'^api/v2beta/', api_router.urls),
url(r'^sitemap\.xml$', sitemap),
url(r'^sitemap\.xml$', sitemaps_views.sitemap),
url(r'^sitemap-index\.xml$', sitemaps_views.index, {
'sitemaps': {'pages': Sitemap},
'sitemap_url_name': 'sitemap',
}),
url(r'^sitemap-(?P<section>.+)\.xml$', sitemaps_views.sitemap, name='sitemap'),
url(r'^testapp/', include(testapp_urls)),