Merge pull request #325 from kaedroho/sitemaps

Sitemap generator
pull/364/head^2
Karl Hobley 2014-07-02 17:42:13 +01:00
commit 448b50a011
10 zmienionych plików z 215 dodań i 1 usunięć

Wyświetl plik

@ -0,0 +1,58 @@
Sitemap generation
==================
This document describes how to create XML sitemaps for your Wagtail website using the ``wagtail.contrib.wagtailsitemaps`` module.
Basic configuration
~~~~~~~~~~~~~~~~~~~
First, add ``"wagtail.contrib.wagtailsitemaps"`` to ``INSTALLED_APPS`` in your Django settings file:
.. code-block:: python
INSTALLED_APPS = [
...
"wagtail.contrib.wagtailsitemaps",
]
Then, in urls.py, you need to add a link to the ``wagtail.contrib.wagtailsitemaps.views.sitemap`` view which generates the sitemap:
.. code-block:: python
from wagtail.contrib.wagtailsitemaps.views import sitemap
urlpatterns = patterns('',
...
url(r'^sitemap\.xml$', sitemap),
)
You should now be able to browse to "/sitemap.xml" and see the sitemap working. By default, all published pages in your website will be added to the sitemap.
Customising
~~~~~~~~~~~
URLs
----
The Page class defines a ``get_sitemap_urls`` method which you can override to customise sitemaps per page instance. This method must return a list of dictionaries, one dictionary per URL entry in the sitemap. You can exclude pages from the sitemap by returning an empty list.
Each dictionary can contain the following:
- **location** (required) - This is the full URL path to add into the sitemap.
- **lastmod** - A Python ``date`` or ``datetime`` set to when the page was last modified.
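- **changefreq** - How frequently the page is likely to change (for example ``daily`` or ``monthly``).
- **priority** - The priority of this URL relative to other URLs on your site, from ``0.0`` to ``1.0``.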
You can add more but you will need to override the ``wagtailsitemaps/sitemap.xml`` template in order for them to be displayed in the sitemap.
Cache
-----
By default, sitemaps are cached for 100 minutes (6000 seconds). You can change this by setting ``WAGTAILSITEMAPS_CACHE_TIMEOUT`` in your Django settings to the number of seconds the cache should last.

Wyświetl plik

@ -85,6 +85,7 @@ if not settings.configured:
'wagtail.wagtailredirects',
'wagtail.wagtailforms',
'wagtail.contrib.wagtailstyleguide',
'wagtail.contrib.wagtailsitemaps',
'wagtail.tests',
],

Wyświetl plik

@ -0,0 +1,21 @@
from django.template.loader import render_to_string
class Sitemap(object):
    """Collects sitemap entries for one site and renders them to XML."""

    # Template that receives the entries as the ``urlset`` context variable.
    template = 'wagtailsitemaps/sitemap.xml'

    def __init__(self, site):
        # ``site.root_page`` is read later, in ``get_pages``.
        self.site = site

    def get_pages(self):
        """Return the site's root page and its descendants, filtered with
        ``live()`` and ordered by ``path``.
        """
        subtree = self.site.root_page.get_descendants(inclusive=True)
        live_pages = subtree.live()
        return live_pages.order_by('path')

    def get_urls(self):
        """Yield, page by page, every entry returned by ``get_sitemap_urls``.

        This is a generator: no page is queried until iteration starts.
        """
        for current_page in self.get_pages():
            for entry in current_page.get_sitemap_urls():
                yield entry

    def render(self):
        """Render ``self.template`` with the entries and return the result."""
        context = {
            'urlset': self.get_urls()
        }
        return render_to_string(self.template, context)

Wyświetl plik

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{% spaceless %}
{# One <url> element per item of "urlset": "location" is always written; "lastmod", "changefreq" and "priority" are written only when truthy. #}
{% for url in urlset %}
<url>
<loc>{{ url.location }}</loc>
{% if url.lastmod %}<lastmod>{{ url.lastmod|date:"Y-m-d" }}</lastmod>{% endif %}
{% if url.changefreq %}<changefreq>{{ url.changefreq }}</changefreq>{% endif %}
{% if url.priority %}<priority>{{ url.priority }}</priority>{% endif %}
</url>
{% endfor %}
{% endspaceless %}
</urlset>

Wyświetl plik

@ -0,0 +1,84 @@
from django.test import TestCase
from django.core.urlresolvers import reverse
from django.core.cache import cache
from wagtail.wagtailcore.models import Page, Site
from wagtail.tests.models import SimplePage
from .sitemap_generator import Sitemap
class TestSitemapGenerator(TestCase):
    """Exercises ``Sitemap`` against one live and one non-live child page."""

    def setUp(self):
        self.home_page = Page.objects.get(id=2)

        # A published child that should appear in the sitemap.
        published = SimplePage(
            title="Hello world!",
            slug='hello-world',
            live=True,
        )
        self.child_page = self.home_page.add_child(instance=published)

        # A non-live child that should be left out of the sitemap.
        draft = SimplePage(
            title="Unpublished",
            slug='unpublished',
            live=False,
        )
        self.unpublished_child_page = self.home_page.add_child(instance=draft)

        self.site = Site.objects.get(is_default_site=True)

    def test_get_pages(self):
        """Only the live child is among the pages returned."""
        found_pages = Sitemap(self.site).get_pages()

        self.assertIn(self.child_page.page_ptr, found_pages)
        self.assertNotIn(self.unpublished_child_page.page_ptr, found_pages)

    def test_get_urls(self):
        """The home page and the live child both contribute a location."""
        generator = Sitemap(self.site)
        locations = []
        for entry in generator.get_urls():
            locations.append(entry['location'])

        self.assertIn('/', locations)  # Homepage
        self.assertIn('/hello-world/', locations)  # Child page

    def test_render(self):
        """The rendered XML mentions the live child but not the non-live one."""
        rendered = Sitemap(self.site).render()

        # Check that a URL has made it into the xml
        self.assertIn('/hello-world/', rendered)

        # Make sure the unpublished page didn't make it into the xml
        self.assertNotIn('/unpublished/', rendered)
class TestSitemapView(TestCase):
    """Tests for the ``/sitemap.xml`` view and its caching."""

    def setUp(self):
        # The cache is not reset between tests by anything in this class, so
        # an earlier request to the view (test_sitemap_view runs first
        # alphabetically) may have left the sitemap cached. Drop the entry so
        # each test starts from a known state.
        self.cache_key = 'wagtail-sitemap:%d' % Site.objects.get(is_default_site=True).id
        cache.delete(self.cache_key)

    def test_sitemap_view(self):
        """The view answers 200, renders the template and declares XML."""
        response = self.client.get('/sitemap.xml')

        self.assertEqual(response.status_code, 200)
        self.assertTemplateUsed(response, 'wagtailsitemaps/sitemap.xml')
        self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')

    def test_sitemap_view_cache(self):
        """The second request is served from the cache, without re-rendering."""
        cache_key = self.cache_key

        # Check that the key is not in the cache
        self.assertFalse(cache.has_key(cache_key))

        # Hit the view
        first_response = self.client.get('/sitemap.xml')

        self.assertEqual(first_response.status_code, 200)
        self.assertTemplateUsed(first_response, 'wagtailsitemaps/sitemap.xml')

        # Check that the key is in the cache
        self.assertTrue(cache.has_key(cache_key))

        # Hit the view again. Should come from the cache this time
        second_response = self.client.get('/sitemap.xml')

        self.assertEqual(second_response.status_code, 200)
        # Sitemap should not be re rendered
        self.assertTemplateNotUsed(second_response, 'wagtailsitemaps/sitemap.xml')

        # Check that the content is the same
        self.assertEqual(first_response.content, second_response.content)

Wyświetl plik

@ -0,0 +1,24 @@
from django.shortcuts import render
from django.http import HttpResponse
from django.core.cache import cache
from django.conf import settings
from .sitemap_generator import Sitemap
def sitemap(request):
    """Return the XML sitemap for ``request.site`` as an HTTP response.

    The rendered XML is cached under a key derived from the site id. When the
    cache holds no (or a falsy) value, the sitemap is rendered again and
    stored for ``WAGTAILSITEMAPS_CACHE_TIMEOUT`` seconds, or 6000 seconds if
    that setting is not defined.
    """
    cache_key = 'wagtail-sitemap:' + str(request.site.id)
    sitemap_xml = cache.get(cache_key)

    if not sitemap_xml:
        # Rerender sitemap
        generator = Sitemap(request.site)
        sitemap_xml = generator.render()

        timeout = getattr(settings, 'WAGTAILSITEMAPS_CACHE_TIMEOUT', 6000)
        cache.set(cache_key, sitemap_xml, timeout)

    # Build response
    response = HttpResponse(sitemap_xml)
    response['Content-Type'] = "text/xml; charset=utf-8"
    return response

Wyświetl plik

@ -4,6 +4,7 @@ from wagtail.wagtailcore import urls as wagtail_urls
from wagtail.wagtailadmin import urls as wagtailadmin_urls
from wagtail.wagtaildocs import urls as wagtaildocs_urls
from wagtail.wagtailsearch.urls import frontend as wagtailsearch_frontend_urls
from wagtail.contrib.wagtailsitemaps.views import sitemap
# Signal handlers
from wagtail.wagtailsearch import register_signal_handlers as wagtailsearch_register_signal_handlers
@ -15,6 +16,8 @@ urlpatterns = patterns('',
url(r'^search/', include(wagtailsearch_frontend_urls)),
url(r'^documents/', include(wagtaildocs_urls)),
url(r'^sitemap\.xml$', sitemap),
# For anything not caught by a more specific rule above, hand over to
# Wagtail's serving mechanism
url(r'', include(wagtail_urls)),

Wyświetl plik

@ -690,12 +690,22 @@ class Page(six.with_metaclass(PageBase, MP_Node, ClusterableModel, Indexed)):
"""
return self.serve(self.dummy_request())
def get_sitemap_urls(self):
    """Return the sitemap entries for this page.

    The result is a list holding one dictionary with two keys:
    ``location`` (``self.url``) and ``lastmod`` (``created_at`` of the latest
    revision, or ``None`` when there is no latest revision).
    """
    revision = self.get_latest_revision()
    location = self.url

    if revision:
        last_modified = revision.created_at
    else:
        last_modified = None

    entry = {
        'location': location,
        'lastmod': last_modified,
    }
    return [entry]
def get_static_site_paths(self):
"""
This is a generator of URL paths to feed into a static site generator
Override this if you would like to create static versions of subpages
"""
# Yield paths for this page
# Yield path for this page
yield '/'
# Yield paths for child pages