kopia lustrzana https://github.com/wagtail/wagtail
Porównaj commity
9 Commity
d6df6b377c
...
d50e5d7ba2
Autor | SHA1 | Data |
---|---|---|
Nick Smith | d50e5d7ba2 | |
Matt Westcott | a09bba67cd | |
Matt Westcott | 6fa3985674 | |
Jake Howard | 84d9bd6fb6 | |
Jake Howard | 37f9ae2ec6 | |
Nick Smith | 94e1c7a2a4 | |
Nick Smith | faf9f8bb30 | |
Nick Smith | 49cd24b61c | |
Nick Smith | 0e2dd63e4d |
|
@ -13,7 +13,9 @@ Changelog
|
|||
* Fix: Preserve whitespace in comment replies (Elhussein Almasri)
|
||||
* Docs: Remove duplicate section on frontend caching proxies from performance page (Jake Howard)
|
||||
* Docs: Document `restriction_type` field on PageViewRestriction (Shlomo Markowitz)
|
||||
* Docs: Document Wagtail's bug bounty policy (Jake Howard)
|
||||
* Maintenance: Use `DjangoJSONEncoder` instead of custom `LazyStringEncoder` to serialize Draftail config (Sage Abdullah)
|
||||
* Maintenance: Refactor image chooser pagination to check `WAGTAILIMAGES_CHOOSER_PAGE_SIZE` at runtime (Matt Westcott)
|
||||
|
||||
|
||||
6.1 (01.05.2024)
|
||||
|
|
|
@ -0,0 +1,362 @@
|
|||
# How to… use synonyms with Elasticsearch to improve search results
|
||||
|
||||
Users might not always use the same terms as you to refer to something. An example might be 'waste', 'refuse', 'garbage', 'trash' and 'rubbish'. If you configure these as synonyms via the search engine, then you will not need to tag pages with every possible synonym, and a search query `Page.objects.search("gift")` could always return results including a page called "Donate to our Charity".
|
||||
|
||||
```eval_rst
|
||||
.. note:: Stemming, and language-specific configuration
|
||||
|
||||
Synonym searching is not necessary for the search engine to match a query "cherries" to a page _Cherry flavour ice cream_. The Postgres or Elasticsearch search backends should do that automatically, so long as you have the correct language configured. See
|
||||
|
||||
- :doc:`/reference/contrib/postgres_search#language-postgresql-search-configuration`
|
||||
- `Elasticsearch documentation <https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-lang-analyzer.html>`_
|
||||
```
|
||||
|
||||
This how-to guide will show you how to configure Elasticsearch to use synonym matching, first for a hardcoded source of synonyms, and then for a dynamic source of synonyms that is stored in the database and managed in the admin.
|
||||
|
||||
Note: We will assume that your synonyms content is dynamic, and therefore you want to read it in from the database, and we will manage the synonyms in the admin.
|
||||
|
||||
Note: We will assume the following code goes in a Django app called `customsearch` in the root of your project.
|
||||
|
||||
## 1. Fetch synonym terms when a search is performed
|
||||
|
||||
Elasticsearch will accept synonyms as a list of strings in the format:
|
||||
|
||||
```python
|
||||
[
|
||||
"foo, fish, jam",
|
||||
"bar, tobogganing, showers, toasters"
|
||||
]
|
||||
```
|
||||
|
||||
If these are set, then a search for "foo", "fish", or "jam" will return identical results, and will include pages indexed for any of those words.
|
||||
|
||||
If you already have an approach to generating synonyms, and they do not change, this is the only step needed to make Elasticsearch aware of them. In your site settings:
|
||||
|
||||
```python
|
||||
WAGTAILSEARCH_BACKENDS = {
|
||||
"default": {
|
||||
"BACKEND": "wagtail.search.backends.elasticsearch7",
|
||||
"INDEX": "wagtail",
|
||||
"OPTIONS": {},
|
||||
"INDEX_SETTINGS": {
|
||||
"settings": {
|
||||
"analysis": {
|
||||
"analyzer": {
|
||||
"default": {"tokenizer": "whitespace", "filter": ["synonym"]},
|
||||
},
|
||||
"filter": {
|
||||
"synonym": {
|
||||
"type": "synonym",
|
||||
"synonyms": [
|
||||
"foo, fish, jam",
|
||||
"bar, tobogganing, showers, toasters",
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
## 2. Permit dynamic synonyms
|
||||
|
||||
We'll make the backend compatible with dynamic synonyms first. An approach to creating these synonyms is in a later step.
|
||||
|
||||
The backend's settings `Elasticsearch7SearchBackend.settings` are read from your project's settings every time the backend is instantiated. Let's create a placeholder function to generate our synonym list. Edit `customsearch/utils.py`:
|
||||
|
||||
```python
|
||||
def get_synonyms():
|
||||
"""This is still a static list"""
|
||||
return [
|
||||
"foo, fish, jam",
|
||||
"bar, tobogganing, showers, toasters",
|
||||
]
|
||||
```
|
||||
|
||||
We now create a new custom search backend that reads this every time it is instantiated. This backend is instantiated for every search query using the `queryset.search()` syntax, so we can be assured it will call the function every time. Edit `customsearch/elasticsearch7.py`:
|
||||
|
||||
```python
|
||||
import copy
|
||||
|
||||
from wagtail.search.backends.elasticsearch7 import Elasticsearch7SearchBackend
|
||||
|
||||
from customsearch.utils import get_synonyms
|
||||
|
||||
|
||||
class SearchBackend(Elasticsearch7SearchBackend):
|
||||
settings = copy.deepcopy(Elasticsearch7SearchBackend.settings)
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
self.settings["settings"]["analysis"]["filter"]["synonym"] = {
|
||||
"type": "synonym",
|
||||
"synonyms": get_synonyms(),
|
||||
}
|
||||
```
|
||||
|
||||
Now we must update our site settings to use this. Wagtail expects the path to a search backend module, which must contain a `SearchBackend` class:
|
||||
|
||||
```python
|
||||
WAGTAILSEARCH_BACKENDS = {
|
||||
"default": {
|
||||
"BACKEND": "customsearch.elasticsearch7",
|
||||
"INDEX": "wagtail",
|
||||
"OPTIONS": {},
|
||||
"INDEX_SETTINGS": {
|
||||
"settings": {
|
||||
"analysis": {
|
||||
"analyzer": {
|
||||
"default": {"tokenizer": "whitespace", "filter": ["synonym"]},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
Note also that we have removed the `analysis.filter` section of the settings dict.
|
||||
|
||||
## 3. Make synonyms editable
|
||||
|
||||
We will add an admin menu item to edit the synonyms.
|
||||
|
||||
### Add a Term model
|
||||
|
||||
```python
|
||||
from django.contrib.postgres.fields import ArrayField
|
||||
from django.db import models
|
||||
|
||||
from wagtail.admin.panels import FieldPanel
|
||||
|
||||
|
||||
class Term(models.Model):
|
||||
canonical_term = models.CharField(
|
||||
max_length=50,
|
||||
unique=True,
|
||||
help_text="A word or phrase that returns intended search results",
|
||||
)
|
||||
synonyms = ArrayField(
|
||||
models.CharField(max_length=50, blank=False),
|
||||
help_text=(
|
||||
"A list of other terms which should match pages containing the canonical "
|
||||
"term. Separate with commas, multiple word phrases are supported."
|
||||
),
|
||||
)
|
||||
|
||||
panels = [
|
||||
FieldPanel("canonical_term"),
|
||||
FieldPanel("synonyms"),
|
||||
]
|
||||
|
||||
class Meta:
|
||||
verbose_name = "Search synonym"
|
||||
|
||||
def __str__(self):
|
||||
synonyms = ", ".join(self.synonyms[:5])
|
||||
return f"{self.canonical_term}: {synonyms}"
|
||||
```
|
||||
|
||||
Register the above as an admin menu item. Edit `customsearch/wagtail_hooks.py`:
|
||||
|
||||
```python
|
||||
from wagtail.contrib.modeladmin.options import ModelAdmin, modeladmin_register
|
||||
|
||||
from customsearch.models import Term
|
||||
|
||||
|
||||
class TermModelAdmin(ModelAdmin):
|
||||
model = Term
|
||||
menu_icon = "search"
|
||||
list_display = ("canonical_term", "synonyms")
|
||||
|
||||
|
||||
modeladmin_register(TermModelAdmin)
|
||||
```
|
||||
|
||||
### Make our synonyms dynamic
|
||||
|
||||
Now update the `get_synonyms` function to return dynamic content. Edit `customsearch/utils.py`:
|
||||
|
||||
```python
|
||||
from customsearch.models import Term
|
||||
|
||||
|
||||
def get_synonyms(force_update=False):
|
||||
return [
|
||||
", ".join(
|
||||
[term.canonical_term] + [synonym.lower() for synonym in term.synonyms]
|
||||
)
|
||||
for term in Term.objects.all()
|
||||
] or [""] # This `or` clause is necessary for Elasticsearch 5 only.
|
||||
```
|
||||
|
||||
Note: Elasticsearch 5 will return an error response "synonym requires either `synonyms` or `synonyms_path` to be configured" if you send an empty list. If you are not using that backend, the `or [""]` part can be omitted.
|
||||
|
||||
## 4. Cache the results, to improve performance
|
||||
|
||||
It is likely that searches happen more often than updates to the synonyms. If your site has caching enabled, then we can improve our synonyms function.
|
||||
|
||||
### Update the cache when synonyms are edited
|
||||
|
||||
Edit `customsearch/signal_handlers.py`:
|
||||
|
||||
```python
|
||||
from django.db.models.signals import post_delete, post_save
|
||||
from django.dispatch import receiver
|
||||
|
||||
from customsearch.models import Term
|
||||
from customsearch.utils import get_synonyms
|
||||
|
||||
|
||||
@receiver([post_save, post_delete], sender=Term)
|
||||
def cache_synonyms_receiver(**kwargs):
|
||||
get_synonyms(force_update=True)
|
||||
```
|
||||
|
||||
Load the signal handlers in your app's config. Edit `customsearch/apps.py`:
|
||||
|
||||
```python
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class CustomSearchConfig(AppConfig):
|
||||
name = "customsearch"
|
||||
|
||||
def ready(self):
|
||||
import customsearch.signal_handlers
|
||||
```
|
||||
|
||||
In `settings.INSTALLED_APPS`, use `"customsearch.apps.CustomSearchConfig"`, rather than `"customsearch"`.
|
||||
|
||||
### Use the cached value
|
||||
|
||||
Edit `customsearch/utils.py`:
|
||||
|
||||
```python
|
||||
from django.core.cache import caches
|
||||
|
||||
from customsearch.models import Term
|
||||
|
||||
cache = caches["default"]
|
||||
|
||||
SYNONYMS_CACHE_KEY = "searchbackend_synonyms"
|
||||
|
||||
|
||||
def get_synonyms(force_update=False):
|
||||
synonyms = None if force_update else cache.get(SYNONYMS_CACHE_KEY)
|
||||
|
||||
if not synonyms:
|
||||
synonyms = [
|
||||
", ".join(
|
||||
[term.canonical_term] + [synonym.lower() for synonym in term.synonyms]
|
||||
)
|
||||
for term in Term.objects.all()
|
||||
]
|
||||
cache.set(SYNONYMS_CACHE_KEY, synonyms)
|
||||
|
||||
return synonyms or [""] # This `or` clause is necessary for Elasticsearch 5 only.
|
||||
```
|
||||
|
||||
## 5. Add unit tests
|
||||
|
||||
This is not the core part of the how-to guide, but code should be tested. Add a test file in a location where your test runner will find it.
|
||||
|
||||
```python
|
||||
from unittest.mock import patch
|
||||
|
||||
import factory
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from customsearch.utils import SYNONYMS_CACHE_KEY, cache, get_synonyms
|
||||
|
||||
|
||||
class TermFactory(factory.django.DjangoModelFactory):
|
||||
class Meta:
|
||||
model = "customsearch.Term"
|
||||
|
||||
|
||||
class SynonymTest(TestCase):
|
||||
def test_basic(self):
|
||||
TermFactory(canonical_term="foo", synonyms=["soup", "potatoes"])
|
||||
self.assertListEqual(get_synonyms(), ["foo, soup, potatoes"])
|
||||
|
||||
def test_multi_word_phrase_synonym(self):
|
||||
TermFactory(
|
||||
canonical_term="foo",
|
||||
synonyms=["haircuts arguments", "small things", "rabbits"],
|
||||
)
|
||||
self.assertListEqual(
|
||||
get_synonyms(), ["foo, haircuts arguments, small things, rabbits"],
|
||||
)
|
||||
|
||||
def test_multi_word_canonical_term(self):
|
||||
TermFactory(
|
||||
canonical_term="people with noses", synonyms=["more jam", "soot", "flies"]
|
||||
)
|
||||
self.assertListEqual(
|
||||
get_synonyms(), ["people with noses, more jam, soot, flies"],
|
||||
)
|
||||
|
||||
def test_multiple_synonyms(self):
|
||||
TermFactory(canonical_term="foo", synonyms=["fish", "jam"])
|
||||
TermFactory(
|
||||
canonical_term="bar", synonyms=["tobogganing", "showers", "toasters"]
|
||||
)
|
||||
self.assertListEqual(
|
||||
get_synonyms(), ["foo, fish, jam", "bar, tobogganing, showers, toasters"],
|
||||
)
|
||||
|
||||
def test_synonyms_are_lower_cased(self):
|
||||
TermFactory(canonical_term="foo", synonyms=["Belgium", "fire", "water"])
|
||||
self.assertListEqual(get_synonyms(), ["foo, belgium, fire, water"])
|
||||
|
||||
@patch("customsearch.signal_handlers.get_synonyms")
|
||||
def test_signal_is_triggered(self, mock_get_synonyms):
|
||||
TermFactory(canonical_term="foo", synonyms=["lights", "Burma"])
|
||||
mock_get_synonyms.assert_called_once_with(force_update=True)
|
||||
|
||||
def test_synonyms_are_cached(self):
|
||||
cache.delete(SYNONYMS_CACHE_KEY)
|
||||
self.assertEqual(cache.get(SYNONYMS_CACHE_KEY), None)
|
||||
|
||||
TermFactory(canonical_term="foo", synonyms=["light", "air"])
|
||||
self.assertListEqual(cache.get(SYNONYMS_CACHE_KEY), ["foo, light, air"])
|
||||
|
||||
def test_synonym_cache_can_be_updated(self):
|
||||
TermFactory(
|
||||
canonical_term="foo", synonyms=["things that go 'uhh'", "Arthur Negus"]
|
||||
)
|
||||
cache.set(SYNONYMS_CACHE_KEY, ["foo, colonel gaddafi"])
|
||||
self.assertListEqual(cache.get(SYNONYMS_CACHE_KEY), ["foo, colonel gaddafi"])
|
||||
self.assertListEqual(
|
||||
get_synonyms(force_update=True), ["foo, things that go 'uhh', arthur negus"]
|
||||
)
|
||||
self.assertListEqual(
|
||||
cache.get(SYNONYMS_CACHE_KEY), ["foo, things that go 'uhh', arthur negus"]
|
||||
)
|
||||
|
||||
def test_cache_is_used(self):
|
||||
cache.set(SYNONYMS_CACHE_KEY, ["foo, eggnog, radiators"])
|
||||
self.assertListEqual(get_synonyms(), ["foo, eggnog, radiators"])
|
||||
|
||||
TermFactory(canonical_term="bar", synonyms=["grandmothers"])
|
||||
self.assertListEqual(get_synonyms(), ["bar, grandmothers"])
|
||||
```
|
||||
|
||||
## Suggestions for improvement
|
||||
|
||||
- Generate the synonyms a different way, that suits you.
|
||||
- Write the synonyms out to a file, if you have a lot of content, because reading them inline in the filter increases cluster size unnecessarily (see the [Elasticsearch synonyms documentation][es_docs]).
|
||||
|
||||
## References
|
||||
|
||||
- [Elasticsearch synonyms documentation][es_docs], including more information about the Solr style syntax and file-based synonyms.
|
||||
- [Search backends explanation](reference/search_backends.md)
|
||||
|
||||
|
||||
[es_docs]: https://www.elastic.co/guide/en/elasticsearch/reference/7.10/analysis-synonym-tokenfilter.html
|
|
@ -34,6 +34,12 @@ At any given time, the Wagtail team provides official security support for sever
|
|||
When new releases are issued for security reasons, the accompanying notice will include a list of affected versions.
|
||||
This list is comprised solely of supported versions of Wagtail: older versions may also be affected, but we do not investigate to determine that, and will not issue patches or new releases for those versions.
|
||||
|
||||
## Bug Bounties
|
||||
|
||||
Wagtail does not have a "Bug Bounty" program. Whilst we appreciate and accept reports from anyone, and will gladly give credit to you and/or your organisation, we aren't able to "reward" you for reporting the vulnerability.
|
||||
|
||||
["Beg Bounties"](https://www.troyhunt.com/beg-bounties/) are increasingly common among security researchers, and are not something we condone or support.
|
||||
|
||||
## How Wagtail discloses security issues
|
||||
|
||||
Our process for taking a security issue from private discussion to public disclosure involves multiple steps.
|
||||
|
@ -46,8 +52,8 @@ On the day of disclosure, we will take the following steps:
|
|||
1. Apply the relevant patch(es) to Wagtail's codebase.
|
||||
The commit messages for these patches will indicate that they are for security issues, but will not describe the issue in any detail; instead, they will warn of upcoming disclosure.
|
||||
2. Issue the relevant release(s), by placing new packages on [the Python Package Index](https://pypi.org/project/wagtail/), tagging the new release(s) in Wagtail's GitHub repository and updating Wagtail's [release notes](../releases/index).
|
||||
3. Post a public entry on [Wagtail's blog](https://wagtail.org/blog/), describing the issue and its resolution in detail, pointing to the relevant patches and new releases, and crediting the reporter of the issue (if the reporter wishes to be publicly identified).
|
||||
4. Post a notice to the [Wagtail discussion board](https://github.com/wagtail/wagtail/discussions), [Slack workspace](https://wagtail.org/slack/) and Twitter feed ([\@WagtailCMS](https://twitter.com/wagtailcms)) that links to the blog post.
|
||||
3. Publish a [security advisory](https://github.com/wagtail/wagtail/security/advisories?state=published) on Wagtail's GitHub repository. This describes the issue and its resolution in detail, pointing to the relevant patches and new releases, and crediting the reporter of the issue (if the reporter wishes to be publicly identified).
|
||||
4. Post a notice to the [Wagtail discussion board](https://github.com/wagtail/wagtail/discussions), [Slack workspace](https://wagtail.org/slack/) and Twitter feed ([\@WagtailCMS](https://twitter.com/wagtailcms)) that links to the security advisory.
|
||||
|
||||
If a reported issue is believed to be particularly time-sensitive -- due to a known exploit in the wild, for example -- the time between advance notification and public disclosure may be shortened considerably.
|
||||
|
||||
|
|
|
@ -30,11 +30,13 @@ depth: 1
|
|||
|
||||
* Remove duplicate section on frontend caching proxies from performance page (Jake Howard)
|
||||
* Document `restriction_type` field on PageViewRestriction (Shlomo Markowitz)
|
||||
* Document Wagtail's bug bounty policy (Jake Howard)
|
||||
|
||||
|
||||
### Maintenance
|
||||
|
||||
* Use `DjangoJSONEncoder` instead of custom `LazyStringEncoder` to serialize Draftail config (Sage Abdullah)
|
||||
* Refactor image chooser pagination to check `WAGTAILIMAGES_CHOOSER_PAGE_SIZE` at runtime (Matt Westcott)
|
||||
|
||||
|
||||
## Upgrade considerations - changes affecting all projects
|
||||
|
|
|
@ -17,6 +17,10 @@ class ViewSet(WagtailMenuRegisterable):
|
|||
For more information on how to use this class, see :ref:`using_base_viewset`.
|
||||
"""
|
||||
|
||||
#: A special value that, when passed in a kwargs dict to construct a view, indicates that
|
||||
#: the attribute should not be written and should instead be left as the view's initial value
|
||||
UNDEFINED = object()
|
||||
|
||||
#: A name for this viewset, used as the default URL prefix and namespace.
|
||||
name = None
|
||||
|
||||
|
@ -42,12 +46,13 @@ class ViewSet(WagtailMenuRegisterable):
|
|||
in addition to any kwargs passed to this method. Items from get_common_view_kwargs will be
|
||||
filtered to only include those that are valid for the given view_class.
|
||||
"""
|
||||
merged_kwargs = self.get_common_view_kwargs()
|
||||
merged_kwargs.update(kwargs)
|
||||
filtered_kwargs = {
|
||||
key: value
|
||||
for key, value in self.get_common_view_kwargs().items()
|
||||
if hasattr(view_class, key)
|
||||
for key, value in merged_kwargs.items()
|
||||
if hasattr(view_class, key) and value is not self.UNDEFINED
|
||||
}
|
||||
filtered_kwargs.update(kwargs)
|
||||
return view_class.as_view(**filtered_kwargs)
|
||||
|
||||
def inject_view_methods(self, view_class, method_names):
|
||||
|
|
|
@ -29,7 +29,7 @@ class ChooserViewSet(ViewSet):
|
|||
) #: Label for the 'choose' button in the chooser widget, when an item has already been chosen
|
||||
edit_item_text = _("Edit") #: Label for the 'edit' button in the chooser widget
|
||||
|
||||
per_page = 10 #: Number of results to show per page
|
||||
per_page = ViewSet.UNDEFINED #: Number of results to show per page
|
||||
|
||||
#: A list of URL query parameters that should be passed on unmodified as part of any links or
|
||||
#: form submissions within the chooser modal workflow.
|
||||
|
|
|
@ -1681,6 +1681,22 @@ class TestImageChooserView(WagtailTestUtils, TestCase):
|
|||
response = self.get({"p": 9999})
|
||||
self.assertEqual(response.status_code, 404)
|
||||
|
||||
@override_settings(WAGTAILIMAGES_CHOOSER_PAGE_SIZE=4)
|
||||
def test_chooser_page_size(self):
|
||||
images = [
|
||||
Image(
|
||||
title="Test image %i" % i,
|
||||
file=get_test_image_file(size=(1, 1)),
|
||||
)
|
||||
for i in range(1, 12)
|
||||
]
|
||||
Image.objects.bulk_create(images)
|
||||
|
||||
response = self.get()
|
||||
|
||||
self.assertContains(response, "Page 1 of 3")
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
def test_filter_by_tag(self):
|
||||
for i in range(0, 10):
|
||||
image = Image.objects.create(
|
||||
|
|
|
@ -72,10 +72,15 @@ class ImageCreationFormMixin(CreationFormMixin):
|
|||
class BaseImageChooseView(BaseChooseView):
|
||||
template_name = "wagtailimages/chooser/chooser.html"
|
||||
results_template_name = "wagtailimages/chooser/results.html"
|
||||
per_page = 12
|
||||
ordering = "-created_at"
|
||||
construct_queryset_hook_name = "construct_image_chooser_queryset"
|
||||
|
||||
@property
|
||||
def per_page(self):
|
||||
# Make per_page into a property so that we can read back WAGTAILIMAGES_CHOOSER_PAGE_SIZE
|
||||
# at runtime.
|
||||
return getattr(settings, "WAGTAILIMAGES_CHOOSER_PAGE_SIZE", 20)
|
||||
|
||||
def get_object_list(self):
|
||||
return (
|
||||
permission_policy.instances_user_has_any_permission_for(
|
||||
|
@ -309,7 +314,6 @@ class ImageChooserViewSet(ChooserViewSet):
|
|||
preserve_url_parameters = ChooserViewSet.preserve_url_parameters + ["select_format"]
|
||||
|
||||
icon = "image"
|
||||
per_page = getattr(settings, "WAGTAILIMAGES_CHOOSER_PAGE_SIZE", 10)
|
||||
choose_one_text = _("Choose an image")
|
||||
create_action_label = _("Upload")
|
||||
create_action_clicked_label = _("Uploading…")
|
||||
|
|
Ładowanie…
Reference in New Issue