diff --git a/CHANGELOG.txt b/CHANGELOG.txt index a68e9c730c..d31dac8d70 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -14,6 +14,7 @@ Changelog * Removed `assert` statements from Wagtail API (Kim Chee Leong) * Update `jquery-datetimepicker` dependency to make Wagtail more CSP-friendly (`unsafe-eval`) (Mike Kamermans) * Added error notification when running the `wagtail` command on Python <3.4 (Matt Westcott) + * `update_index` management command now accepts a `--chunk_size` option to determine the number of items to load at once (Dave Bell) * Fix: Status button on 'edit page' now links to the correct URL when live and draft slug differ (LB (Ben Johnston)) * Fix: Image title text in the gallery and in the chooser now wraps for long filenames (LB (Ben Johnston), Luiz Boaretto) * Fix: Move image editor action buttons to the bottom of the form on mobile (Julian Gallo) diff --git a/docs/reference/management_commands.rst b/docs/reference/management_commands.rst index d2dd0ca7ca..656ba76824 100644 --- a/docs/reference/management_commands.rst +++ b/docs/reference/management_commands.rst @@ -80,6 +80,8 @@ For example, to update just the default backend: $ python manage.py update_index --backend default +The ``--chunk_size`` option can be used to set the size of chunks that are indexed at a time. This defaults to +1000 but may need to be reduced for larger document sizes. Indexing the schema only ```````````````````````` diff --git a/docs/releases/2.1.rst b/docs/releases/2.1.rst index c56e45102e..b37bd6b8cc 100644 --- a/docs/releases/2.1.rst +++ b/docs/releases/2.1.rst @@ -28,6 +28,7 @@ Other features * Removed ``assert`` statements from Wagtail API (Kim Chee Leong) * Update `jquery-datetimepicker` dependency to make Wagtail more CSP-friendly (`unsafe-eval`) (Mike Kamermans) * Added error notification when running the ``wagtail`` command on Python <3.4 (Matt Westcott) + * ``update_index`` management command now accepts a ``--chunk_size`` option to determine the number of items to load at once (Dave Bell) Bug fixes ~~~~~~~~~ diff --git a/wagtail/search/management/commands/update_index.py b/wagtail/search/management/commands/update_index.py index d52dc65213..59623dc225 100644 --- a/wagtail/search/management/commands/update_index.py +++ b/wagtail/search/management/commands/update_index.py @@ -7,6 +7,8 @@ from django.db import transaction from wagtail.search.backends import get_search_backend from wagtail.search.index import get_indexed_models +DEFAULT_CHUNK_SIZE = 1000 + def group_models_by_index(backend, models): """ @@ -50,7 +52,7 @@ def group_models_by_index(backend, models): class Command(BaseCommand): - def update_backend(self, backend_name, schema_only=False): + def update_backend(self, backend_name, schema_only=False, chunk_size=DEFAULT_CHUNK_SIZE): self.stdout.write("Updating backend: " + backend_name) backend = get_search_backend(backend_name) @@ -80,8 +82,8 @@ class Command(BaseCommand): for model in models: self.stdout.write('{}: {}.{} '.format(backend_name, model._meta.app_label, model.__name__).ljust(35), ending='') - # Add items (1000 at a time) - for chunk in self.print_iter_progress(self.queryset_chunks(model.get_indexed_objects().order_by('pk'))): + # Add items (chunk_size at a time) + for chunk in self.print_iter_progress(self.queryset_chunks(model.get_indexed_objects().order_by('pk'), chunk_size)): index.add_items(model, chunk) object_count += len(chunk) @@ -100,6 +102,9 @@ class Command(BaseCommand): parser.add_argument( '--schema-only', action='store_true', dest='schema_only', default=False, help="Prevents loading any data into the index") + parser.add_argument( + '--chunk_size', action='store', dest='chunk_size', default=DEFAULT_CHUNK_SIZE, + help="Set number of records to be fetched at once for inserting into the index") def handle(self, **options): # Get list of backends to index @@ -115,7 +120,10 @@ class Command(BaseCommand): # Update backends for backend_name in backend_names: - self.update_backend(backend_name, schema_only=options.get('schema_only', False)) + self.update_backend( + backend_name, + schema_only=options.get('schema_only', False), chunk_size=options.get('chunk_size') + ) def print_newline(self): self.stdout.write('') @@ -145,7 +153,7 @@ class Command(BaseCommand): # Atomic so the count of models doesnt change as it is iterated @transaction.atomic - def queryset_chunks(self, qs, chunk_size=1000): + def queryset_chunks(self, qs, chunk_size=DEFAULT_CHUNK_SIZE): """ Yield a queryset in chunks of at most ``chunk_size``. The chunk yielded will be a list, not a queryset. Iterating over the chunks is done in a diff --git a/wagtail/search/tests/test_backends.py b/wagtail/search/tests/test_backends.py index 07faa413df..228985aa7d 100644 --- a/wagtail/search/tests/test_backends.py +++ b/wagtail/search/tests/test_backends.py @@ -34,7 +34,7 @@ class BackendTests(WagtailTestUtils): # no conf entry found - skip tests for this backend raise unittest.SkipTest("No WAGTAILSEARCH_BACKENDS entry for the backend %s" % self.backend_path) - management.call_command('update_index', backend_name=self.backend_name, stdout=StringIO()) + management.call_command('update_index', backend_name=self.backend_name, stdout=StringIO(), chunk_size=50) def assertUnsortedListEqual(self, a, b): """