From 603dc650e58f17f3369f6e98759212ae1c349a6e Mon Sep 17 00:00:00 2001 From: Matt Westcott Date: Wed, 21 Oct 2020 01:12:26 +0100 Subject: [PATCH] Eliminate stream_data for StreamValue internal storage Since stream_data existed in lazy (JSONish dicts) and non-lazy (tuples containing native values) variants, retrieval and serialisation each required two separate code paths. We now have a single representation, consisting of the raw JSONish data in _raw_data (unused if we were initialised with native data) and a cache of StreamChild objects in _bound_blocks (prefilled if we were initialised with native data). --- wagtail/core/blocks/stream_block.py | 133 +++++++++++++++++----------- 1 file changed, 81 insertions(+), 52 deletions(-) diff --git a/wagtail/core/blocks/stream_block.py b/wagtail/core/blocks/stream_block.py index 958233d96e..29f716428b 100644 --- a/wagtail/core/blocks/stream_block.py +++ b/wagtail/core/blocks/stream_block.py @@ -391,8 +391,12 @@ class StreamValue(Sequence): class StreamChild(BoundBlock): """ - Extends BoundBlock with methods that make logical sense in the context of - children of StreamField, but not necessarily elsewhere that BoundBlock is used + Iterating over (or indexing into) a StreamValue returns instances of StreamChild. + These are wrappers for the individual data items in the stream, extending BoundBlock + (which keeps track of the data item's corresponding Block definition object, and provides + the `render` method to render itself with a template) with an `id` property (a UUID + assigned to the item - this is managed by the enclosing StreamBlock and is not a property + of blocks in general) and a `block_type` property. """ def __init__(self, *args, **kwargs): @@ -429,91 +433,116 @@ class StreamValue(Sequence): with the raw text accessible under the `raw_text` attribute, so that migration code can be rewritten to convert it as desired. """ - self.is_lazy = is_lazy self.stream_block = stream_block # the StreamBlock object that handles this value - self.stream_data = stream_data # a list of dicts (when lazy) or tuples (when non-lazy) as outlined above - self._bound_blocks = {} # populated lazily from stream_data as we access items through __getitem__ + self.is_lazy = is_lazy + self._length = len(stream_data) self.raw_text = raw_text - def __getitem__(self, i): - if i not in self._bound_blocks: - if self.is_lazy: - raw_value = self.stream_data[i] - type_name = raw_value['type'] - child_block = self.stream_block.child_blocks[type_name] - self._prefetch_blocks(type_name, child_block) - return self._bound_blocks[i] - else: + if is_lazy: + # store raw stream data in _raw_data; on retrieval it will be converted to a native + # value (via block.to_python) and wrapped as a StreamValue, and cached in _bound_blocks. + self._raw_data = stream_data + self._bound_blocks = {} + else: + # store native stream data in _bound_blocks; on serialization it will be converted to + # a JSON-ish representation via block.get_prep_value. + self._bound_blocks = {} + for i, item in enumerate(stream_data): try: - type_name, value, block_id = self.stream_data[i] + type_name, value, block_id = item except ValueError: - type_name, value = self.stream_data[i] + type_name, value = item block_id = None - child_block = self.stream_block.child_blocks[type_name] + block_def = self.stream_block.child_blocks[type_name] + self._bound_blocks[i] = StreamValue.StreamChild(block_def, value, id=block_id) - self._bound_blocks[i] = StreamValue.StreamChild(child_block, value, id=block_id) + def __getitem__(self, i): + if i not in range(0, self._length): + raise IndexError("list index out of range") + + if i not in self._bound_blocks: + raw_value = self._raw_data[i] + self._prefetch_blocks(raw_value['type']) return self._bound_blocks[i] - def _prefetch_blocks(self, type_name, child_block): - """Prefetch all child blocks for the given `type_name` using the - given `child_blocks`. - - This prevents n queries for n blocks of a specific type. + def _prefetch_blocks(self, type_name): """ + Populate _bound_blocks with all items in this stream of type `type_name` that exist in + _raw_data but do not already exist in _bound_blocks. + + Fetching is done via the block's bulk_to_python method, so that database lookups are + batched into a single query where possible. + """ + child_block = self.stream_block.child_blocks[type_name] # create a mapping of all the child blocks matching the given block type, # mapping (index within the stream) => (raw block value) raw_values = OrderedDict( - (i, item['value']) for i, item in enumerate(self.stream_data) - if item['type'] == type_name + (i, raw_item['value']) for i, raw_item in enumerate(self._raw_data) + if raw_item['type'] == type_name and i not in self._bound_blocks ) # pass the raw block values to bulk_to_python as a list converted_values = child_block.bulk_to_python(raw_values.values()) - # reunite the converted values with their stream indexes + # reunite the converted values with their stream indexes, along with the block ID + # if one exists for i, value in zip(raw_values.keys(), converted_values): - # also pass the block ID to StreamChild, if one exists for this stream index - block_id = self.stream_data[i].get('id') - self._bound_blocks[i] = StreamValue.StreamChild(child_block, value, id=block_id) + self._bound_blocks[i] = StreamValue.StreamChild( + child_block, value, id=self._raw_data[i].get('id') + ) def get_prep_value(self): prep_value = [] - for i, stream_data_item in enumerate(self.stream_data): - if self.is_lazy and i not in self._bound_blocks: - # This child has not been accessed as a bound block, so its raw JSONish - # value (stream_data_item here) is still valid - prep_value_item = stream_data_item - - # As this method is preparing this value to be saved to the database, - # this is an appropriate place to ensure that each block has a unique id. - prep_value_item['id'] = prep_value_item.get('id', str(uuid.uuid4())) + for i in range(0, self._length): + if i in self._bound_blocks: + # Convert the native value back into raw JSONish data + item = self._bound_blocks[i] + if not item.id: + item.id = str(uuid.uuid4()) + prep_value.append({ + 'type': item.block_type, + 'value': item.block.get_prep_value(item.value), + 'id': item.id, + }) else: - # convert the bound block back into JSONish data - child = self[i] - # As this method is preparing this value to be saved to the database, - # this is an appropriate place to ensure that each block has a unique id. - child.id = child.id or str(uuid.uuid4()) - prep_value_item = { - 'type': child.block.name, - 'value': child.block.get_prep_value(child.value), - 'id': child.id, - } + # item has not been converted to a BoundBlock, so its _raw_data entry is + # still usable (but ensure it has an ID before returning it) - prep_value.append(prep_value_item) + raw_item = self._raw_data[i] + if not raw_item.get('id'): + raw_item['id'] = str(uuid.uuid4()) + + prep_value.append(raw_item) return prep_value def __eq__(self, other): - if not isinstance(other, StreamValue): + if not isinstance(other, StreamValue) or len(other) != self._length: return False - return self.stream_data == other.stream_data + # scan both lists for non-matching items + for i in range(0, self._length): + if i not in self._bound_blocks and i not in other._bound_blocks: + # compare raw values as a shortcut to save the conversion step + if self._raw_data[i] != other._raw_data[i]: + return False + else: + this_item = self[i] + other_item = other[i] + if ( + this_item.block_type != other_item.block_type + or this_item.id != other_item.id + or this_item.value != other_item.value + ): + return False + + return True def __len__(self): - return len(self.stream_data) + return self._length def __repr__(self): return repr(list(self))