diff --git a/wagtail/admin/rich_text/converters/html_to_contentstate.py b/wagtail/admin/rich_text/converters/html_to_contentstate.py index a0e5423621..7782b62a41 100644 --- a/wagtail/admin/rich_text/converters/html_to_contentstate.py +++ b/wagtail/admin/rich_text/converters/html_to_contentstate.py @@ -9,13 +9,19 @@ from wagtail.core.rich_text import features as feature_registry from wagtail.core.models import Page +# constants to keep track of what to do with leading whitespace on the next text node we encounter +STRIP_WHITESPACE = 0 +KEEP_WHITESPACE = 1 +FORCE_WHITESPACE = 2 + + class HandlerState(object): def __init__(self): self.current_block = None self.current_inline_styles = [] self.current_entity_ranges = [] # what to do with leading whitespace on the next text node we encounter: strip, keep or force - self.leading_whitespace = 'strip' + self.leading_whitespace = STRIP_WHITESPACE self.list_depth = 0 self.list_item_type = None self.pushed_states = [] @@ -72,7 +78,7 @@ class BlockElementHandler(object): block = self.create_block(name, dict(attrs), state, contentstate) contentstate.blocks.append(block) state.current_block = block - state.leading_whitespace = 'strip' + state.leading_whitespace = STRIP_WHITESPACE def handle_endtag(self, name, state, contentState): assert not state.current_inline_styles, "End of block reached without closing inline style elements" @@ -98,11 +104,11 @@ class InlineStyleElementHandler(object): def handle_starttag(self, name, attrs, state, contentstate): assert state.current_block is not None, "%s element found at the top level" % name - if state.leading_whitespace == 'force': + if state.leading_whitespace == FORCE_WHITESPACE: # any pending whitespace should be output before handling this tag, # and subsequent whitespace should be collapsed into it (= stripped) state.current_block.text += ' ' - state.leading_whitespace = 'strip' + state.leading_whitespace = STRIP_WHITESPACE inline_style_range = InlineStyleRange(self.style) inline_style_range.offset = len(state.current_block.text) @@ -126,11 +132,11 @@ class InlineEntityElementHandler(object): def handle_starttag(self, name, attrs, state, contentstate): assert state.current_block is not None, "%s element found at the top level" % name - if state.leading_whitespace == 'force': + if state.leading_whitespace == FORCE_WHITESPACE: # any pending whitespace should be output before handling this tag, # and subsequent whitespace should be collapsed into it (= stripped) state.current_block.text += ' ' - state.leading_whitespace = 'strip' + state.leading_whitespace = STRIP_WHITESPACE attrs = dict(attrs) @@ -230,7 +236,7 @@ class HtmlToContentStateHandler(HTMLParser): def add_block(self, block): self.contentstate.blocks.append(block) self.state.current_block = block - self.state.leading_whitespace = 'strip' + self.state.leading_whitespace = STRIP_WHITESPACE def handle_starttag(self, name, attrs): element_handler = self.element_handlers.match(name, dict(attrs)) @@ -263,29 +269,29 @@ class HtmlToContentStateHandler(HTMLParser): self.add_block(Block('unstyled', depth=self.state.list_depth)) if content == ' ': - # if leading_whitespace = 'strip', this whitespace node is not significant + # if leading_whitespace = strip, this whitespace node is not significant # and should be skipped. - # For other cases, _don't_ output the whitespace yet, but set leading_whitespace = 'force' + # For other cases, _don't_ output the whitespace yet, but set leading_whitespace = force # so that a space is forced before the next text node or inline element. If no such node # appears (= we reach the end of the block), the whitespace can rightfully be dropped. - if self.state.leading_whitespace != 'strip': - self.state.leading_whitespace = 'force' + if self.state.leading_whitespace != STRIP_WHITESPACE: + self.state.leading_whitespace = FORCE_WHITESPACE else: # strip or add leading whitespace according to the leading_whitespace flag - if self.state.leading_whitespace == 'strip': + if self.state.leading_whitespace == STRIP_WHITESPACE: content = content.lstrip() - elif self.state.leading_whitespace == 'force' and not content.startswith(' '): + elif self.state.leading_whitespace == FORCE_WHITESPACE and not content.startswith(' '): content = ' ' + content if content.endswith(' '): # don't output trailing whitespace yet, because we want to discard it if the end - # of the block follows. Instead, we'll set leading_whitespace = 'force' so that + # of the block follows. Instead, we'll set leading_whitespace = force so that # any following text or inline element will be prefixed by a space content = content.rstrip() - self.state.leading_whitespace = 'force' + self.state.leading_whitespace = FORCE_WHITESPACE else: # no trailing whitespace here - any leading whitespace at the start of the # next text node should be respected - self.state.leading_whitespace = 'keep' + self.state.leading_whitespace = KEEP_WHITESPACE self.state.current_block.text += content