# Source code for innoconv.extensions.join_strings

"""
Merge consecutive strings, spaces and soft breaks into a single string element.

The motivation behind this extension is to make the AST more readable and also
to save space by compressing the representation. The actual appearance in a
viewer should remain completely untouched.

This extension modifies the AST.

=======
Example
=======

+--------+-----------------------------------------------------------------------+
| Before | ``[{"t": "Str", "c": "Foo"},{"t": "Space"},{"t": "Str", "c": "b!"}]`` |
+--------+-----------------------------------------------------------------------+
| After  | ``[{"t": "Str", "c": "Foo b!"}]``                                     |
+--------+-----------------------------------------------------------------------+
"""  # noqa: E501

from innoconv.extensions.abstract import AbstractExtension

#: Type name of a plain string element; merged runs always end up as this
STR_TYPE = "Str"

#: Element types that are folded together into a single string element
TYPES_TO_MERGE = (
    STR_TYPE,
    "Space",
    "SoftBreak",
)


class JoinStrings(AbstractExtension):
    """Merge consecutive strings and spaces in the AST.

    The AST is walked recursively. Inside every list, each run of adjacent
    elements whose ``'t'`` value is listed in ``TYPES_TO_MERGE`` is collapsed
    into the first element of that run, which is normalized to a ``Str``.
    The other elements of the run are removed from the list. All changes are
    made in-place on the objects that are passed in.
    """

    _helptext = "Merge sequences of strings and spaces in the AST."

    def __init__(self, *args, **kwargs):
        """Initialize variables."""
        super(JoinStrings, self).__init__(*args, **kwargs)
        self.previous_element = None  # the element we merge to

    # content parsing

    def _process_ast_element(self, ast_element):
        """Process a single element in the AST.

        Lists are handed to ``_process_ast_array``. For anything else an
        attempt is made to descend into every value reachable through
        ``ast_element[key]``; values that cannot be traversed that way are
        left untouched.
        """
        self.previous_element = None   # Stop merging on new element
        if isinstance(ast_element, list):
            self._process_ast_array(ast_element)
            return
        try:
            for key in ast_element:
                self._process_ast_element(ast_element[key])
        except TypeError:
            # Leaf value (string, number, None, ...): either it is not
            # iterable or it cannot be indexed by what it yields, so there
            # is nothing to descend into.
            pass

    def _process_ast_array(self, ast_array):
        """
        Iterate over elements in AST.

        The first instance of mergeable content is stored in
        self.previous_element. Every subsequent instance of mergeable content
        gets added to the first instance and finally removed. Any other
        element ends the current run and is processed recursively.

        The list is modified in-place; the list object itself is kept.
        """
        def is_string_or_space(content_element):
            """Check if an ast element is mergeable, i.e. String or Space."""
            try:
                return content_element['t'] in TYPES_TO_MERGE
            except (TypeError, KeyError):  # could be an invalid dictionary
                return False

        self.previous_element = None
        to_delete = set()  # positions of elements that were merged away
        for pos, ast_element in enumerate(ast_array):
            if is_string_or_space(ast_element):
                if self.previous_element is None:
                    self._prepare_previous_element(ast_element)
                else:
                    self._merge_to_previous_element(ast_element)
                    to_delete.add(pos)
            else:
                # Resets self.previous_element, which ends the current run.
                self._process_ast_element(ast_element)
        if to_delete:
            # Filter by position instead of deleting index by index: a set
            # has no guaranteed iteration order, so offset-based deletion
            # could remove the wrong elements. Slice assignment keeps the
            # same list object, so holders of a reference see the change.
            ast_array[:] = [
                element for pos, element in enumerate(ast_array)
                if pos not in to_delete
            ]
        # Necessary for when we finish an element list and go back to a list
        # that has been processed already which contained it.
        self.previous_element = None

    def _prepare_previous_element(self, content_element):
        """Normalize self.previous_element to always be a Str.

        ``content_element`` itself becomes the merge target. If it is not a
        ``Str`` it is rewritten in-place into a ``Str`` holding one space.
        """
        self.previous_element = content_element
        if self.previous_element['t'] != STR_TYPE:
            self.previous_element['t'] = STR_TYPE
            self.previous_element['c'] = ' '

    def _merge_to_previous_element(self, content_element):
        """Append ``content_element`` to the current merge target.

        The content of a ``Str`` is appended verbatim. Every other mergeable
        type contributes a single space, unless the merge target already
        ends with one.
        """
        if content_element['t'] == STR_TYPE:
            self.previous_element['c'] += content_element['c']
        else:
            if not self.previous_element['c'].endswith(' '):
                self.previous_element['c'] += ' '

    # extension events

    def start(self, output_dir, source_dir):
        """Unused."""

    def pre_conversion(self, language):
        """Unused."""

    def pre_process_file(self, path):
        """Unused."""

    def post_process_file(self, ast, _):
        """Process AST in-place.

        The second argument is ignored.
        """
        self._process_ast_element(ast)

    def post_conversion(self, language):
        """Unused."""

    def finish(self):
        """Unused."""