Source code for innoconv.ext.index_terms

"""
Scan documents for index terms.

Viewers may generate a list of words with links to locations in the documents.
In order to achieve this a list of index terms is provided in the
:class:`Manifest <innoconv.manifest.Manifest>` so viewers don't have to scan
the whole documents themselves. Also for every occurence of an index term in
the text an ID is attached.

This extension modifies the AST.

.. note::

  Index terms are not supported in custom pages.
"""

from slugify import slugify

from innoconv.ext.abstract import AbstractExtension
from innoconv.traverse_ast import TraverseAst

INDEX_ATTRIBUTE = "data-index-term"

INDEX_ID_TEMPLATE = "index-term-{}-{}"


[docs]class IndexTerms(AbstractExtension): """Scan the documents for index terms.""" _helptext = "Scan the documents for index terms and write them to the manifest." def __init__(self, *args, **kwargs): """Initialize variables.""" super().__init__(*args, **kwargs) self._current_section_name = None self._language = None self._index_terms = {} self._page_occurences = None def _handle_index_term(self, elem, index_term): index_term_slug = slugify(index_term) # sequentially number IDs per section try: number = self._page_occurences[index_term_slug] + 1 except KeyError: number = 0 self._page_occurences[index_term_slug] = number occurence_id = INDEX_ID_TEMPLATE.format(index_term_slug, number) elem["c"][0][0] = occurence_id # add to manifest field entry = [ self._current_section_name, f"{index_term_slug}-{number}", ] try: self._index_terms[self._language][index_term_slug][1].append(entry) except KeyError: self._index_terms[self._language][index_term_slug] = [ index_term, [entry], ]
[docs] def process_element(self, elem, _): """Respond to AST element.""" if elem["t"] == "Span": attrs = dict(elem["c"][0][2]) if INDEX_ATTRIBUTE in attrs.keys(): self._handle_index_term(elem, attrs["data-index-term"])
[docs] def pre_conversion(self, language): """Remember current conversion language.""" self._language = language self._index_terms[language] = {}
[docs] def pre_process_file(self, path): """Remember current path.""" self._current_section_name = path[3:] # strip language self._page_occurences = {}
[docs] def post_process_file( self, ast, title, content_type, section_type=None, short_title=None ): """Scan the AST.""" if content_type == "section": TraverseAst(self.process_element).traverse(ast)
[docs] def manifest_fields(self): """Add `index_terms` field to manifest.""" return {"index_terms": self._index_terms}