Source code for innoconv.runner

"""
The innoConv runner is the core of the conversion process.

It traverses the source directory recursively and finds all content files.
These are converted one-by-one to JSON. Under the hood is uses
`Pandoc <https://pandoc.org/>`_.

It receives a list of extensions that are instantiated and notified upon
certain events. The events are documented in
:class:`AbstractExtension <innoconv.ext.abstract.AbstractExtension>`.
"""

import json
import logging
from os import makedirs, walk
from os.path import abspath, dirname, exists, isdir, join, relpath
import pathlib

from innoconv.constants import (
    CONTENT_BASENAME,
    FOOTER_FRAGMENT_PREFIX,
    PAGES_FOLDER,
)
from innoconv.ext import EXTENSIONS
from innoconv.utils import to_ast


[docs]class InnoconvRunner: """ Convert content files in a directory tree. :param source_dir: Content source directory. :type source_dir: str :param output_dir: Output directory. :type output_dir: str :param manifest: Content manifest. :type manifest: innoconv.manifest.Manifest :param extensions: List of extension names to use. :type extensions: list[str] """ def __init__(self, source_dir, output_dir, manifest, extensions): """Initialize InnoconvRunner.""" self._source_dir = source_dir self._output_dir = output_dir self._manifest = manifest self._extensions = [] self._load_extensions(extensions) self._sections = []
[docs] def run(self): """Start the conversion by iterating over language folders.""" self._notify_extensions("start", self._output_dir, self._source_dir) for i, language in enumerate(self._manifest.languages): self._notify_extensions("pre_conversion", language) self._convert_language_folder(language, i) self._notify_extensions("post_conversion", language) self._notify_extensions("finish")
def _convert_language_folder(self, language, lang_num): path = abspath(join(self._source_dir, language)) if not isdir(path): raise RuntimeError(f"Error: Directory {path} does not exist") section_num = 0 for root, dirs, files in walk(path): rel_path = relpath(root, path) # skip meta directories like '_static' parts = pathlib.Path(rel_path).parts if parts and parts[0].startswith("_"): continue # note: all dirs manipulation must happen in-place! dirs.sort() # sort section names # process section content_filename = f"{CONTENT_BASENAME}.md" if content_filename in files: filepath = join(root, content_filename) self._process_section(filepath, lang_num, section_num) section_num += 1 else: raise RuntimeError(f"Found section without content file: {root}") if section_num != len(self._sections): msg = ( "Inconsistent directory structure: " f"Language {language} is missing sections." ) raise RuntimeError(msg) # process pages try: pages = self._manifest.pages except AttributeError: pages = [] for page in pages: self._process_page(page, language) # process footer fragments self._process_footer_fragments(language) def _process_section(self, filepath, lang_num, section_num): rel_path = dirname(relpath(filepath, self._source_dir)) section_name = rel_path[3:] # strip language if lang_num == 0: # record sections for first language self._sections.append(section_name) else: # ensure sections are identical for all languages try: if self._sections[section_num] != section_name: msg = ( "Inconsistent directory structure: " f"Section {rel_path} differs." ) raise RuntimeError(msg) except IndexError as err: msg = ( "Inconsistent directory structure: " f"Extra section {rel_path} present." ) raise RuntimeError(msg) from err # full filepath output_filename = f"{CONTENT_BASENAME}.json" filepath_out = join(self._output_dir, rel_path, output_filename) # convert file using pandoc self._notify_extensions("pre_process_file", rel_path) ast, title, short_title, section_type = to_ast(filepath) self._notify_extensions( "post_process_file", ast, title, "section", section_type, short_title ) # write file content makedirs(dirname(filepath_out), exist_ok=True) with open(filepath_out, "w") as out_file: json.dump(ast, out_file) logging.info("Wrote %s", filepath_out) return title def _process_page(self, page, language): try: link_in_nav = page["link_in_nav"] except KeyError: link_in_nav = False try: link_in_footer = page["link_in_footer"] except KeyError: link_in_footer = False if not (link_in_nav or link_in_footer): logging.warning( "Page '%s' should have at least on of " "link_in_nav or link_in_nav set to true." ) return try: page["title"] except KeyError: page["title"] = {} input_filename = f"{page['id']}.md" filepath = join(self._source_dir, language, PAGES_FOLDER, input_filename) rel_path = dirname(relpath(filepath, self._source_dir)) # convert self._notify_extensions("pre_process_file", rel_path) ast, title, short_title, _ = to_ast(filepath) self._notify_extensions("post_process_file", ast, title, "page", None) try: page["short_title"][language] = short_title except KeyError: page["short_title"] = {language: short_title} page["title"][language] = title # write json output output_filename = f"{page['id']}.json" filepath_out = join(self._output_dir, rel_path, output_filename) makedirs(dirname(filepath_out), exist_ok=True) with open(filepath_out, "w") as out_file: json.dump(ast, out_file) logging.info("Wrote %s", filepath_out) def _process_footer_fragments(self, language): for part in ("a", "b"): input_filename = f"{FOOTER_FRAGMENT_PREFIX}{part}.md" filepath = join(self._source_dir, language, input_filename) rel_path = dirname(relpath(filepath, self._source_dir)) if not exists(filepath): logging.warning("Footer fragment %s does not exist.", filepath) continue # convert self._notify_extensions("pre_process_file", rel_path) ast, title, _, _ = to_ast(filepath, ignore_missing_title=True) self._notify_extensions("post_process_file", ast, title, "fragment", None) # write json output output_filename = f"{FOOTER_FRAGMENT_PREFIX}{part}.json" filepath_out = join(self._output_dir, rel_path, output_filename) makedirs(dirname(filepath_out), exist_ok=True) with open(filepath_out, "w") as out_file: json.dump(ast, out_file) logging.info("Wrote %s", filepath_out) def _notify_extensions(self, event_name, *args, **kwargs): for ext in self._extensions: func = getattr(ext, event_name) func(*args, **kwargs) def _load_extensions(self, extensions): # load extensions for ext_name in extensions: try: self._extensions.append(EXTENSIONS[ext_name](self._manifest)) except (ImportError, KeyError) as exc: raise RuntimeError(f"Extension {ext_name} not found!") from exc # pass extension list to extenions self._notify_extensions("extension_list", self._extensions)