Source code for latex_to_myst.main

#!/usr/bin/env python
import re
import logging
import sys
import panflute as pf
from panflute.elements import Doc
from latex_to_myst.latex_math import create_amsthm_blocks, create_displaymath
from latex_to_myst.figures import create_image, create_subplots
from latex_to_myst.directive import (
    elem_has_multiple_figures,
    image_in_subplot,
    is_directive_block,
    directive_level,
    SUPPORTED_AMSTHM_BLOCKS,
)

# Maps each header element seen by ``action`` to the label text that
# ``finalize`` later inserts in front of it.
section_labels_to_insert = {}
# Log to stderr and show only ERROR and above.
# NOTE(review): stderr is presumably chosen because stdout carries the
# filter's document output -- confirm.
logging.basicConfig(
    format="[%(levelname)8s] %(message)s", stream=sys.stderr, level=logging.ERROR
)
logger = logging.getLogger(__name__)


def get_element_type(elem: pf.Element):
    """Classify *elem* as a kind of cross-reference target.

    Returns one of ``"figure"``, ``"header"``, ``"subfigures"``,
    ``"displaymath"`` or ``"amsthm"``; ``None`` when the element matches
    none of these categories.
    """
    kind = None
    if isinstance(elem, pf.Image):
        kind = "figure"
    elif isinstance(elem, pf.Header):
        kind = "header"
    elif isinstance(elem, pf.Para):
        # A paragraph only counts when it holds several figures.
        if elem_has_multiple_figures(elem):
            kind = "subfigures"
    elif isinstance(elem, pf.Math):
        if elem.format == "DisplayMath":
            kind = "displaymath"
    elif isinstance(elem, pf.Div):
        # A div counts when at least one of its classes is a supported
        # amsthm block name.
        if elem.classes and any(
            name in SUPPORTED_AMSTHM_BLOCKS for name in elem.classes
        ):
            kind = "amsthm"
    return kind
def is_isolated_label(elem, doc=None):
    """Return the label when *elem* holds nothing but a label span.

    An element qualifies when it has exactly one child, that child is a
    ``pf.Span`` carrying a ``label`` attribute, and the element's stringified
    text (stripped of spaces and line breaks) is exactly ``[<label>]``.

    Args:
        elem: Element to inspect; may be ``None``.
        doc: Unused; kept so the function keeps its existing signature.

    Returns:
        The label string when the element is an isolated label, otherwise
        ``False``.
    """
    if not elem:
        return False
    # Callers pass arbitrary neighbouring elements; some element kinds have
    # no ``content`` at all, and those can never be an isolated label.
    children = getattr(elem, "content", None)
    if children is None or len(children) != 1:
        return False
    only_child = children[0]
    if not isinstance(only_child, pf.Span):
        return False
    if "label" not in only_child.attributes:
        return False
    label = only_child.attributes["label"]
    if pf.stringify(elem).strip(" \n\r") == f"[{label}]":
        return label
    return False
# MyST role used to cite each target kind reported by ``get_element_type``.
_REFERENCE_ROLES = {
    "figure": "numref",
    "subfigures": "ref",
    "header": "ref",
    "amsthm": "prf:ref",
    "displaymath": "eq",
}


def _resolve_reference(elem, doc):
    """Replace a link carrying a ``reference`` attribute with a MyST role.

    The link's attributes are cleared and its URL is set to the reference
    target. When the target is a known label of a recognised kind, a raw
    markdown inline such as ``{numref}`target``` is returned; otherwise the
    (modified) link itself is returned.
    """
    target = str(elem.attributes["reference"])
    elem.attributes = {}
    elem.url = target

    # ``doc`` may be None or lack the label table; treat that as "no labels"
    # rather than failing on the attribute access.
    labels = getattr(doc, "element_labels", {})
    if target not in labels:
        logger.error("Link to target %s not found.", target)
        return elem

    target_type = get_element_type(labels[target])
    if not target_type:
        return elem

    role = _REFERENCE_ROLES.get(target_type)
    if role is None:
        logger.error("Link to target type %s not understood.", target_type)
        return elem
    return pf.RawInline("{%s}`%s`" % (role, target), format="markdown")


def action(elem: pf.Element, doc: pf.Doc = None) -> pf.Element:
    """Transform a single element while the document is walked.

    Handled element kinds:

    * Paragraph that is only a label: removed (``[]`` is returned).
    * ``Str`` equal to "section", "figure" or "fig" (case-insensitive) that
      is followed, directly or one element later, by a link: removed.
    * ``Header``: its label is recorded in ``section_labels_to_insert`` and
      its identifier and classes are cleared.
    * ``Para`` / ``Table`` holding several figures: replaced by the result of
      ``create_subplots``.
    * ``Image`` not inside a subplot: replaced by the result of
      ``create_image``.
    * ``Link``: spaces are stripped from attribute-less http URLs; links with
      a ``reference`` attribute are turned into MyST roles.
    * ``CodeBlock``: attributes are cleared.
    * ``Div`` with classes: replaced by the result of
      ``create_amsthm_blocks``.
    * Display ``Math``: replaced by the result of ``create_displaymath``.

    Args:
        elem: Element currently being visited.
        doc: Document being filtered.

    Returns:
        The element (possibly modified), a replacement element, or ``[]`` to
        drop the element.
    """
    if isinstance(elem, pf.Para) and is_isolated_label(elem):
        return []

    if isinstance(elem, pf.Doc):
        return elem

    if isinstance(elem, pf.Str):
        # remove section and figure before references.
        follower = elem.next
        precedes_link = isinstance(follower, pf.Link) or (
            follower is not None and isinstance(follower.next, pf.Link)
        )
        if precedes_link and elem.text.lower() in ["section", "figure", "fig"]:
            return []
        return elem

    if isinstance(elem, pf.Header):
        # Prefer a label written on the line after the header; fall back to
        # the header's own identifier.
        label = is_isolated_label(elem.next)
        if label:
            logger.debug("Header is followed by isolated header on next line.")
        else:
            label = elem.identifier
        section_labels_to_insert[elem] = label
        elem.identifier = ""
        elem.classes = []
        return elem

    if isinstance(elem, pf.Para):
        if elem_has_multiple_figures(elem):
            logger.debug("Para element is actually subfigure.")
            return create_subplots(elem, doc)
        return elem

    if isinstance(elem, pf.Table):
        if elem_has_multiple_figures(elem):
            logger.debug("Table element is actually subfigure.")
            return create_subplots(elem, doc)
        return elem

    if isinstance(elem, pf.Image):
        if image_in_subplot(elem):
            return elem
        logger.debug("Creating Figure.")
        return create_image(elem, doc)

    if isinstance(elem, pf.Link):
        if not elem.attributes:
            if "http" in elem.url:
                elem.url = "".join(elem.url.split(" "))
            return elem
        if "reference" in elem.attributes:
            return _resolve_reference(elem, doc)
        return elem

    if isinstance(elem, pf.CodeBlock):
        elem.attributes = {}
        return elem

    if isinstance(elem, pf.Div):
        if elem.classes:
            return create_amsthm_blocks(elem, doc)
        return elem

    if isinstance(elem, pf.Math):
        if elem.format == "DisplayMath":
            return create_displaymath(elem, doc)
        return elem

    return elem
def finalize(doc: pf.Doc):
    """Insert a ``(label)=`` paragraph before every recorded header.

    Walks ``section_labels_to_insert`` and, for each header found among the
    document's top-level blocks, inserts a paragraph containing
    ``(<label>)=`` immediately before it.

    Headers whose label is empty are skipped, and headers that are not a
    top-level block of *doc* are reported and skipped. The registry is
    emptied afterwards so a later run in the same process does not see
    headers from this document.

    Args:
        doc: Document whose top-level content receives the label paragraphs.
    """
    try:
        # add in title labels
        for header, raw_label in section_labels_to_insert.items():
            label = raw_label.strip()
            if not label:
                # Nothing to anchor; "()=" would not name any target.
                continue
            try:
                position = doc.content.index(header)
            except ValueError:
                logger.error(
                    "Header for label %s is not a top-level block; "
                    "label not inserted.",
                    label,
                )
                continue
            doc.content.insert(position, pf.Para(pf.Str(f"({label})=")))
    finally:
        section_labels_to_insert.clear()
def prepare(doc: pf.Doc):
    """Collect per-document lookup tables before the main walk.

    Two attributes are set on *doc*:

    * ``element_levels``: maps every directive block (as decided by
      ``is_directive_block``) to the value of ``directive_level``.
    * ``element_labels``: maps label strings to elements, used to resolve
      hyperlinks. Elements with a non-empty ``identifier`` are stored under
      it. Display math without an ``identifier`` attribute is stored under
      the name given in its LaTeX label command, or under ``"eqn"`` when no
      such name can be extracted.

    Args:
        doc: Document about to be filtered.
    """
    # determine level of blocks
    block_levels = {}

    def get_level(e, d):
        if is_directive_block(e):
            block_levels[e] = directive_level(e, doc)

    doc.walk(get_level)
    doc.element_levels = block_levels

    # determine labels of blocks for hyperlinks
    block_labels = {}

    def gather_labels(e, d):
        if hasattr(e, "identifier"):
            if e.identifier:
                block_labels[e.identifier] = e
        elif get_element_type(e) == "displaymath":
            label = "eqn"
            if "\\label" in pf.stringify(e):
                # Keep the fallback when the command is present but not in
                # the \label{name} form, instead of failing on an empty
                # match list.
                found = re.search(r"\\label\{([^\}]+)\}", e.text)
                if found:
                    label = found.group(1)
            block_labels[label] = e

    doc.walk(gather_labels)
    doc.element_labels = block_labels
def main(doc=None):
    """Run the filter: ``prepare``, then ``action`` per element, then ``finalize``.

    Args:
        doc: Optional document forwarded to ``pf.run_filter``.

    Returns:
        Whatever ``pf.run_filter`` returns.
    """
    hooks = {"prepare": prepare, "finalize": finalize}
    return pf.run_filter(action, doc=doc, **hooks)
# Invoke main() only when this module is executed as a script.
if __name__ == "__main__": main()