at.ac.tuwien.ims.latex2mobiformulaconv.converter.mathml2html.FormulaConverter.java Source code

Java tutorial

Introduction

Here is the source code for at.ac.tuwien.ims.latex2mobiformulaconv.converter.mathml2html.FormulaConverter.java

Source

package at.ac.tuwien.ims.latex2mobiformulaconv.converter.mathml2html;

import at.ac.tuwien.ims.latex2mobiformulaconv.converter.mathml2html.elements.Formula;
import at.ac.tuwien.ims.latex2mobiformulaconv.converter.mathml2html.snugglepkgs.SnugglePackageRegistry;
import org.apache.log4j.Logger;
import org.jdom2.Content;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Text;
import org.jdom2.filter.Filters;
import org.jdom2.output.Format;
import org.jdom2.output.XMLOutputter;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;
import uk.ac.ed.ph.snuggletex.*;
import uk.ac.ed.ph.snuggletex.internal.FrozenSlice;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/*
 * The MIT License (MIT)
 * latex2mobi -- LaTeX Formulas to Mobi Converter
 * Copyright (c) 2014 Michael Au
 * <p/>
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * <p/>
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 * <p/>
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 * <p/>
 * <p/>
 * For Third Party Software Licenses read LICENSE file in base dir.
 */

/**
 * This abstract class provides shared routines for implementing formula converters.
 * <p/>
 * Provides mechanisms for finding, replacing and parsing formulas from the resulting file generated by Pandoc
 *
 * @author Michael Au
 *         Date: 08.06.14
 * @see at.ac.tuwien.ims.latex2mobiformulaconv.converter.latex2html.PandocLatexToHtmlConverter
 */
public abstract class FormulaConverter {
    public static final String FORMULA_ID_PREFIX = "formula_";
    private static final Logger logger = Logger.getLogger(FormulaConverter.class);
    protected static SnuggleEngine engine = new SnuggleEngine();
    private static XPathFactory xPathFactory = XPathFactory.instance();
    protected static XPathExpression<Element> xpath = xPathFactory.compile("//*[@class='LaTeX']",
            Filters.element());

    static {
        // Add special SnuggleTeX configuration for certain Elements
        for (SnugglePackage p : SnugglePackageRegistry.getPackages()) {
            engine.addPackage(p);
        }

        XMLStringOutputOptions xmlStringOutputOptions = new XMLStringOutputOptions();
        xmlStringOutputOptions.setEncoding("UTF-8");
        xmlStringOutputOptions.setIndenting(true);
        engine.setDefaultXMLStringOutputOptions(xmlStringOutputOptions);
    }

    /**
     * This path will hold the temporary generated files
     */
    protected Path tempDirPath = null;
    /**
     * If true, an Implementation of FormulaConverter
     * should generate debug output markup per Formula
     */
    private boolean generateDebugMarkup = false;

    /**
     * Main constructor for an implementation
     * If overwritten, super() must be called!
     */
    public FormulaConverter() {
        try {
            tempDirPath = Files.createTempDirectory("latex2mobi");
        } catch (IOException e) {
            logger.error("Error creating temporary directory!");
        }
    }

    public void setGenerateDebugMarkup(boolean generateDebugMarkup) {
        this.generateDebugMarkup = generateDebugMarkup;
    }

    /**
     * Parse latex formula code to entities, which afterward can be rendered to html
     *
     * @param latexFormula
     * @return Parsed Formula root object, tree-like representation of formula for further html rendering
     */
    public abstract Formula parse(int id, String latexFormula);

    /**
     * Parses a latex formula to MathML with SnuggleTeX
     *
     * @param id           the formula's index
     * @param latexFormula the latex formula string
     * @return formula object with set id, latex and mathml parameters
     */
    public Formula parseToMathML(int id, String latexFormula) {
        Formula formula = new Formula(id);

        formula.setLatexCode(latexFormula);

        if (latexFormula == null) {
            formula.setInvalid(true);
            return formula;
        }

        // Parse MathML formula and convert to png file
        SnuggleInput input = new SnuggleInput(latexFormula);
        try {
            SnuggleSession session = engine.createSession();

            session.parseInput(input);
            String xmlString = session.buildXMLString();

            if (xmlString != null) {
                // Formula is valid
                formula.setMathMl(xmlString);

            } else {
                // Error handling
                List<InputError> errors = session.getErrors();
                Integer errorCount = errors.size();
                logger.error(errorCount + " Error(s) occured while converting LaTeX with SnuggleTeX:");
                for (int i = 0; i < errorCount; i++) {
                    InputError error = errors.get(i);
                    logger.error("--------------");
                    logger.error("Error " + i + "/" + errorCount + ":");
                    logger.error("SnuggleTeX Error code: " + error.getErrorCode().getName());

                    FrozenSlice slice = error.getSlice();
                    input = slice.getDocument().getInput();
                    String document = input.getString();

                    // log document, position coordinates and substring
                    logger.error(document);
                    logger.error("@ Position " + slice.getStartIndex() + ":" + slice.getEndIndex());
                    logger.error(document.substring(slice.getStartIndex(), slice.getEndIndex()));

                    formula.setInvalid(true);
                }
            }
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
        }
        return formula;
    }

    /**
     * Parses a JDOM HTML Document for formula entries, sets an id to refer to it in the future.
     *
     * @param document JDOM HTML Document to parse
     * @return Map of formulas, keys: given id, values: corresponding latex formula code from the document
     */
    public Map<Integer, String> extractFormulas(Document document) {
        Map<Integer, String> formulas = new HashMap<>();

        List<Element> foundElements = xpath.evaluate(document);
        if (foundElements.size() > 0) {
            int id = 0;
            for (Element element : foundElements) {
                formulas.put(id, element.getValue());

                // mark formula number
                element.setAttribute("id", FORMULA_ID_PREFIX + id);
                id++;
            }
        }

        return formulas;
    }

    /**
     * Replaces all formulas with the html representation of the mapped formula objects
     *
     * @param doc        JDOM Document where to replace the formulas
     * @param formulaMap Map of the indexed Formula Objects
     * @return JDOM Document with replaced formulas
     */
    public Document replaceFormulas(Document doc, Map<Integer, Formula> formulaMap) {
        List<Element> foundFormulas = xpath.evaluate(doc);

        if (foundFormulas.size() > 0) {
            Map<String, Element> formulaMarkupMap = new HashMap<>();

            // Initialize markup map
            for (Element element : foundFormulas) {
                formulaMarkupMap.put(element.getAttribute("id").getValue(), element);
            }

            // Replace all found formulas
            Iterator<Integer> formulaIterator = formulaMap.keySet().iterator();
            while (formulaIterator.hasNext()) {
                Integer id = formulaIterator.next();

                Element formulaMarkupRoot = formulaMarkupMap.get(FORMULA_ID_PREFIX + id);
                Formula formula = formulaMap.get(id);

                formulaMarkupRoot.removeAttribute("class");
                formulaMarkupRoot.removeContent();
                formulaMarkupRoot.setName("div");

                Element div = (Element) formulaMarkupRoot.getParent();
                div.setName("div");
                div.setAttribute("class", "formula");

                // Potentially there's text inside the paragraph...
                List<Text> texts = div.getContent(Filters.textOnly());
                if (texts.isEmpty() == false) {
                    String textString = "";
                    for (Text text : texts) {
                        textString += text.getText();
                    }
                    Element textSpan = new Element("span");
                    textSpan.setAttribute("class", "text");
                    textSpan.setText(textString);
                    div.addContent(textSpan);

                    List<Content> content = div.getContent();
                    content.removeAll(texts);
                }

                if (generateDebugMarkup) {
                    div.setAttribute("style", "border: 1px solid black;");

                    // Header
                    Element h4 = new Element("h4");
                    h4.setText("DEBUG - Formula #" + formula.getId());
                    div.addContent(h4);

                    // Render LaTeX source
                    Element latexPre = new Element("pre");
                    latexPre.setAttribute("class", "debug-latex");
                    latexPre.setText(formula.getLatexCode());
                    div.addContent(latexPre);

                    // Render MathML markup
                    Element mathmlPre = new Element("pre");
                    mathmlPre.setAttribute("class", "debug-mathml");
                    mathmlPre.setText(formula.getMathMl());
                    div.addContent(mathmlPre);

                    // Render HTML Markup
                    Element htmlPre = new Element("pre");
                    htmlPre.setAttribute("class", "debug-html");
                    XMLOutputter xmlOutputter = new XMLOutputter();
                    xmlOutputter.setFormat(Format.getRawFormat());
                    htmlPre.setText(xmlOutputter.outputString(formula.getHtml()));

                    div.addContent(htmlPre);

                }

                // Set formula into
                formulaMarkupRoot.addContent(formula.getHtml());
            }
        }
        return doc;
    }

    public Path getTempDirPath() {
        return tempDirPath;
    }

    public Element renderInvalidFormulaSource(Formula formula) {
        // Render invalid LaTeX code
        Element invalidFormulaPre = new Element("pre");
        invalidFormulaPre.setText("Formula #" + formula.getId() + " is invalid!\n\n" + formula.getLatexCode());
        return invalidFormulaPre;
    }
}