// Java tutorial
package at.ac.tuwien.ims.latex2mobiformulaconv.converter.mathml2html; import at.ac.tuwien.ims.latex2mobiformulaconv.converter.mathml2html.elements.Formula; import at.ac.tuwien.ims.latex2mobiformulaconv.converter.mathml2html.snugglepkgs.SnugglePackageRegistry; import org.apache.log4j.Logger; import org.jdom2.Content; import org.jdom2.Document; import org.jdom2.Element; import org.jdom2.Text; import org.jdom2.filter.Filters; import org.jdom2.output.Format; import org.jdom2.output.XMLOutputter; import org.jdom2.xpath.XPathExpression; import org.jdom2.xpath.XPathFactory; import uk.ac.ed.ph.snuggletex.*; import uk.ac.ed.ph.snuggletex.internal.FrozenSlice; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; /* * The MIT License (MIT) * latex2mobi -- LaTeX Formulas to Mobi Converter * Copyright (c) 2014 Michael Au * <p/> * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * <p/> * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * <p/> * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * <p/> * <p/> * For Third Party Software Licenses read LICENSE file in base dir. */ /** * This abstract class provides shared routines for implementing formula converters. * <p/> * Provides mechanisms for finding, replacing and parsing formulas from the resulting file generated by Pandoc * * @author Michael Au * Date: 08.06.14 * @see at.ac.tuwien.ims.latex2mobiformulaconv.converter.latex2html.PandocLatexToHtmlConverter */ public abstract class FormulaConverter { public static final String FORMULA_ID_PREFIX = "formula_"; private static final Logger logger = Logger.getLogger(FormulaConverter.class); protected static SnuggleEngine engine = new SnuggleEngine(); private static XPathFactory xPathFactory = XPathFactory.instance(); protected static XPathExpression<Element> xpath = xPathFactory.compile("//*[@class='LaTeX']", Filters.element()); static { // Add special SnuggleTeX configuration for certain Elements for (SnugglePackage p : SnugglePackageRegistry.getPackages()) { engine.addPackage(p); } XMLStringOutputOptions xmlStringOutputOptions = new XMLStringOutputOptions(); xmlStringOutputOptions.setEncoding("UTF-8"); xmlStringOutputOptions.setIndenting(true); engine.setDefaultXMLStringOutputOptions(xmlStringOutputOptions); } /** * This path will hold the temporary generated files */ protected Path tempDirPath = null; /** * If true, an Implementation of FormulaConverter * should generate debug output markup per Formula */ private boolean generateDebugMarkup = false; /** * Main constructor for an implementation * If overwritten, super() must be called! 
*/ public FormulaConverter() { try { tempDirPath = Files.createTempDirectory("latex2mobi"); } catch (IOException e) { logger.error("Error creating temporary directory!"); } } public void setGenerateDebugMarkup(boolean generateDebugMarkup) { this.generateDebugMarkup = generateDebugMarkup; } /** * Parse latex formula code to entities, which afterward can be rendered to html * * @param latexFormula * @return Parsed Formula root object, tree-like representation of formula for further html rendering */ public abstract Formula parse(int id, String latexFormula); /** * Parses a latex formula to MathML with SnuggleTeX * * @param id the formula's index * @param latexFormula the latex formula string * @return formula object with set id, latex and mathml parameters */ public Formula parseToMathML(int id, String latexFormula) { Formula formula = new Formula(id); formula.setLatexCode(latexFormula); if (latexFormula == null) { formula.setInvalid(true); return formula; } // Parse MathML formula and convert to png file SnuggleInput input = new SnuggleInput(latexFormula); try { SnuggleSession session = engine.createSession(); session.parseInput(input); String xmlString = session.buildXMLString(); if (xmlString != null) { // Formula is valid formula.setMathMl(xmlString); } else { // Error handling List<InputError> errors = session.getErrors(); Integer errorCount = errors.size(); logger.error(errorCount + " Error(s) occured while converting LaTeX with SnuggleTeX:"); for (int i = 0; i < errorCount; i++) { InputError error = errors.get(i); logger.error("--------------"); logger.error("Error " + i + "/" + errorCount + ":"); logger.error("SnuggleTeX Error code: " + error.getErrorCode().getName()); FrozenSlice slice = error.getSlice(); input = slice.getDocument().getInput(); String document = input.getString(); // log document, position coordinates and substring logger.error(document); logger.error("@ Position " + slice.getStartIndex() + ":" + slice.getEndIndex()); 
logger.error(document.substring(slice.getStartIndex(), slice.getEndIndex())); formula.setInvalid(true); } } } catch (IOException e) { logger.error(e.getMessage(), e); } return formula; } /** * Parses a JDOM HTML Document for formula entries, sets an id to refer to it in the future. * * @param document JDOM HTML Document to parse * @return Map of formulas, keys: given id, values: corresponding latex formula code from the document */ public Map<Integer, String> extractFormulas(Document document) { Map<Integer, String> formulas = new HashMap<>(); List<Element> foundElements = xpath.evaluate(document); if (foundElements.size() > 0) { int id = 0; for (Element element : foundElements) { formulas.put(id, element.getValue()); // mark formula number element.setAttribute("id", FORMULA_ID_PREFIX + id); id++; } } return formulas; } /** * Replaces all formulas with the html representation of the mapped formula objects * * @param doc JDOM Document where to replace the formulas * @param formulaMap Map of the indexed Formula Objects * @return JDOM Document with replaced formulas */ public Document replaceFormulas(Document doc, Map<Integer, Formula> formulaMap) { List<Element> foundFormulas = xpath.evaluate(doc); if (foundFormulas.size() > 0) { Map<String, Element> formulaMarkupMap = new HashMap<>(); // Initialize markup map for (Element element : foundFormulas) { formulaMarkupMap.put(element.getAttribute("id").getValue(), element); } // Replace all found formulas Iterator<Integer> formulaIterator = formulaMap.keySet().iterator(); while (formulaIterator.hasNext()) { Integer id = formulaIterator.next(); Element formulaMarkupRoot = formulaMarkupMap.get(FORMULA_ID_PREFIX + id); Formula formula = formulaMap.get(id); formulaMarkupRoot.removeAttribute("class"); formulaMarkupRoot.removeContent(); formulaMarkupRoot.setName("div"); Element div = (Element) formulaMarkupRoot.getParent(); div.setName("div"); div.setAttribute("class", "formula"); // Potentially there's text inside the 
paragraph... List<Text> texts = div.getContent(Filters.textOnly()); if (texts.isEmpty() == false) { String textString = ""; for (Text text : texts) { textString += text.getText(); } Element textSpan = new Element("span"); textSpan.setAttribute("class", "text"); textSpan.setText(textString); div.addContent(textSpan); List<Content> content = div.getContent(); content.removeAll(texts); } if (generateDebugMarkup) { div.setAttribute("style", "border: 1px solid black;"); // Header Element h4 = new Element("h4"); h4.setText("DEBUG - Formula #" + formula.getId()); div.addContent(h4); // Render LaTeX source Element latexPre = new Element("pre"); latexPre.setAttribute("class", "debug-latex"); latexPre.setText(formula.getLatexCode()); div.addContent(latexPre); // Render MathML markup Element mathmlPre = new Element("pre"); mathmlPre.setAttribute("class", "debug-mathml"); mathmlPre.setText(formula.getMathMl()); div.addContent(mathmlPre); // Render HTML Markup Element htmlPre = new Element("pre"); htmlPre.setAttribute("class", "debug-html"); XMLOutputter xmlOutputter = new XMLOutputter(); xmlOutputter.setFormat(Format.getRawFormat()); htmlPre.setText(xmlOutputter.outputString(formula.getHtml())); div.addContent(htmlPre); } // Set formula into formulaMarkupRoot.addContent(formula.getHtml()); } } return doc; } public Path getTempDirPath() { return tempDirPath; } public Element renderInvalidFormulaSource(Formula formula) { // Render invalid LaTeX code Element invalidFormulaPre = new Element("pre"); invalidFormulaPre.setText("Formula #" + formula.getId() + " is invalid!\n\n" + formula.getLatexCode()); return invalidFormulaPre; } }