Java tutorial
/** * \cond LICENSE * ******************************************************************** * This is a conditional block for preventing the DoxyGen documentation * tool to include this license header within the description of each * source code file. If you want to include this block, please define * the LICENSE parameter into the provided DoxyFile. * ******************************************************************** * * TeXPrinter - A TeX.SX question printer Copyright (c) 2012, Paulo Roberto * Massa Cereda All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the project's author nor the names of its contributors * may be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ******************************************************************** * End of the LICENSE conditional block * ******************************************************************** * \endcond * * StringUtils.java: This is a helper class that provides String functions to * the generator classes. * Last revision: paulo at temperantia 26 Feb 2012 05:12 */ // package definition package net.sf.texprinter.utils; // needed imports import com.itextpdf.text.pdf.codec.Base64; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; import java.util.ArrayList; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; import org.apache.commons.lang.StringEscapeUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; /** * Provides String functions to the generator classes. Another helper class, * but this one is specific to string manipulation. * * @author Paulo Roberto Massa Cereda * @version 2.1 * @since 1.0 */ public class StringUtils { // the application logger private static final Logger log = Logger.getLogger(StringUtils.class.getCanonicalName()); /** * Escapes HTML entities and tags to a TeX format. This method tries to * replace HTML code by the TeX equivalent macros. * * @param text The input text. * @return A new text formatted from HTML to TeX. */ public static String escapeHTMLtoTeX(String text) { // replace bold tags String newText = text.replaceAll("<b>", "\\\\textbf{"); newText = newText.replaceAll("</b>", "}"); // replace bold tags newText = newText.replaceAll("<strong>", "\\\\textbf{"); newText = newText.replaceAll("</strong>", "}"); // replace italic tags newText = newText.replaceAll("<i>", "\\\\textit{"); newText = newText.replaceAll("</i>", "}"); // replace emphasized tags newText = newText.replaceAll("<em>", "\\\\emph{"); newText = newText.replaceAll("</em>", "}"); // replace paragraphs tags newText = newText.replaceAll("<p>", ""); newText = newText.replaceAll("</p>", "\n\n"); // replace ordered lists tags newText = newText.replaceAll("<ol>", "\\\\begin{enumerate}\n"); newText = newText.replaceAll("</ol>", "\\\\end{enumerate}\n"); // replace unordered lists tags newText = newText.replaceAll("<ul>", "\\\\begin{itemize}\n"); newText = newText.replaceAll("</ul>", "\\\\end{itemize}\n"); // replace item tags newText = newText.replaceAll("<li>", "\\\\item "); newText = newText.replaceAll("</li>", "\n"); // replace blockquote tags newText = newText.replaceAll("<blockquote>", "\\\\begin{quotation}\n"); newText = newText.replaceAll("</blockquote>", "\\\\end{quotation}\n"); // replace code tags newText = newText.replaceAll("<pre><code>", "\\\\begin{TeXPrinterListing}\n"); newText = newText.replaceAll("<pre class=.*\"><code>", "\\\\begin{TeXPrinterListing}\n"); newText = newText.replaceAll("</code></pre>", "\\\\end{TeXPrinterListing}\n\n"); // replace inline code tags newText = newText.replaceAll("<code>", "\\\\lstinline|"); newText = newText.replaceAll("</code>", "|"); // replace links tags newText = newText.replaceAll("alt=\".*\" ", ""); // parse the text Document docLinks = Jsoup.parse(newText); // get all the links Elements links = docLinks.getElementsByTag("a"); // if there are links if (links.size() > 0) { // for every link for (Element link : links) { // get the outer HTML String temp = link.outerHtml(); // replace it newText = newText.replaceFirst(Pattern.quote(temp), "\\\\href{" + link.attr("href") + "}{" + link.text() + "}"); } } // create a list of images ArrayList<ImageGroup> images = new ArrayList<ImageGroup>(); // parse the current text Document doc = Jsoup.parse(text); // fetch all the media found Elements media = doc.select("[src]"); // for all media found for (Element m : media) { // if it's an image tag if (m.tagName().equals("img")) { // create a new image group with the image link ImageGroup image = new ImageGroup(m.attr("abs:src")); // add to the list of images images.add(image); // set the current image to null image = null; } } // create a new loop saver LoopSaver lps = null; // for every image in the list of images for (ImageGroup img : images) { // create a new object lps = new LoopSaver(); // while there are references for that image in the text while (newText.indexOf(img.getURL()) != -1) { // tick loop lps.tick(); // replace the occurrence of that image newText = newText.replaceFirst("<img src=\"" + img.getURL() + "\" />", "\\\\begin{figure}[h!]\n\\\\centering\n\\\\includegraphics[scale=0.5]{" + img.getName() + "}\n\\\\end{figure}"); } // lets try try { // finally, download the image to the current directory Downloader.download(img.getURL(), img.getName()); } catch (Exception exception) { // log message log.log(Level.WARNING, "An error occurred while getting the current image. Trying to set the replacement image instead. MESSAGE: {0}", StringUtils.printStackTrace(exception)); // image could not be downloaded for any reason try { // open a file stream FileOutputStream f = new FileOutputStream(img.getName()); // write a replacement image f.write(Base64.decode( "iVBORw0KGgoAAAANSUhEUgAAALAAAABKCAIAAACU3El2AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAcjSURBVHhe7VzrmeMgDExdKSj1pJptZovZi3lqhAQ4n7HhrPt1STAaRoOELa0ff/bPGCAMPIwNY4AyYIIwPQADJggThAnCNKAzYBHC1GERwjRgEcI00MmApYxOou4yzARxF093rtME0UnUXYaZIO7i6c51miA6ibrLMBPEXTzduc4vBPHzejwez/cvt+C+f7x+GpY7h/2+nz2zdS5z6LCFoLZ5qAli8wj/9xHCzIK4xDeXGG27No7YB68dIfx8JCJoguiHOG7kvsUfhOMSo/3Y98H7XhBvlyFIWAfDPjFIOWT/MLgC9IlW/E/xH0tqeWiR7tx1z7dfkE95dKo00TbF8/2TrMTsiIyLhsh8eJVHG00EJGAizUeScT+8KieSqL4VRGCOHggILySI/L5feNzYP0wThGBF2Q2frwPn0oBImuhgZ8T/ElzjZqK6JHPKhmhM/Xlt15e7wpkISIiFIBaF54AJ4LkP6vhmYPlWEFHT4JR0DPR4hINnWjSB3RxWFQRaaYdHIeGxpMjmyIqAS1EFwvk3jy5NaiYASSkaIpQULhR4sl+acvj7GyOItJuEnKGkDH57ohGupAwI9cKdDgnkhVQl97DDdNYvRHevRm1FwZAgUkSTU2uPILRrfVSob9QLBeFNs71HvgOfNYZpEQKP0dL2CSPoBGqEwD0nha12hBANCavTzuXdgmjCmytC5Exapb9zGGREksfFy0WqHc+eQymZ8S1MEzDdU/QMISdp2RALahsQxUSPILRr1QiBP7SCxKCUEU5HrbuMzmEkA4VTvveudHn6DjZRjrOvl7tXoI/V2jGdnNqer1e8l5EOoYqhjBTuWUhiIodK6fyLQmdpg8ILKxOimXqqA420BdGS1KDfBS8NstQ9rRbouydYYeC0gpiQ/QkhHS+xKQURA2yzLnI8H7UZTRDn8m3WZmBgyggxAzF3xWCCuKvnlXWbIEwQa9x2mp+uYaAeIbRndiOx7nsAsW/0cbilp+2HzX7Ior5EuIogeElH7kU4zCXNib6kuzmvGzCvIPrwHztKZqOsHfj65iHcfbGAmwqC0B3qNq1mHrKTc8GAbW94Vo8tQ6qLIXkRbzBBkOpG0fXHLJGqQ+oLVi5PgknXhIqGWJigdRahGk1KwNt07Ras2JgDvVUfSHWqOcJe0ddTBhdEKAtF3txyiaty/bFUEusbAEe6KYSWD7KIHkEoc4qooDzse7oqkDwQcg0tfArtSbwpKhBGCq6EOr9yuXwqfR/r/EINTEPYq4bPuJ2CaBfigu0MzW8DV110vEiRHhSB8qDzQSsb3YjNOUVUWPVksaZEIRQQs1tTrMjRK0+4/c9VWTecIdSmWny9pQUfl4uJCqnG/kyla60ikIMFgckh96yw/0EU5N24REEZuJx1YFvzc2euvQuoyp4u/XKPAp3B/c7yI673M7XPDLEVIowGb0PMis2IXAFlCAjs5ZgUkXx5yjlSEHSPZeQ0L0sdXn3hDFIGuYTYxM2Uxsio4s+ZNuVypkmBbmkTk95tL4XPF5up0Nsd0mNbEKy5Ja1FXpQWw/oo9qMOFwTJk879JEJSXJqD5bY7TKV0noKZ4k/HeIiOqIpdqkMqQ0R5hpCSaVj80+nBr+H5+ZAgdggCFIFJqOwBo0EBEO5QxJGCoGGYNCaxWIyHx9wzhE8Wcgj2i+mIEHlYmhT607eD65bI6eHDjcxVdg1qJDT9Do1b+GccoEh0S/gkd2+KKSPnqrAmgT3oAdMQdktieC1DCGOTtTl0c3WLgaMFgWf3VlS+BeVzL3K0IFK05/cSc9NyX3QnCOK+5K64chPEil4biNkEMZDcFac2QazotYGYTRADyV1x6l2CaD7dXZEBwwwMdD+pTM8B+TPEOQlltcs5Qc6IygQxo1cuxFQTRPHKppAyirdLffDTmqYUQ8jv8ck1LRxAETG/7ikUpppvf2J/CA4F1qIlQLLrC0/C+6M6lnah9waY3h8h6m+XgrceJbz08OFfskQfYpMiXXRlEA37qDY1lfNrKUOxGxs06i9ochf/55WY/YIoO3wY+SVt5WFU6iEoezz4G2g0Q8JhVxGEZld720ZzaQP26LVTHiEIVjRmJWWpM1ptBGIOkPxRvv1Jcr4sCNWuJojW0q513gjrhwmicvPB3RALXqwPMTUc5qgsCaI0JMyvtedLEaJ8oVgedb8b7cZzCCQEPpEPrao2eIycIcouo3qE6Ho1k59fe7ESXYLch4Zy1ZbWWvKIzXvKnK0HU+nAnk6CQpdw5LBsf0pryAd/7EpkjUANQeiGKvOzkAK3IM3mJc3ibQVxiirNyDwMtCLEPEgNySkMmCBOoXkdIyaIdXx1ClITxCk0r2PEBLGOr05BaoI4heZ1jJgg1vHVKUhNEKfQvI4RE8Q6vjoFqQniFJrXMWKCWMdXpyA1QZxC8zpGTBDr+OoUpP8Arv92hCPEu+kAAAAASUVORK5CYII=")); // close the file f.close(); } catch (IOException ioexception) { // log message log.log(Level.SEVERE, "An IO exception occured while trying to create the image replacement. MESSAGE: {0}", StringUtils.printStackTrace(ioexception)); } catch (Exception except) { // log message log.log(Level.SEVERE, "An error occured while trying to create the image replacement. MESSAGE: {0}", StringUtils.printStackTrace(except)); } } } // unescape all HTML entities newText = StringEscapeUtils.unescapeHtml(newText); // return new text return newText; } /** * Prints the stack trace to a string. This method gets the exception * and prints the stack trace to a string instead of the system default * output. * * @param exception The exception. * @return The string containg the whole stack trace. */ public static String printStackTrace(Exception exception) { // lets try try { // create a string writer StringWriter stringWriter = new StringWriter(); // create a print writer PrintWriter printWriter = new PrintWriter(stringWriter); // set the stack trace to the writer exception.printStackTrace(printWriter); // return the writer return "M: " + exception.getMessage() + " S: " + stringWriter.toString(); } catch (Exception except) { // error message return "Error in printStackTrace: " + except.getMessage(); } } }