org.niord.core.util.TextUtils.java Source code

Introduction

Here is the source code for org.niord.core.util.TextUtils.java
Source

/*
 * Copyright 2016 Danish Maritime Authority.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.niord.core.util;

import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.examples.HtmlToPlainText;
import org.jsoup.nodes.Document;
import org.w3c.tidy.Tidy;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Locale;
import java.util.function.Function;

/**
 * Text utility methods: HTML/plain-text conversion, trailing-dot handling,
 * case-insensitive comparison, list joining and XML document printing.
 */
@SuppressWarnings("unused")
public class TextUtils {

    /**
     * Converts the text from html to plain text.
     * <p>
     * The conversion is best-effort: if the html is blank, or if the
     * conversion fails for any reason, the original html is returned unchanged.
     *
     * @param html the html
     * @return the plain text version, or the original html if it could not be converted
     */
    public static String html2txt(String html) {
        if (StringUtils.isNotBlank(html)) {
            try {
                Document doc = Jsoup.parse(html);
                return new HtmlToPlainText().getPlainText(doc.body());
            } catch (Exception ignored) {
                // Deliberately ignored: fall through and return the original html below
            }
        }
        // If blank, or if any error occurs, return the original html
        return html;
    }

    /**
     * Converts the text from plain text to html.
     * <p>
     * The characters {@code &}, {@code "}, {@code <} and {@code >} are replaced with their
     * HTML entities, each newline is replaced with {@code <br>} and each tab with three
     * non-breaking spaces. A {@code null} text yields {@code null}.
     *
     * @param text the text
     * @return the html version
     */
    public static String txt2html(String text) {
        // replaceEach performs a single pass, so the "&" in the inserted entities is not re-escaped
        text = StringUtils.replaceEach(text, new String[] { "&", "\"", "<", ">", "\n", "\t" },
                new String[] { "&amp;", "&quot;", "&lt;", "&gt;", "<br>", "&nbsp;&nbsp;&nbsp;" });
        return text;
    }

    /**
     * Use JTidy to clean up the HTML and parse it into a DOM document.
     * <p>
     * JTidy is configured to produce XHTML and to stay silent about warnings.
     * A {@code null} html is treated like the empty string.
     *
     * @param html the HTML to clean up
     * @return the DOM document for the cleaned-up XHTML
     */
    public static org.w3c.dom.Document cleanHtml(String html) {
        Tidy tidy = new Tidy();

        tidy.setShowWarnings(false); // do not report warnings
        tidy.setQuiet(true); // suppress non-essential output

        tidy.setXHTML(true);

        // new StringReader(null) would throw a NullPointerException, so map null to ""
        String input = StringUtils.defaultString(html);

        // The serialized markup goes to a throw-away writer; only the DOM is returned
        return tidy.parseDOM(new StringReader(input), new StringWriter());
    }

    /**
     * Ensures that the string ends with a trailing dot character.
     * <p>
     * Non-blank text is trimmed before the check. Blank or {@code null} text is returned unchanged.
     *
     * @param text the text to add a trailing dot to
     * @return the updated text
     **/
    public static String trailingDot(String text) {
        if (StringUtils.isNotBlank(text)) {
            text = text.trim();
            if (!text.endsWith(".")) {
                text = text + ".";
            }
        }
        return text;
    }

    /**
     * Removes any trailing dot characters.
     * <p>
     * Non-blank text is trimmed, and all trailing dots (including dots separated by
     * whitespace) are removed. Blank or {@code null} text is returned unchanged.
     *
     * @param text the text to remove a trailing dot from
     * @return the updated text
     **/
    public static String removeTrailingDot(String text) {
        if (StringUtils.isNotBlank(text)) {
            text = text.trim();
            while (text.endsWith(".")) {
                // Trim again so that whitespace between consecutive trailing dots is removed too
                text = text.substring(0, text.length() - 1).trim();
            }
        }
        return text;
    }

    /**
     * Simple case-insensitive comparison between two strings.
     * <p>
     * {@code null} values sort after non-null values, and two {@code null} values are equal.
     * Case folding uses {@link Locale#ROOT}, so the result does not depend on the
     * default locale of the JVM.
     *
     * @param s1 the first string, may be {@code null}
     * @param s2 the second string, may be {@code null}
     * @return a negative integer, zero, or a positive integer as {@code s1} sorts
     *         before, equal to, or after {@code s2}
     **/
    public static int compareIgnoreCase(String s1, String s2) {
        if (s1 == null && s2 == null) {
            return 0;
        } else if (s1 == null) {
            return 1;
        } else if (s2 == null) {
            return -1;
        }
        // Locale.ROOT avoids locale-specific mappings such as the Turkish dotless i
        return s1.toLowerCase(Locale.ROOT).compareTo(s2.toLowerCase(Locale.ROOT));
    }

    /**
     * Used for joining strings with a different last delimiter.
     * Credits: http://stackoverflow.com/questions/34936771/join-strings-with-different-last-delimiter
     * <p>
     * Usage:
     * <pre>
     *  list.stream()
     *    .collect(
     *      Collectors.collectingAndThen(Collectors.toList(), joiningLastDelimiter(", ", " and "))
     *    );
     * </pre>
     * Lists with zero or one element are joined without using the last delimiter.
     *
     * @param delimiter the delimiter
     * @param lastDelimiter the last delimiter
     * @return the joining function
     */
    public static Function<List<String>, String> joiningLastDelimiter(String delimiter, String lastDelimiter) {
        return list -> {
            int last = list.size() - 1;
            if (last < 1) {
                return String.join(delimiter, list);
            }
            // Join all but the last element with the delimiter, then attach the last one
            return String.join(lastDelimiter, String.join(delimiter, list.subList(0, last)), list.get(last));
        };
    }

    /**
     * Prints an XML document to the output stream as indented UTF-8 XML,
     * including the XML declaration.
     * <p>
     * The output stream is flushed but not closed.
     *
     * @param doc the document to print
     * @param out the output stream
     * @throws IOException if writing to the output stream fails
     * @throws TransformerException if the document cannot be serialized
     */
    public static void printDocument(org.w3c.dom.Document doc, OutputStream out)
            throws IOException, TransformerException {
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer transformer = tf.newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

        OutputStreamWriter writer = new OutputStreamWriter(out, StandardCharsets.UTF_8);
        transformer.transform(new DOMSource(doc), new StreamResult(writer));

        // The writer buffers encoded bytes; flush so everything reaches the caller's stream.
        // The stream itself is owned by the caller and is left open.
        writer.flush();
    }

}