org.j2free.util.HtmlFilter.java Source code

Java tutorial

Introduction

Here is the source code for org.j2free.util.HtmlFilter.java

Source

/**
 * HtmlFilter
 *
 * Copyright 2011 FooBrew, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.j2free.util;

import java.util.*;
import java.util.regex.*;
import net.jcip.annotations.NotThreadSafe;
import org.apache.commons.logging.*;

/**
 *
 * @author ryan
 */
@NotThreadSafe
/**
 * @author Ryan Wilson
 */
public class HtmlFilter {
    private final int PLAINTEXT_LINE_LENGTH = 60;

    private TreeSet<String> okayTags;
    private final String[] defaultAllowedTags = { "br", "hr", "strong", "b", "i", "u", "font", "ul", "ol", "li",
            "a" };

    private final String HTML_START = "<([A-Z][A-Z0-9]*)\\b[^>]*>";
    private final String HTML_END = "</([A-Z][A-Z0-9]*)\\b[^>]*>";

    private final String NOBR_SPACE_PATTERN = "&nbsp;";
    private final String EMAIL_LINE_BREAKS = "</?[tb]r\\s?/?>";
    private final String LINK_PATTERN = "<a.*?href=\"([^\"]*?)\"[^>]*?>([^<]*?)</a>";

    /**
     *
     */
    public HtmlFilter() {
        okayTags = new TreeSet<String>();
        okayTags.addAll(Arrays.asList(defaultAllowedTags));
    }

    private final Log log = LogFactory.getLog(HtmlFilter.class);

    /**
     *
     * @param msg
     * @return
     */
    public String filter(String msg) {
        return filter(msg, defaultAllowedTags);
    }

    /**
     *
     * @param msg
     * @param allowedTags
     * @return
     */
    public String filter(String msg, String[] allowedTags) {
        if (msg == null)
            return "";
        else if (msg.length() == 0 || msg.equals("") || !msg.contains("<"))
            return msg;

        okayTags.clear();
        if (allowedTags != null && allowedTags.length > 0)
            okayTags.addAll(Arrays.asList(allowedTags));

        StringBuffer line = new StringBuffer(msg);
        for (int i = 0; i < line.length(); i++) {
            if (line.charAt(i) == '<') {
                int begin = i, end = i;
                for (int j = i + 1; j < line.length(); j++) {
                    if (line.charAt(j) == '>') {
                        end = j + 1;
                        break;
                    }
                }
                if (!isOkayTag(new String(line.substring(begin, end)))) {
                    line = line.delete(begin, end);
                }
            }
        }
        return new String(line);
    }

    private boolean isOkayTag(String tag) {
        int end = (tag.indexOf(" ") >= 0) ? tag.indexOf(" ") : tag.length() - 1;
        return (okayTags.contains(tag.substring(1, end)) || okayTags.contains(tag.substring(2, end)));
    }

    /**
     *
     * @param text
     * @return
     */
    public String strictFilter(String text) {
        if (text == null || text.equals(""))
            return text;

        Pattern p0 = Pattern.compile(HTML_START, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
        Pattern p1 = Pattern.compile(HTML_END, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

        Matcher m0 = p0.matcher(text);
        Matcher m1 = p1.matcher(m0.replaceAll(""));
        return m1.replaceAll("");
    }

    /**
     *
     * @param text
     * @return
     */
    public String filterForEmail(String text) {
        Pattern links = Pattern.compile(LINK_PATTERN, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

        Matcher matcher = links.matcher(text);
        text = matcher.replaceAll("$2 [link: $1]");

        // First replace <tr> and <br> with \n
        text = text.replaceAll(EMAIL_LINE_BREAKS, "\n");

        text = strictFilter(text);

        text = text.replaceAll(NOBR_SPACE_PATTERN, " ") // make all &nbsp; into plain text spaces
                .replaceAll("\t", " ") // make all tabs spaces
                .replaceAll(" {2,}", " ") // make all multiple space sections a single space
                .replaceAll("^\\s", "") // remove any leaving space
                .replaceAll("\n{2,}", "\n\n"); // make sure there aren't too many line breaks in a row

        // Get the text as lines
        String[] lines = text.split("\n");
        String[] words;

        StringBuilder body = new StringBuilder();

        int lineLength = 0;
        String lastWord;
        for (String line : lines) {
            words = line.split("\\s");
            lineLength = 0;

            lastWord = "";
            for (String word : words) {
                if (lineLength > 0 && lineLength + word.length() >= PLAINTEXT_LINE_LENGTH
                        && !lastWord.equals("[link:")) {
                    body.append("\n" + word);
                    lineLength = word.length();
                } else {
                    body.append((lineLength > 0 ? " " : "") + word);
                    lineLength += word.length();
                }
                lastWord = word;
            }
            body.append("\n");
        }
        return body.toString().replaceAll("\n{3,}", "\n\n");
    }
}