org.exoplatform.wiki.rendering.render.confluence.ConfluenceSyntaxEscapeHandler.java Source code

Java tutorial

Introduction

Here is the source code for org.exoplatform.wiki.rendering.render.confluence.ConfluenceSyntaxEscapeHandler.java

Source

/*
 * Copyright (C) 2003-2010 eXo Platform SAS.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Affero General Public License
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
package org.exoplatform.wiki.rendering.render.confluence;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.xwiki.rendering.listener.chaining.BlockStateChainingListener;

/**
 * Created by The eXo Platform SAS
 * Author : viet nguyen
 *          viet.nguyen@exoplatform.com
 * Jul 2, 2010  
 */

/**
 * Escape characters that would be confused for Confluence wiki syntax if they were not escaped.
 * <p>
 * Escaping is done in place on a {@link StringBuffer} by prefixing the offending characters with
 * {@link #ESCAPE_CHAR}. The handler keeps two mutable flags ({@code onNewLine} and {@code beforeLink})
 * that callers set before invoking {@link #escape}.
 */
public class ConfluenceSyntaxEscapeHandler {

    /**
     * Matches any number of {@code *}, optionally followed by a run of {@code :}/{@code ;} or by
     * {@code 1...1.}, followed by one blank character. Not referenced inside this class; presumably handed
     * to {@link #escape} as {@code escapeFirstIfMatching} by callers - verify against usages.
     */
    public static final Pattern STARLISTEND_PATTERN = Pattern.compile("(\\**([:;]*|1+\\.)?\\p{Blank})");

    /** List-item marker ({@code *}, {@code 1.}, {@code :}, {@code ;} combinations) followed by blanks. */
    private static final Pattern LIST_PATTERN = Pattern
            .compile("\\p{Blank}*((\\*+[:;]*)|([1*]+\\.[:;]*)|([:;]+))\\p{Blank}+");

    /** One or more {@code >} characters. */
    private static final Pattern QUOTE_PATTERN = Pattern.compile("(\\>+)");

    /** Optional blanks followed by one or more {@code =} characters. */
    private static final Pattern HEADER_PATTERN = Pattern.compile("\\p{Blank}*(=+)");

    /** Optional blanks followed by {@code |} or {@code !!}. */
    private static final Pattern TABLE_PATTERN = Pattern.compile("\\p{Blank}*(\\||!!)");

    /** Two-character sequences that must have both characters escaped wherever they occur. */
    private static final Pattern DOUBLE_CHARS_PATTERN = Pattern
            .compile("\\/\\/|\\*\\*|__|--|\\^\\^|,,|##|\\\\\\\\");

    /** The character (as a string) used to escape syntax characters. */
    public static final String ESCAPE_CHAR = "~";

    /** When set, the link depth reported by the block state listener is reduced by one. */
    private boolean beforeLink = false;

    /** Whether the text being escaped starts a new line; enables the line-start escapes. */
    private boolean onNewLine = true;

    /**
     * @param onNewLine {@code true} if the next text passed to {@link #escape} is at the beginning of a line
     */
    public void setOnNewLine(boolean onNewLine) {
        this.onNewLine = onNewLine;
    }

    /**
     * @return the value last passed to {@link #setOnNewLine(boolean)} ({@code true} initially)
     */
    public boolean isOnNewLine() {
        return this.onNewLine;
    }

    /**
     * Escapes, in place, every sequence of {@code accumulatedBuffer} that this handler recognizes as wiki
     * syntax. The individual steps run in a fixed order because later steps rely on the output of earlier
     * ones (in particular the escape character itself is doubled first).
     *
     * @param accumulatedBuffer the text to escape; modified in place
     * @param listenerChain the chain giving access to the block state (in line, in table, in header, link depth)
     * @param escapeLastChar if {@code true}, the escape character is inserted before the last character of the
     *            buffer (no-op when the buffer is empty)
     * @param escapeFirstIfMatching optional pattern (may be {@code null}); when it matches at the start of the
     *            buffer, the first character of its capturing group 1 is escaped
     */
    public void escape(StringBuffer accumulatedBuffer, ConfluenceSyntaxListenerChain listenerChain,
            boolean escapeLastChar, Pattern escapeFirstIfMatching) {
        BlockStateChainingListener blockStateListener = listenerChain.getBlockStateChainingListener();

        // Escape tilde symbol (i.e. the escape character).
        // Note: This needs to be the first replacement since other replacements below also use the tilde symbol
        replaceAll(accumulatedBuffer, ESCAPE_CHAR, ESCAPE_CHAR + ESCAPE_CHAR);

        // When in a paragraph we need to escape symbols that are at beginning of lines and that could be confused
        // with list items, headers or tables.
        if (blockStateListener.isInLine() && isOnNewLine()) {
            escapeLineStartSyntax(accumulatedBuffer);
        }

        // Escape table characters. Prefixing every "|" also covers "||": once each pipe is preceded by the
        // escape character no two pipes are adjacent any more, so no separate "||" pass is needed.
        if (blockStateListener.isInTable()) {
            replaceAll(accumulatedBuffer, "|", ESCAPE_CHAR + "|");
        }

        if (escapeFirstIfMatching != null) {
            escapeFirstMatchedCharacter(escapeFirstIfMatching, accumulatedBuffer);
        }

        // When in a header we need to escape "=" symbols since otherwise they would
        // be confused for end of section characters.
        if (blockStateListener.isInHeader()) {
            replaceAll(accumulatedBuffer, "=", ESCAPE_CHAR + "=");
        }

        // Escape verbatim "{{{"
        replaceAll(accumulatedBuffer, "{{{", ESCAPE_CHAR + "{" + ESCAPE_CHAR + "{" + ESCAPE_CHAR + "{");

        // Escape "{{"
        replaceAll(accumulatedBuffer, "{{", ESCAPE_CHAR + "{" + ESCAPE_CHAR + "{");

        // Escape groups
        replaceAll(accumulatedBuffer, "(((", ESCAPE_CHAR + "(" + ESCAPE_CHAR + "(" + ESCAPE_CHAR + "(");
        replaceAll(accumulatedBuffer, ")))", ESCAPE_CHAR + ")" + ESCAPE_CHAR + ")" + ESCAPE_CHAR + ")");

        // Escape reserved keywords
        escapeDoubleChars(accumulatedBuffer);

        // Escape ":" in "image:something", "attach:something" and "mailto:something"
        // Note: even though there are some restriction in the URI specification as to what character is valid after
        // the ":" character following the scheme we only check for characters greater than the space symbol for
        // simplicity.
        escapeURI(accumulatedBuffer, "image:");
        escapeURI(accumulatedBuffer, "attach:");
        escapeURI(accumulatedBuffer, "mailto:");

        // Escape last character if we're told to do so. This is to handle cases such as:
        // - onWord("hello:") followed by onFormat(ITALIC) which would lead to "hello://" if the ":" wasn't escaped
        // - onWord("{") followed by onMacro() which would lead to "{{{" if the "{" wasn't escaped
        // An empty buffer has no last character; inserting at index -1 would throw.
        if (escapeLastChar && accumulatedBuffer.length() > 0) {
            accumulatedBuffer.insert(accumulatedBuffer.length() - 1, ESCAPE_CHAR);
        }

        // Escape begin link
        replaceAll(accumulatedBuffer, "[", ESCAPE_CHAR + "[");

        // Escape link label
        escapeLinkLabel(accumulatedBuffer, getLinkLevel(listenerChain));
    }

    /**
     * Escapes the first character of a list, header, table or quote marker found at the very start of the
     * buffer. Escaping only the first character is enough to break the marker.
     */
    private void escapeLineStartSyntax(StringBuffer accumulatedBuffer) {
        escapeFirstMatchedCharacter(LIST_PATTERN, accumulatedBuffer);
        escapeFirstMatchedCharacter(HEADER_PATTERN, accumulatedBuffer);
        escapeFirstMatchedCharacter(TABLE_PATTERN, accumulatedBuffer);
        escapeFirstMatchedCharacter(QUOTE_PATTERN, accumulatedBuffer);
    }

    /**
     * Prefixes both characters of every {@link #DOUBLE_CHARS_PATTERN} match with the escape character.
     */
    private void escapeDoubleChars(StringBuffer accumulatedBuffer) {
        // Match against a snapshot so that edits to the buffer do not disturb the matcher; "shift" converts
        // snapshot offsets into buffer offsets (every replacement makes the buffer two characters longer).
        Matcher matcher = DOUBLE_CHARS_PATTERN.matcher(accumulatedBuffer.toString());
        int shift = 0;
        while (matcher.find()) {
            String pair = matcher.group();
            accumulatedBuffer.replace(matcher.start() + shift, matcher.end() + shift,
                    ESCAPE_CHAR + pair.charAt(0) + ESCAPE_CHAR + pair.charAt(1));
            shift += 2;
        }
    }

    /**
     * Adds the extra level of escaping required inside a link label. Does nothing when {@code linkLevel}
     * is not positive.
     */
    private void escapeLinkLabel(StringBuffer accumulatedBuffer, int linkLevel) {
        if (linkLevel <= 0) {
            return;
        }
        // This need to be done after anything else because link label add another level of escaping (escaped as
        // link label and then escaped as wiki content).
        String escape = StringUtils.repeat(ESCAPE_CHAR, linkLevel);
        replaceAll(accumulatedBuffer, ESCAPE_CHAR, escape + ESCAPE_CHAR);
        replaceAll(accumulatedBuffer, "]", escape + "]");
        replaceAll(accumulatedBuffer, "^", escape + "^");
        replaceAll(accumulatedBuffer, "|", escape + "|");
    }

    /**
     * @return the link depth reported by the block state listener, decremented by one when the
     *         {@code beforeLink} flag is set
     */
    private int getLinkLevel(ConfluenceSyntaxListenerChain listenerChain) {
        int linkDepth = listenerChain.getBlockStateChainingListener().getLinkDepth();

        if (this.beforeLink) {
            --linkDepth;
        }

        return linkDepth;
    }

    /**
     * @param beforeLink {@code true} to make the link level used by {@link #escape} one less than the link
     *            depth reported by the block state listener
     */
    public void setBeforeLink(boolean beforeLink) {
        this.beforeLink = beforeLink;
    }

    /**
     * Escapes the trailing ":" of every occurrence of {@code match} that is immediately followed by a
     * character greater than the space character (code 32). Occurrences at the very end of the buffer or
     * followed by whitespace/control characters are left untouched.
     *
     * @param accumulatedBuffer the text to escape; modified in place
     * @param match the scheme to look for, ending with ":" (e.g. {@code "image:"})
     */
    private void escapeURI(StringBuffer accumulatedBuffer, String match) {
        int from = 0;
        int pos;
        while ((pos = accumulatedBuffer.indexOf(match, from)) != -1) {
            int afterMatch = pos + match.length();
            if (afterMatch < accumulatedBuffer.length() && accumulatedBuffer.charAt(afterMatch) > 32) {
                // Escape the ":" symbol
                accumulatedBuffer.replace(afterMatch - 1, afterMatch, ESCAPE_CHAR + ":");
                // Account for the escape character that was just inserted.
                afterMatch += ESCAPE_CHAR.length();
            }
            from = afterMatch;
        }
    }

    /**
     * Replaces every occurrence of {@code match} with {@code replacement}, scanning left to right and never
     * rescanning text that was just inserted (so a replacement containing the match does not loop).
     *
     * @param accumulatedBuffer the text to modify in place
     * @param match the non-empty string to look for
     * @param replacement the string to put in its place
     */
    private void replaceAll(StringBuffer accumulatedBuffer, String match, String replacement) {
        int from = 0;
        int pos;
        while ((pos = accumulatedBuffer.indexOf(match, from)) != -1) {
            accumulatedBuffer.replace(pos, pos + match.length(), replacement);
            from = pos + replacement.length();
        }
    }

    /**
     * If {@code pattern} matches at the start of the buffer, prefixes the first character of capturing
     * group 1 with the escape character. Nothing happens when the pattern does not match or when group 1
     * did not take part in the match or is empty.
     *
     * @param pattern a pattern declaring at least one capturing group
     * @param accumulatedBuffer the text to modify in place
     */
    private void escapeFirstMatchedCharacter(Pattern pattern, StringBuffer accumulatedBuffer) {
        Matcher matcher = pattern.matcher(accumulatedBuffer);
        if (!matcher.lookingAt()) {
            return;
        }
        int groupStart = matcher.start(1);
        // start(1) is -1 when the group did not participate; an empty group has no character to escape.
        if (groupStart < 0 || matcher.end(1) == groupStart) {
            return;
        }
        // Escape the first character
        char first = accumulatedBuffer.charAt(groupStart);
        accumulatedBuffer.replace(groupStart, groupStart + 1, ESCAPE_CHAR + first);
    }
}