nl.tue.gale.ae.processor.xmlmodule.CreoleParser.java Source code

Java tutorial

Introduction

Here is the source code for nl.tue.gale.ae.processor.xmlmodule.CreoleParser.java

Source

/*
    
   This file is part of GALE (Generic Adaptation Language and Engine).
    
GALE is free software: you can redistribute it and/or modify it under the 
terms of the GNU Lesser General Public License as published by the Free 
Software Foundation, either version 3 of the License, or (at your option) 
any later version.
    
GALE is distributed in the hope that it will be useful, but WITHOUT ANY 
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for 
more details.
    
You should have received a copy of the GNU Lesser General Public License
along with GALE. If not, see <http://www.gnu.org/licenses/>.
    
 */
/**
 * CreoleParser.java
 * Last modified: $Date$
 * In revision:   $Revision$
 * Modified by:   $Author$
 *
 * Copyright (c) 2008-2011 Eindhoven University of Technology.
 * All Rights Reserved.
 *
 * This software is proprietary information of the Eindhoven University
 * of Technology. It may be used according to the GNU LGPL license.
 */
package nl.tue.gale.ae.processor.xmlmodule;

import static com.google.common.base.Preconditions.checkNotNull;
import static nl.tue.gale.common.GaleUtil.adaptns;
import static nl.tue.gale.common.GaleUtil.createHTMLElement;
import static nl.tue.gale.common.GaleUtil.createNSElement;
import static nl.tue.gale.common.GaleUtil.xhtmlns;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import nl.tue.gale.common.GaleUtil;
import nl.tue.gale.common.parser.ParseString;

import org.dom4j.DocumentFactory;
import org.dom4j.Element;
import org.dom4j.Node;

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.MapMaker;

public final class CreoleParser {
    /** The single shared parser, created when this class is initialised. */
    private static final CreoleParser parser = new CreoleParser();

    /** Private so that the only instance is the one handed out by {@link #instance()}. */
    private CreoleParser() {
    }

    /**
     * Gives access to the shared parser.
     *
     * @return the one {@code CreoleParser} instance
     */
    public static CreoleParser instance() {
        return parser;
    }

    /**
     * Parse results keyed by their creole source text. The map is built with
     * {@code maximumSize(250)} and computes a missing entry on demand by
     * calling {@link #internalParse(String)}.
     */
    private final Map<String, Element> cache = new MapMaker().maximumSize(250)
            .makeComputingMap(new Function<String, Element>() {
                @Override
                public Element apply(String creole) {
                    return internalParse(creole);
                }
            });

    /**
     * Converts creole markup into an element tree.
     * <p>
     * The element is looked up in (or computed into) the cache, and the
     * result of {@code createCopy()} on the cached element is returned, so
     * the caller does not receive the cached instance itself.
     *
     * @param text
     *            the creole source; may be {@code null}
     * @return a copy of the parsed element, or {@code null} when
     *         {@code text} is {@code null}
     */
    public Element parse(String text) {
        if (text != null) {
            Element cached = cache.get(text);
            return cached.createCopy();
        }
        return null;
    }

    /**
     * Scans and parses the given creole text, collecting the resulting nodes
     * in a new {@code span} element.
     *
     * @param text
     *            the creole source
     * @return the {@code span} element holding the parsed nodes
     * @throws IllegalArgumentException
     *             wrapping any exception raised while scanning or parsing
     */
    private Element internalParse(String text) {
        try {
            Element wrapper = createHTMLElement("span");
            List<Node> parsed = Parser.CREOLE(new Tokens(new Scanner(text)));
            for (Node child : parsed) {
                wrapper.add(child);
            }
            return wrapper;
        } catch (Exception e) {
            throw new IllegalArgumentException("unable to parse creole '" + text + "'", e);
        }
    }

    /**
     * Cursor over the token stream produced by a {@link Scanner}.
     * <p>
     * Only a bounded window of tokens is kept: whenever new tokens are pulled
     * from the scanner the oldest ones are dropped so that at most
     * {@code CAPACITY} remain, and the cursor is shifted accordingly. This
     * allows a limited amount of stepping back with {@link #movePrevious()}.
     */
    private static final class Tokens {
        /** Maximum number of tokens retained after a refill. */
        private static final int CAPACITY = 5;

        private final Scanner source;
        // A single scan() may temporarily push the buffer above CAPACITY, hence the extra room.
        private final List<Token> buffer = new ArrayList<Token>(CAPACITY + 2);
        /** Index of the current token inside {@code buffer}. */
        private int cursor = 0;

        public Tokens(Scanner scanner) {
            this.source = scanner;
        }

        /**
         * Returns the token under the cursor, pulling more tokens from the
         * scanner first when the cursor has run past the buffered ones.
         */
        public Token current() {
            if (cursor >= buffer.size()) {
                refill();
            }
            return buffer.get(cursor);
        }

        /** Advances the cursor; refills eagerly once it reaches the window capacity. */
        public void moveNext() {
            if (++cursor >= CAPACITY) {
                refill();
            }
        }

        /**
         * Steps the cursor back by one token.
         *
         * @throws IllegalStateException
         *             if the cursor moved in front of the retained window
         */
        public void movePrevious() {
            if (--cursor < 0) {
                throw new IllegalStateException("token list too small");
            }
        }

        /** Tells whether a token is retained before the current one. */
        public boolean hasPrevious() {
            return cursor > 0;
        }

        /**
         * Appends the next batch of scanned tokens and discards the oldest
         * entries beyond the capacity, keeping the cursor on the same token.
         */
        private void refill() {
            buffer.addAll(source.scan());
            int surplus = buffer.size() - CAPACITY;
            if (surplus > 0) {
                buffer.subList(0, surplus).clear();
                cursor -= surplus;
            }
        }
    }

    /**
     * Immutable lexical token consisting of a type and an optional text.
     * <p>
     * For the types {@code "symbol"} and {@code "text"} the text is part of
     * the token's identity (it takes part in {@link #equals(Object)},
     * {@link #hashCode()} and {@link #toString()}); for every other type only
     * the type counts.
     */
    private static final class Token {
        private final String type;
        private final String text;
        /** True when {@code text} contributes to identity and string form. */
        private final boolean useText;

        /** Lazily computed hash; 0 means "not computed yet". */
        private volatile int hashCode = 0;

        private Token(String type, String text) {
            this.type = type;
            this.text = text;
            useText = ("symbol".equals(type) || "text".equals(type));
        }

        /**
         * Creates a token without text.
         *
         * @param type
         *            the token type, must not be {@code null}
         */
        public static Token of(String type) {
            checkNotNull(type);
            return new Token(type, null);
        }

        /**
         * Creates a token with the given text.
         *
         * @param type
         *            the token type, must not be {@code null}
         * @param text
         *            the associated text, may be {@code null}
         */
        public static Token of(String type, String text) {
            checkNotNull(type);
            return new Token(type, text);
        }

        public String getType() {
            return type;
        }

        public String getText() {
            return text;
        }

        /** Returns the text for "symbol"/"text" tokens and the type otherwise. */
        @Override
        public String toString() {
            return useText ? text : type;
        }

        /**
         * Variant of {@link #toString()} that can reproduce whitespace.
         *
         * @param includeWhitespace
         *            when {@code true}, a token whose type contains a space or
         *            a newline yields its text instead of its type
         */
        public String toString(boolean includeWhitespace) {
            if (includeWhitespace && (type.contains(" ") || type.contains("\n")))
                return text;
            return toString();
        }

        @Override
        public int hashCode() {
            if (hashCode == 0) {
                int result = 5394;
                result = 31 * result + type.hashCode();
                if (useText)
                    result = 31 * result + (text == null ? 3958 : text.hashCode());
                // 0 is reserved as the "not computed" marker
                hashCode = (result == 0 ? 1 : result);
            }
            return hashCode;
        }

        @Override
        public boolean equals(Object obj) {
            if (obj == this)
                return true;
            // instanceof also rejects null, so equals(null) is false instead of
            // throwing, and hashCode() is never invoked on a foreign object.
            if (!(obj instanceof Token))
                return false;
            Token other = (Token) obj;
            // cheap pre-check using the cached hashes
            if (other.hashCode() != hashCode())
                return false;
            if (!type.equals(other.type))
                return false;
            if (!useText)
                return true;
            return text == null ? other.text == null : text.equals(other.text);
        }
    }

    /**
     * Turns creole source text into {@link Token}s.
     * <p>
     * Characters are read from a {@link ParseString}; the character
     * {@code '\f'} is treated as the end-of-input marker. Recognised markup
     * becomes a token whose type is the markup itself (for example "//",
     * "[[", "[[=", "]]", "|", "|=", "(%", "%)", runs of '*', '=', '#', runs
     * of four or more '-', two or three braces, and whitespace classified as
     * " ", "\n", "\n ", "\n\n" or "\n\n "). Letters and digits are gathered
     * into "text" tokens, any other single character becomes a "symbol"
     * token, and the end of input yields an "end" token.
     */
    private static final class Scanner {
        private final ParseString ps;

        // Results of the most recent scanInternal() call.
        /** Type of the recognised markup token, or null if none was recognised. */
        private String tokenString;
        /** Text belonging to the recognised token, or null. */
        private String tokenText;
        /** The (possibly escaped) character the last step ended on. */
        private char lastChar;

        public Scanner(String parseString) {
            this.ps = new ParseString(parseString);
        }

        /**
         * Scans the next one or two tokens.
         * <p>
         * Consecutive letters and digits are collected into one "text" token;
         * the step that ends the run contributes the second token (a markup
         * token, a "symbol" token or the "end" token).
         *
         * @return an immutable list with at least one token
         */
        public List<Token> scan() {
            StringBuilder sb = new StringBuilder();
            boolean wordChar;
            do {
                scanInternal();
                wordChar = tokenString == null && lastChar != '\f' && Character.isLetterOrDigit(lastChar);
                if (wordChar)
                    sb.append(lastChar);
            } while (wordChar);
            ImmutableList.Builder<Token> builder = ImmutableList.builder();
            if (sb.length() > 0)
                builder.add(Token.of("text", sb.toString()));
            if (tokenString != null)
                builder.add(Token.of(tokenString, tokenText));
            else if (lastChar == '\f')
                builder.add(Token.of("end"));
            else
                builder.add(Token.of("symbol", "" + lastChar));
            return builder.build();
        }

        /**
         * Performs one scanning step and stores its outcome in
         * {@code tokenString}, {@code tokenText} and {@code lastChar}.
         * {@code tokenString} stays null when the step produced a plain
         * character rather than markup.
         */
        private void scanInternal() {
            tokenString = null;
            tokenText = null;
            char ch = ps.nextChar();
            if (ch == '~') {
                // escape: the following character is taken as-is
                ch = ps.nextChar();
            } else if (ch == '/' || ch == '[' || ch == ']' || ch == '\\') {
                // doubled character is markup; "[[" may be extended to "[[="
                char ach = ps.nextChar();
                if (ach == ch) {
                    if (ch == '[') {
                        ach = ps.nextChar();
                        if (ach == '=')
                            tokenString = "" + ch + ch + ach;
                        else {
                            tokenString = "" + ch + ch;
                            ps.returnChar(ach);
                        }
                    } else
                        tokenString = "" + ch + ch;
                } else {
                    ps.returnChar(ach);
                    // a single backslash is markup on its own
                    if (ch == '\\')
                        tokenString = "\\";
                }
            } else if (ch == '*' || ch == '-' || ch == '=' || ch == '#') {
                // a run of the same character forms one token
                char ach;
                StringBuilder sb = new StringBuilder();
                sb.append(ch);
                while ((ach = ps.nextChar()) == ch)
                    sb.append(ch);
                ps.returnChar(ach);
                if (ch == '-' && sb.length() < 4)
                    // fewer than four dashes is not markup: hand back all but
                    // the first, which is reported as a plain character
                    for (int i = 0; i < sb.length() - 1; i++)
                        ps.returnChar(ch);
                else
                    tokenString = sb.toString();
            } else if (ch == '|') {
                char ach = ps.nextChar();
                if (ach == '=')
                    tokenString = "|=";
                else {
                    ps.returnChar(ach);
                    tokenString = "|";
                }
            } else if (ch == '(') {
                char ach = ps.nextChar();
                if (ach == '%') {
                    // inline block: collect everything up to the closing "%)"
                    tokenString = "(%";
                    StringBuilder text = new StringBuilder();
                    ach = ps.nextChar();
                    char bch;
                    boolean done = false;
                    while (ach != '\f' && !done) {
                        text.append(ach);
                        bch = ps.nextChar();
                        done = (ach == '%' && bch == ')');
                        ach = bch;
                    }
                    // Only a properly closed block ends with the '%' of "%)".
                    // Stripping unconditionally would drop a real character
                    // from an unterminated block and fail on an empty one.
                    if (done)
                        text.setLength(text.length() - 1);
                    // NOTE(review): when the block is unterminated, the end
                    // marker read above is not handed back to ps - confirm
                    // that ParseString keeps reporting end of input.
                    tokenText = text.toString();
                } else
                    ps.returnChar(ach);
            } else if (ch == '%') {
                char ach = ps.nextChar();
                if (ach == ')')
                    tokenString = "%)";
                else
                    ps.returnChar(ach);
            } else if (ch == '{' || ch == '}') {
                // two or three identical braces are markup
                char ach = ps.nextChar();
                if (ach == ch) {
                    char bch = ps.nextChar();
                    if (bch == ch)
                        tokenString = "" + ch + ch + ch;
                    else {
                        ps.returnChar(bch);
                        tokenString = "" + ch + ch;
                    }
                } else
                    ps.returnChar(ach);
            } else if (Character.isWhitespace(ch) && !('\f' == ch)) {
                // collapse a whitespace run into a single token; the original
                // characters are kept as the token text
                char ach = ch;
                char last;
                int lineCount = 0;
                StringBuilder sb = new StringBuilder();
                do {
                    if (Character.isWhitespace(ach))
                        sb.append(ach);
                    last = ach;
                    if (ach == '\n')
                        lineCount++;
                    ach = ps.nextChar();
                } while (Character.isWhitespace(ach) && !('\f' == ach));
                tokenText = sb.toString();
                ps.returnChar(ach);
                ch = ' ';
                if (lineCount >= 1) {
                    // one newline vs. a blank line, plus a trailing blank when
                    // the run did not end on a newline
                    tokenString = ((lineCount == 1) ? "\n" : "\n\n") + (last == '\n' ? "" : " ");
                } else
                    tokenString = " ";
            }
            lastChar = ch;
        }
    }

    private static final class Parser {
        /** Token that marks the end of the input. */
        private static final Token END_TOKEN = Token.of("end");

        /**
         * Parses a complete creole document into a list of paragraphs.
         * <p>
         * Newline tokens in front of a paragraph are skipped. If nothing but
         * the end token remains after skipping (the input ended with blank
         * lines), parsing stops instead of emitting an empty paragraph.
         *
         * @param tokens
         *            the token cursor, positioned at the start of the input
         * @return an immutable list with one element per paragraph
         */
        public static List<Node> CREOLE(Tokens tokens) {
            ImmutableList.Builder<Node> paragraphs = ImmutableList.builder();
            while (!END_TOKEN.equals(tokens.current())) {
                while (tokens.current().getType().startsWith("\n"))
                    tokens.moveNext();
                // trailing blank lines must not produce an empty <p>
                if (END_TOKEN.equals(tokens.current()))
                    break;
                paragraphs.add(PARA(tokens));
            }
            return paragraphs.build();
        }

        /**
         * Parses one paragraph: the inline content produced by
         * {@link #TEXT(Tokens)} wrapped in a {@code p} element.
         *
         * @param tokens
         *            the token cursor
         * @return the new paragraph element
         */
        public static Element PARA(Tokens tokens) {
            Element paragraph = createHTMLElement("p");
            List<Node> inline = TEXT(tokens);
            @SuppressWarnings("unchecked")
            List<Node> children = paragraph.content();
            children.addAll(inline);
            return paragraph;
        }

        /**
         * Parses inline content without any terminating mark.
         *
         * @param tokens
         *            the token cursor
         * @return the parsed nodes
         */
        public static List<Node> TEXT(Tokens tokens) {
            return TEXT(tokens, ImmutableList.<Token>of());
        }

        /**
         * Parses inline content up to a single terminating mark.
         *
         * @param tokens
         *            the token cursor
         * @param mark
         *            type of the token that ends the content, or
         *            {@code null} for no terminating mark
         * @return the parsed nodes
         */
        public static List<Node> TEXT(Tokens tokens, String mark) {
            List<Token> terminators;
            if (mark != null)
                terminators = ImmutableList.of(Token.of(mark));
            else
                terminators = ImmutableList.<Token>of();
            return TEXT(tokens, terminators);
        }

        /**
         * Parses inline content until the end of input, a blank line (a token
         * whose type starts with two newlines) or one of the given marks.
         * <p>
         * Each token is dispatched on its type to the matching element parser;
         * list, heading and table markup is only honoured when
         * {@link #isListStart(Tokens)} holds. Anything else is emitted as a
         * text node, with a lone newline turned into a space. A terminating
         * mark, if found, is consumed.
         *
         * @param tokens
         *            the token cursor
         * @param mark
         *            tokens that end the content; may be {@code null}
         * @return an immutable list of the parsed nodes
         */
        public static List<Node> TEXT(Tokens tokens, List<Token> mark) {
            List<Node> nodes = new LinkedList<Node>();
            for (;;) {
                Token token = tokens.current();
                String type = token.getType();
                if (END_TOKEN.equals(token) || type.startsWith("\n\n") || (mark != null && mark.contains(token)))
                    break;

                if (type.equals("//")) {
                    nodes.add(EM(tokens));
                } else if (type.equals("**")) {
                    nodes.add(STRONG(tokens));
                } else if (type.equals("(%")) {
                    nodes.add(INLINE(tokens));
                } else if (type.startsWith("[[")) {
                    nodes.add(LINK(tokens));
                } else if (type.equals("{{")) {
                    nodes.add(IMAGE(tokens));
                } else if (type.equals("{{{")) {
                    nodes.add(NOWIKI(tokens));
                } else if (type.startsWith("----")) {
                    nodes.add(HR(tokens));
                } else if ((type.equals("*") || type.equals("#")) && isListStart(tokens)) {
                    nodes.add(LIST(tokens, 1));
                } else if (type.startsWith("=") && isListStart(tokens)) {
                    nodes.add(HEADING(tokens));
                } else if ((type.equals("|=") || type.equals("|")) && isListStart(tokens)) {
                    nodes.add(TABLE(tokens));
                } else if (type.equals("\\\\")) {
                    nodes.add(BR(tokens));
                } else if (type.equals("\\")) {
                    nodes.add(VARIABLE(tokens));
                } else {
                    // plain content; a single line break reads as a space
                    String literal = token.toString();
                    if (literal.equals("\n"))
                        literal = " ";
                    nodes.add(DocumentFactory.getInstance().createText(literal));
                    tokens.moveNext();
                }
            }
            // swallow the terminating mark so the caller continues after it
            if (mark != null && mark.contains(tokens.current()))
                tokens.moveNext();
            return ImmutableList.copyOf(nodes);
        }

        /**
         * Parses emphasised text: skips the opening "//" and wraps the
         * content up to the next "//" in an {@code em} element.
         *
         * @param tokens
         *            the token cursor, positioned on the opening "//"
         * @return the new {@code em} element
         */
        public static Element EM(Tokens tokens) {
            tokens.moveNext();
            Element emphasis = createHTMLElement("em");
            List<Node> inner = TEXT(tokens, "//");
            @SuppressWarnings("unchecked")
            List<Node> children = emphasis.content();
            children.addAll(inner);
            return emphasis;
        }

        /**
         * Parses bold text: skips the opening "**" and wraps the content up
         * to the next "**" in a {@code strong} element.
         *
         * @param tokens
         *            the token cursor, positioned on the opening "**"
         * @return the new {@code strong} element
         */
        public static Element STRONG(Tokens tokens) {
            tokens.moveNext();
            Element bold = createHTMLElement("strong");
            List<Node> inner = TEXT(tokens, "**");
            @SuppressWarnings("unchecked")
            List<Node> children = bold.content();
            children.addAll(inner);
            return bold;
        }

        /**
         * Parses a link opened by "[[" (internal) or "[[=" (external).
         * <p>
         * Tokens before the first "|" form the link target, tokens after it
         * the label; inside the label a single backslash introduces a
         * variable. Without label content the target is used as label, except
         * for an internal link written with an empty label part, which gets a
         * {@code variable} element asking for the target's title. A "text"
         * token directly following the link is appended to the label.
         *
         * @param tokens
         *            the token cursor, positioned on the opening token
         * @return the new {@code a} element in the adapt namespace
         */
        public static Element LINK(Tokens tokens) {
            boolean external = tokens.current().getType().endsWith("=");
            tokens.moveNext();
            Element anchor = createNSElement("a", adaptns);
            StringBuilder target = new StringBuilder();
            List<Node> label = new LinkedList<Node>();
            boolean inLabel = false;
            for (;;) {
                Token token = tokens.current();
                String type = token.getType();
                if (END_TOKEN.equals(token) || type.startsWith("\n\n") || type.equals("]]"))
                    break;
                if (type.equals("|")) {
                    inLabel = true;
                } else if (!inLabel) {
                    target.append(token.toString());
                } else if (type.equals("\\")) {
                    label.add(VARIABLE(tokens));
                    // VARIABLE already stepped past its name; compensate for
                    // the moveNext() below
                    tokens.movePrevious();
                } else {
                    label.add(DocumentFactory.getInstance().createText(token.toString()));
                }
                tokens.moveNext();
            }
            // step over the closing "]]" (or the blank line that cut the link short)
            if (!END_TOKEN.equals(tokens.current()))
                tokens.moveNext();
            if (label.isEmpty()) {
                if (inLabel && !external)
                    label.add(createNSElement("variable", adaptns).addAttribute("expr",
                            "${" + target.toString() + "}.getTitle()"));
                else
                    label.add(DocumentFactory.getInstance().createText(target.toString()));
            }
            // text glued to the closing brackets becomes part of the label
            if (tokens.current().getType().equals("text")) {
                label.add(DocumentFactory.getInstance().createText(tokens.current().toString()));
                tokens.moveNext();
            }
            String href = target.toString();
            if (external) {
                anchor.addAttribute("href", "?external=" + href);
                anchor.addAttribute("adapt", "false");
            } else {
                anchor.addAttribute("href", href);
            }
            for (Node part : label)
                anchor.add(part);
            return anchor;
        }

        /**
         * Builds the marker that introduces a list item at the given depth,
         * for example "***" for symbol '*' and depth 3.
         */
        private static String listMarker(char symbol, int depth) {
            StringBuilder marker = new StringBuilder(depth);
            for (int i = 0; i < depth; i++)
                marker.append(symbol);
            return marker.toString();
        }

        /**
         * Parses a bulleted ('*') or numbered ('#') list at the given nesting
         * level, recursing for sub-lists.
         * <p>
         * Markers are generated on demand, so the nesting depth is not
         * limited by a fixed-size lookup table.
         *
         * @param tokens
         *            the token cursor, positioned on the marker of the first
         *            item
         * @param level
         *            nesting level of this list, starting at 1
         * @return the new {@code ul} or {@code ol} element
         */
        public static Element LIST(Tokens tokens, int level) {
            boolean unordered = tokens.current().getType().startsWith("*");
            char symbol = unordered ? '*' : '#';
            String ownMarker = listMarker(symbol, level);
            String childMarker = listMarker(symbol, level + 1);
            Element result = createHTMLElement(unordered ? "ul" : "ol");
            do {
                // skip the item marker
                tokens.moveNext();
                Element li = createHTMLElement("li");
                @SuppressWarnings("unchecked")
                List<Node> content = li.content();
                // an item runs up to the end of its line
                content.addAll(TEXT(tokens, ImmutableList.of(Token.of("\n"))));
                result.add(li);
                if (!isListPart(tokens))
                    break;
                // one marker character more: a nested list starts here
                if (childMarker.equals(tokens.current().getType())) {
                    result.add(LIST(tokens, level + 1));
                    if (!isListPart(tokens))
                        break;
                }
            } while (ownMarker.equals(tokens.current().getType()));
            return result;
        }

        /**
         * Tells whether the current token is at the start of a line: either
         * there is no previous token, or the previous token's type ends with
         * a newline. The cursor is left where it was.
         */
        private static boolean isListStart(Tokens tokens) {
            if (!tokens.hasPrevious())
                return true;
            tokens.movePrevious();
            boolean afterNewline = tokens.current().getType().endsWith("\n");
            tokens.moveNext();
            return afterNewline;
        }

        /**
         * Tells whether the token before the current one is exactly a single
         * newline token. The cursor is stepped back, inspected and restored.
         */
        private static boolean isListPart(Tokens tokens) {
            tokens.movePrevious();
            String previousType = tokens.current().getType();
            boolean followsNewline = previousType.equals("\n");
            tokens.moveNext();
            return followsNewline;
        }

        /**
         * Consumes a horizontal-rule token.
         *
         * @param tokens
         *            the token cursor, positioned on the rule token
         * @return a new {@code hr} element
         */
        public static Element HR(Tokens tokens) {
            tokens.moveNext();
            Element rule = createHTMLElement("hr");
            return rule;
        }

        /**
         * Tokens that end a heading: any run of one to six '=' characters or
         * the end of the line.
         */
        private static final List<Token> headingList = ImmutableList.of(Token.of("="), Token.of("=="),
                Token.of("==="), Token.of("===="), Token.of("====="), Token.of("======"), Token.of("\n"),
                Token.of("\n "));

        /** Deepest heading element that is generated ({@code h6}). */
        private static final int MAX_HEADING_LEVEL = 6;

        /**
         * Parses a heading. The level equals the number of '=' characters of
         * the opening token, capped at {@code h6} so that a longer run does
         * not produce an invalid element name.
         *
         * @param tokens
         *            the token cursor, positioned on the opening run of '='
         * @return the new heading element
         */
        public static Element HEADING(Tokens tokens) {
            Token opener = tokens.current();
            int level = Math.min(opener.getType().length(), MAX_HEADING_LEVEL);
            Element result = createHTMLElement("h" + level);
            // an over-long opening run is also accepted as the closing marker
            List<Token> terminators = headingList;
            if (!headingList.contains(opener))
                terminators = ImmutableList.<Token>builder().addAll(headingList).add(opener).build();
            tokens.moveNext();
            @SuppressWarnings("unchecked")
            List<Node> content = result.content();
            content.addAll(TEXT(tokens, terminators));
            return result;
        }

        /**
         * Consumes a forced line break (double backslash).
         *
         * @param tokens
         *            the token cursor, positioned on the line-break token
         * @return a new {@code br} element
         */
        public static Element BR(Tokens tokens) {
            tokens.moveNext();
            Element lineBreak = createHTMLElement("br");
            return lineBreak;
        }

        /**
         * Parses an image written between double braces.
         * <p>
         * Tokens before the first "|" form the image source, tokens after it
         * the label; without a label the source doubles as label. The label
         * is used for both the {@code alt} and the {@code title} attribute.
         *
         * @param tokens
         *            the token cursor, positioned on the opening braces
         * @return the new {@code img} element
         */
        public static Element IMAGE(Tokens tokens) {
            tokens.moveNext();
            Element image = createNSElement("img", xhtmlns);
            StringBuilder source = new StringBuilder();
            StringBuilder caption = new StringBuilder();
            boolean afterPipe = false;
            for (;;) {
                Token token = tokens.current();
                String type = token.getType();
                if (END_TOKEN.equals(token) || type.startsWith("\n\n") || type.equals("}}"))
                    break;
                if (type.equals("|"))
                    afterPipe = true;
                else
                    (afterPipe ? caption : source).append(token.toString());
                tokens.moveNext();
            }
            // step over the closing braces (or the blank line that cut the image short)
            if (!END_TOKEN.equals(tokens.current()))
                tokens.moveNext();
            if (caption.length() == 0)
                caption.append(source);
            String description = caption.toString();
            image.addAttribute("src", source.toString()).addAttribute("alt", description).addAttribute("title",
                    description);
            return image;
        }

        /**
         * Parses a no-wiki section opened by three braces.
         * <p>
         * When the opening braces start a line and are directly followed by a
         * newline token, the section becomes a {@code pre} element and only
         * closing braces at the start of a line end it; otherwise it becomes
         * a {@code tt} element ended by the first closing braces. The content
         * is added verbatim as text, whitespace tokens contributing their
         * original characters.
         *
         * @param tokens
         *            the token cursor, positioned on the opening braces
         * @return the new {@code pre} or {@code tt} element
         */
        public static Element NOWIKI(Tokens tokens) {
            final Token closing = Token.of("}}}");
            boolean pre = isListStart(tokens);
            tokens.moveNext();
            boolean newlineFollows = tokens.current().getType().contains("\n");
            pre = pre && newlineFollows;
            Element result = createHTMLElement(pre ? "pre" : "tt");
            StringBuilder verbatim = new StringBuilder();
            for (;;) {
                boolean closed;
                if (pre)
                    closed = isListStart(tokens) && tokens.current().equals(closing);
                else
                    closed = tokens.current().equals(closing);
                if (closed || tokens.current().equals(END_TOKEN))
                    break;
                verbatim.append(tokens.current().toString(true));
                tokens.moveNext();
            }
            // step over the closing braces
            if (!tokens.current().equals(END_TOKEN))
                tokens.moveNext();
            result.addText(verbatim.toString());
            return result;
        }

        /**
         * Parses a variable reference: skips the introducing backslash, takes
         * the string form of the next token as the variable name and consumes
         * it.
         *
         * @param tokens
         *            the token cursor, positioned on the backslash token
         * @return an {@code object} element in the adapt namespace whose
         *         {@code name} attribute is "_variable/" followed by the name
         */
        public static Element VARIABLE(Tokens tokens) {
            tokens.moveNext();
            String name = tokens.current().toString();
            tokens.moveNext();
            Element reference = createNSElement("object", adaptns);
            reference.addAttribute("name", "_variable/" + name);
            return reference;
        }

        /**
         * Parses an inline XML block. The trimmed text of the current token
         * must start with '&lt;'; it is parsed as XML and the detached root
         * element is returned. The token is consumed in either case.
         *
         * @param tokens
         *            the token cursor, positioned on the inline token
         * @return the detached root element of the inline XML
         * @throws IllegalArgumentException
         *             if the inline text does not start with '&lt;'
         */
        public static Element INLINE(Tokens tokens) {
            String xml = tokens.current().getText().trim();
            tokens.moveNext();
            if (!xml.startsWith("<"))
                throw new IllegalArgumentException("error in inline code '" + xml + "'");
            Element root = GaleUtil.parseXML(new StringReader(xml)).getRootElement();
            root.detach();
            return root;
        }

        /**
         * Parses a table. Every line that starts with "|" or "|=" becomes a
         * row; "|=" introduces a header cell ({@code th}), "|" a data cell
         * ({@code td}). Inside a cell emphasis, bold text and links are
         * recognised, everything else is added as text.
         * <p>
         * The table ends at the first line that does not start with a cell
         * separator, so text following the table is left for the caller
         * instead of being consumed as a further row.
         *
         * @param tokens
         *            the token cursor, positioned on the first cell separator
         * @return the new {@code table} element
         */
        public static Element TABLE(Tokens tokens) {
            Element result = createHTMLElement("table");
            do {
                Element tr = result.addElement("tr");
                boolean eol = false;
                do {
                    boolean header = ("|=".equals(tokens.current().getType()));
                    // skip the cell separator
                    tokens.moveNext();
                    eol = tokens.current().getType().contains("\n") || tokens.current().equals(END_TOKEN);
                    if (!eol) {
                        Element td = (header ? tr.addElement("th") : tr.addElement("td"));
                        while (!eol && !"|".equals(tokens.current().getType())
                                && !"|=".equals(tokens.current().getType())) {
                            if (tokens.current().getType().equals("//")) {
                                td.add(EM(tokens));
                            } else if (tokens.current().getType().equals("**")) {
                                td.add(STRONG(tokens));
                            } else if (tokens.current().getType().startsWith("[[")) {
                                td.add(LINK(tokens));
                            } else {
                                td.addText(tokens.current().toString());
                                tokens.moveNext();
                            }
                            eol = tokens.current().getType().contains("\n") || tokens.current().equals(END_TOKEN);
                        }
                    }
                    // consume the line break that ended the row
                    if (tokens.current().getType().contains("\n"))
                        tokens.moveNext();
                } while (!eol);
                // another row only follows when the next line itself starts
                // with a cell separator
            } while (isListPart(tokens) && isRowStart(tokens.current()));
            // hand a blank line back to the caller so it can end the paragraph
            tokens.movePrevious();
            if (!tokens.current().getType().contains("\n\n"))
                tokens.moveNext();
            return result;
        }

        /** Tells whether the token is a cell separator, i.e. can open a table row. */
        private static boolean isRowStart(Token token) {
            String type = token.getType();
            return "|".equals(type) || "|=".equals(type);
        }
    }
}