iristk.cfg.ABNFGrammar.java Source code

Java tutorial

Introduction

Here is the source code for iristk.cfg.ABNFGrammar.java

Source

/*******************************************************************************
 * Copyright (c) 2014 Gabriel Skantze.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Public License v3.0
 * which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/gpl.html
 * 
 * Contributors:
 *     Gabriel Skantze - initial API and implementation
 ******************************************************************************/
package iristk.cfg;

import iristk.util.Language;
import iristk.util.ParsedInputStream;
import iristk.util.StringUtils;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.output.ByteArrayOutputStream;

/**
      
Extensions to ABNF:
    
*         0- repeat
+         1- repeat
_         Matches any word
red_      Matches all words that starts with "red"
     
 */

public class ABNFGrammar extends GrammarModel {

    public ABNFGrammar() {
    }

    public ABNFGrammar(InputStream is) throws IOException, GrammarException {
        readInputStream(is);
    }

    public ABNFGrammar(File file) throws IOException, GrammarException {
        this(file.toURI());
    }

    public ABNFGrammar(URI uri) throws IOException, GrammarException {
        this.uri = uri;
        readInputStream(uri.toURL().openStream());
    }

    public ABNFGrammar(String grammarString) throws IOException, GrammarException {
        readInputStream(new ByteArrayInputStream(grammarString.getBytes(StandardCharsets.UTF_8)));
    }

    public ABNFGrammar(Grammar grammar) {
        super(grammar);
    }

    private void readInputStream(InputStream is) throws IOException, GrammarException {
        boolean parsed = false;
        String rest = null;
        ParsedInputStream reader = new ParsedInputStream(is);
        String line = "";
        while (reader.available() > 0) {
            line += reader.readTo(';');
            int sb = StringUtils.countMatches(line, '{');
            int eb = StringUtils.countMatches(line, '}');
            if (sb == eb) {
                line = line.trim();
                if (line.length() > 0) {
                    line = line.replace("\n", "");
                    line = line.replace("\r", "");
                    if (parseLine(line)) {
                        parsed = true;
                    } else if (rest == null) {
                        rest = line;
                    }
                }
                line = "";
            } else {
                line += ";";
            }
        }
        if (getLanguage() == null)
            setLanguage(Language.ENGLISH_US);
        if (!parsed) {
            if (rest == null && line.length() > 0) {
                rest = line;
            }
            if (rest != null && rest.length() > 0) {
                parseLine("root $root");
                parseLine("$root = " + rest);
            }
        }
    }

    public boolean parseLine(String line) throws IOException, GrammarException {
        if (line.startsWith("language")) {
            setLanguage(new Language(line.substring(line.lastIndexOf(" ")).trim()));
            return true;
        } else if (line.startsWith("root")) {
            setRoot(line.substring(line.lastIndexOf(" ")).trim().substring(1));
            return true;
        } else if (line.startsWith("public $") || line.startsWith("$")) {
            Matcher m = Pattern.compile("(public +)?\\$([^ =]+) *= *(.*)").matcher(line);
            if (m.matches()) {
                boolean isPublic = (m.group(1) != null);
                String ruleId = m.group(2);
                String matchStr = m.group(3);
                addRule(ruleId, isPublic, parseMatches(matchStr));
            }
            return true;
        } else if (line.startsWith("#")) {
            return true;
        } else {
            return false;
        }
    }

    private void checkBalance(String str, char c1, char c2) throws GrammarException {
        if (StringUtils.countMatches(str, c1) != StringUtils.countMatches(str, c2)) {
            throw new GrammarException("Unbalanced '" + c1 + "' and '" + c2 + "' in: " + str);
        }
    }

    private List<Object> parseMatches(String matches) throws IOException, GrammarException {
        //System.out.println(matches);
        checkBalance(matches, '{', '}');
        checkBalance(matches, '(', ')');
        checkBalance(matches, '[', ']');
        checkBalance(matches, '<', '>');

        //System.out.println("#" + matches + "#");
        matches = matches.trim();
        List<String> groups = split(matches, "|");
        if (groups.size() > 1) {
            OneOf oneof = new OneOf();
            for (String gr : groups) {
                List<Object> grl = parseMatches(gr);
                if (grl.size() == 1) {
                    oneof.add(grl.get(0));
                } else {
                    oneof.add(new Item(grl));
                }
            }
            List<Object> result = new ArrayList<Object>();
            result.add(oneof);
            return result;
        }
        List<Object> result = new ArrayList<Object>();
        groups = split(matches, " ");
        if (groups.size() > 1) {
            for (String gr : groups) {
                List<Object> grl = parseMatches(gr);
                if (grl.size() == 1) {
                    result.add(grl.get(0));
                } else {
                    result.add(new Item(grl));
                }
            }
        } else {
            String group = groups.get(0);
            if (group.startsWith("(")) {
                List<Object> tag = null;
                if (group.endsWith("}")) {
                    Matcher m = Pattern.compile("\\{[^\\(\\{\\}\\)]*\\}$").matcher(group);
                    m.find();
                    tag = parseMatches(m.group(0));
                    group = m.replaceFirst("").trim();
                }
                List<Object> list = parseMatches(group.substring(1, group.length() - 1));
                if (list.size() == 1)
                    result.add(list.get(0));
                else
                    result.add(new Item(list));
                if (tag != null)
                    result.addAll(tag);
            } else if (group.startsWith("$<")) {
                String ref = group.replace("$<", "").replace(">", "").trim();
                try {
                    ABNFGrammar include = new ABNFGrammar(uri == null ? new URI(ref) : uri.resolve(ref));
                    include(include);
                    result.add(new RuleRef(include.includeRoot()));
                } catch (URISyntaxException e) {
                    e.printStackTrace();
                }
            } else if (group.startsWith("$")) {
                result.add(new RuleRef(group.substring(1)));
            } else if (group.startsWith("{")) {
                if (!group.endsWith("}"))
                    throw new GrammarException("Bad expression: " + group);
                result.add(new Tag(group.substring(1, group.length() - 1)));
            } else if (group.startsWith("[")) {
                if (!group.endsWith("]"))
                    throw new GrammarException("Bad expression: " + group);
                List<Object> list = parseMatches(group.substring(1, group.length() - 1));
                Item optional = new Item(list);
                optional.setRepeat(0, 1);
                result.add(optional);
            } else {
                result.add(group);
            }
        }
        return result;
    }

    private List<String> split(String string, String split) {
        List<String> result = new ArrayList<String>();
        String group = "";
        int para = 0;
        int cpara = 0;
        int spara = 0;
        for (int i = 0; i < string.length(); i++) {
            String c = string.substring(i, i + 1);
            if (c.equals(split) && para == 0 && cpara == 0 && spara == 0) {
                result.add(group.trim());
                group = "";
            } else {
                if (c.equals("(")) {
                    para++;
                } else if (c.equals(")")) {
                    para--;
                } else if (c.equals("{")) {
                    cpara++;
                } else if (c.equals("}")) {
                    cpara--;
                } else if (c.equals("[")) {
                    spara++;
                } else if (c.equals("]")) {
                    spara--;
                }
                group += c;
            }
        }
        result.add(group.trim());
        return result;
    }

    public static String matchToString(Grammar grammar, Object match) {
        if (grammar.isItem(match)) {
            String content = matchToString(grammar, grammar.getMatches(match));
            int min = grammar.getMinRepeat(match);
            int max = grammar.getMaxRepeat(match);
            if (min == 1 && max == 1) {
                return "(" + content + ")";
            } else if (min == 0 && max == 1) {
                return "[" + content + "]";
            } else {
                return "(" + content + ") " + "<" + min + "-" + (max == INFINITY ? "" : max) + ">";
            }
        } else if (grammar.isOneOf(match)) {
            String result = "";
            boolean first = true;
            for (Object part : grammar.getMatches(match)) {
                if (!first)
                    result += " | ";
                result += matchToString(grammar, part);
                first = false;
            }
            return "(" + result.trim() + ")";
        } else if (grammar.isRuleRef(match)) {
            return "$" + grammar.getRuleRef(match);
        } else if (grammar.isWord(match)) {
            return grammar.getWordString(match);
        } else if (grammar.isTag(match)) {
            return "{" + grammar.getTagScript(match) + "}";
        } else if (match instanceof List) {
            String result = "";
            for (Object part : (List) match) {
                result += matchToString(grammar, part) + " ";
            }
            return result.trim();
        } else {
            throw new RuntimeException("Cannot convert " + match);
        }
    }

    @Override
    public void marshal(OutputStream out) {
        PrintStream ps = new PrintStream(out);
        ps.println("#ABNF 1.0 UTF-8;\n");
        if (getLanguage() != null)
            ps.println("language " + getLanguage() + ";");
        if (getRoot() != null)
            ps.println("root $" + getRoot() + ";");
        ps.println();
        for (Object rule : getRules()) {
            if (isPublic(rule))
                ps.print("public ");
            ps.println("$" + getRuleId(rule) + " = " + matchToString(this, getMatches(rule)) + ";");
            ps.println();
        }
    }

    @Override
    public String toString() {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        marshal(baos);
        try {
            return new String(baos.toByteArray(), "utf-8");
        } catch (UnsupportedEncodingException e) {
            return e.getMessage();
        }
    }

    public static void main(String[] args) throws Exception {
        new ABNFGrammar("okay").marshal(System.out);
        //new ABNFGrammar(new SrgsGrammar(new File("app/example/src/app/example/grammar.xml"))).marshal(System.out);;
        //new SrgsGrammar(new ABNFGrammar(new File("app/social/name_grammar.abnf"))).marshal(System.out);;
        //ContextFreeGrammar cfg = new ABNFGrammar(new File("app/chess/src/iristk/app/chess/ChessGrammar.abnf"));
        //Grammar cfg = new SrgsGrammar(new ABNFGrammar(new File("app/chess/src/iristk/app/chess/ChessGrammar.abnf")));
        //Grammar cfg = new ABNFGrammar(new SrgsGrammar(new File("app/chess/src/iristk/app/chess/ChessGrammar.xml")));
        //cfg.marshal(System.out);
    }
}