edu.buffalo.cse.pigout.parser.PigOutMacro.java Source code

Java tutorial

Introduction

Here is the source code for edu.buffalo.cse.pigout.parser.PigOutMacro.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.buffalo.cse.pigout.parser;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.Tree;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.impl.PigContext;

import org.apache.pig.parser.AliasMasker;
import org.apache.pig.parser.ParserException;
import org.apache.pig.parser.PigParserNode;
import org.apache.pig.parser.PigParserNode.InvocationPoint;
import org.apache.pig.parser.QueryLexer;
import org.apache.pig.parser.QueryParser;

import org.apache.pig.parser.QueryParserStringStream;
import org.apache.pig.tools.parameters.ParameterSubstitutionPreprocessor;
import org.apache.pig.tools.parameters.PreprocessorContext;

class PigOutMacro {

    private static final Log LOG = LogFactory.getLog(PigOutMacro.class);

    private String fileName;
    private String name;
    private String body;
    private List<String> params;
    private List<String> rets;
    private Map<String, PigOutMacro> seen;
    private Set<String> macroStack;
    private PigContext pigContext;
    private long idx = 0;

    // The start line number of this macro in the script
    private int startLine = 0;

    PigOutMacro(String name, String file, List<String> params, List<String> returns, String body,
            Map<String, PigOutMacro> seen) {
        this.name = name;
        this.params = (params == null) ? new ArrayList<String>() : params;
        this.rets = (returns == null) ? new ArrayList<String>() : returns;
        this.fileName = file;
        this.body = body;
        this.seen = seen;
        this.macroStack = new HashSet<String>();
        LOG.debug("Macro '" + name + "' is defined");
    }

    String getName() {
        return name;
    }

    void setStack(Set<String> stack) {
        macroStack = stack;
    }

    Set<String> getStack() {
        return macroStack;
    }

    void setStartLine(int start) {
        this.startLine = start;
    }

    int getStartLine() {
        return startLine;
    }

    void setPigContext(PigContext pigContext) {
        this.pigContext = pigContext;
    }
    /*
    private CommonTree inline(String[] inputs, String[] outputs, CommonTree t,
        String file) throws ParserException {
    String in = substituteParams(inputs, outputs, t.getLine(), file);
        
    Set<String> masks = new HashSet<String>();
    if (inputs != null) {
        for (String s : inputs) {
            masks.add(s);
        }
    }
        
    for (String s : outputs) {
        masks.add(s);
    }
        
    return maskAlias(in, masks, t, file);
    }*/

    private String substituteParams(String[] inputs, String[] outputs, int line, String file)
            throws ParserException {
        if ((inputs == null && !params.isEmpty()) || (inputs != null && inputs.length != params.size())) {
            String msg = getErrorMessage(file, line, "Failed to expand macro '" + name + "'",
                    "Expected number of parameters: " + params.size() + " actual number of inputs: "
                            + ((inputs == null) ? 0 : inputs.length));
            throw new ParserException(msg);
        }
        boolean isVoidReturn = false;
        if (rets.isEmpty()) {
            if (outputs != null && outputs.length > 0) {
                String msg = getErrorMessage(file, line, "Cannot expand macro '" + name + "'",
                        "Expected number of return aliases: 0" + " actual number of return values: "
                                + outputs.length);
                throw new ParserException(msg);
            }
            isVoidReturn = true;
        }

        if (!isVoidReturn
                && ((outputs == null && !rets.isEmpty()) || (outputs != null && outputs.length != rets.size()))) {
            String msg = getErrorMessage(file, line, "Failed to expand macro '" + name + "'",
                    "Expected number of return aliases: " + rets.size() + " actual number of return values: "
                            + ((outputs == null) ? 0 : outputs.length));
            throw new ParserException(msg);
        }

        String[] args = new String[params.size()];
        for (int i = 0; i < params.size(); i++) {
            if (inputs[i].startsWith("$"))
                inputs[i] = "\\\\" + inputs[i];
            args[i] = params.get(i) + "=" + inputs[i];
        }
        if (!isVoidReturn) {
            String[] args1 = new String[params.size() + rets.size()];
            System.arraycopy(args, 0, args1, 0, params.size());
            args = args1;
            for (int i = 0; i < rets.size(); i++) {
                args[params.size() + i] = rets.get(i) + "=" + outputs[i];
            }
        }

        StringWriter writer = new StringWriter();
        BufferedReader in = new BufferedReader(new StringReader(body));

        try {
            PreprocessorContext pc = new PreprocessorContext(50);
            pc.loadParamVal(Arrays.asList(args), null);

            Map<String, String> paramVal = pc.getParamVal();
            for (Map.Entry<String, String> e : pigContext.getParamVal().entrySet()) {
                if (paramVal.containsKey(e.getKey())) {
                    throw new ParserException("Macro contains argument or return value " + e.getKey()
                            + " which conflicts " + "with a Pig parameter of the same name.");
                } else {
                    paramVal.put(e.getKey(), e.getValue());
                }
            }

            ParameterSubstitutionPreprocessor psp = new ParameterSubstitutionPreprocessor(pc);
            psp.genSubstitutedFile(in, writer);
        } catch (Exception e) {
            // catch both ParserException and RuntimeException
            String msg = getErrorMessage(file, line, "Macro inline failed for macro '" + name + "'",
                    e.getMessage() + "\n Macro content: " + body);
            throw new ParserException(msg);
        }

        LOG.debug("--- after substition:\n" + writer.toString());

        return writer.toString();
    }

    /*
    private CommonTree maskAlias(String in, Set<String> masks, CommonTree tree,
        String file) throws ParserException {
        
    // this is the MACRO_INLINE node. the real line number is in the 
    // macro name node
    int line = tree.getChild(0).getLine();
        
    CharStream input = null;
    try {
        // parse macro body into ast 
        input = new QueryParserStringStream(in, file);
    } catch (IOException e) {
        String msg = getErrorMessage(file, line, "Failed to inline macro '"
                + name + "'", e.getMessage() + "\nmacro content: " + in);
        throw new ParserException(msg);
    }
        
    QueryLexer lex = new QueryLexer(input);
    CommonTokenStream tokens = new  CommonTokenStream(lex);
            
    QueryParser.query_return result = null;
    QueryParser parser = QueryParserUtils.createParser(tokens, startLine-1);
        
    try {
        result = parser.query();
    } catch (RecognitionException e) {
        String msg = (fileName == null) ? parser.getErrorHeader(e)
                : QueryParserUtils.generateErrorHeader(e, fileName);
        msg += " " + parser.getErrorMessage(e, parser.getTokenNames());
        String msg2 = getErrorMessage(file, line, "Failed to parse macro '"
                + name + "'", msg + "\nmacro content: " + in);
        throw new ParserException(msg2);
    }
        
    CommonTree ast = (CommonTree)result.getTree();
        
    LOG.debug("AST for macro '" + name + "':\n" + ast.toStringTree());
            
    List<CommonTree> macroDefNodes = new ArrayList<CommonTree>();
    traverseMacro(ast, macroDefNodes, "MACRO_DEF");
    if (!macroDefNodes.isEmpty()) {
        String fname = ((PigParserNode)ast).getFileName();
        String msg = getErrorMessage(fname, ast.getLine(),
                "Invalide macro definition", "macro '" + name
                        + "' contains macro definition.\nmacro content: "
                        + body);
        throw new ParserException(msg);
    }
        
    // add macro invocation points to the expanded macro tree
    PigParserNode pnode = (PigParserNode)tree;
    List<InvocationPoint> invStack = pnode.getInvocationStack();
    List<InvocationPoint> newInvStack = (invStack == null) ? new ArrayList<InvocationPoint>()
            : new ArrayList<InvocationPoint>(invStack);
        
    InvocationPoint pt = new InvocationPoint(line, file, name);
    newInvStack.add(pt);
    setInvocationStack(ast, newInvStack);
        
    // recursively expand the inline macros
    List<CommonTree> inlineNodes = new ArrayList<CommonTree>();
    traverseMacro(ast, inlineNodes, "MACRO_INLINE");
        
    for (CommonTree t : inlineNodes) {
        CommonTree newTree = macroInline(t,
                new ArrayList<PigOutMacro>(seen.values()), macroStack, pigContext);
        QueryParserUtils.replaceNodeWithNodeList(t, newTree, null);
    }
        
    // mask the aliases in the inlined macro 
    CommonTreeNodeStream nodes = new CommonTreeNodeStream(ast);
    AliasMasker walker = new AliasMasker(nodes);
    walker.setParams(masks, name, idx++);
        
    AliasMasker.query_return result2 = null;
    CommonTree commonTree = null;
        
    try {
        result2 = walker.query();
    } catch (RecognitionException e) {
        String msg = walker.getErrorHeader(e) + " "
                + walker.getErrorMessage(e, walker.getTokenNames());
        String msg2 = getErrorMessage(file, line, "Failed to mask macro '"
                + name + "'", msg + "\nmacro content: " + in);
        throw new ParserException(msg2);
    }
        
    commonTree = result2.tree;
        
    LOG.debug("AST for masked macro '" + name + "':\n"
            + commonTree.toStringTree());
        
    return commonTree;
    }
    */

    private static void setInvocationStack(Tree ast, List<InvocationPoint> stack) {
        PigParserNode node = (PigParserNode) ast;
        node.setInvocationStack(stack);
        int n = node.getChildCount();
        for (int i = 0; i < n; i++) {
            setInvocationStack(node.getChild(i), stack);
        }
    }

    /*
     * Validates that return alias exists in the macro body.
     */
    void validate() throws IOException {
        if (rets.isEmpty()) {
            return;
        }

        HashSet<String> testSet = new HashSet<String>();

        StreamTokenizer st = new StreamTokenizer(new StringReader(body));

        st.wordChars('.', '.');
        st.wordChars('0', '9');
        st.wordChars('_', '_');
        st.wordChars('$', '$');
        st.lowerCaseMode(false);
        st.ordinaryChar('/');
        st.slashStarComments(true);

        while (st.nextToken() != StreamTokenizer.TT_EOF) {
            if (matchWord(st, "define", false) && matchDollarAlias(st, true)) {
                testSet.add(st.sval.substring(1));
            } else if (matchDollarAlias(st, false)) {
                String prevWord = st.sval;
                if (matchWord(st, "if", true) || matchWord(st, "otherwise", true)) {
                    testSet.add(prevWord.substring(1));
                } else if (matchChar(st, '=', true) && !matchChar(st, '=', true)) {
                    testSet.add(prevWord.substring(1));
                } else if (matchChar(st, ',', true)) {
                    // possible mult-alias inlining of a macro
                    ArrayList<String> mlist = new ArrayList<String>();
                    mlist.add(prevWord);
                    if (isMultiValueReturn(st, mlist, true)) {
                        for (String s : mlist) {
                            testSet.add(s.substring(1));
                        }
                    }
                }
            } else if (matchChar(st, '-', false) && matchChar(st, '-', true)) {
                skipSingleLineComment(st);
            }
        }

        for (String s : rets) {
            if (!testSet.contains(s)) {
                throw new IOException("Macro '" + name + "' missing return alias: " + s);
            }
        }
    }

    // check for multi-value return pattern: alias, alias, ..., alias =
    private static boolean isMultiValueReturn(StreamTokenizer st, List<String> mlist, boolean comma)
            throws IOException {
        int lookahead = st.nextToken();
        if ((comma && lookahead == StreamTokenizer.TT_WORD) || (!comma && matchChar(st, ',', false))) {
            if (matchDollarAlias(st, false)) {
                mlist.add(st.sval);
            }
            return isMultiValueReturn(st, mlist, !comma);
        }
        if (!comma && lookahead == '=' && !matchChar(st, '=', true)) {
            return true;
        }
        return false;
    }

    private static boolean matchDollarAlias(StreamTokenizer st, boolean next) throws IOException {
        int type = next ? st.nextToken() : st.ttype;
        if (type == StreamTokenizer.TT_WORD && st.sval.charAt(0) == '$' && st.sval.length() > 1) {
            return true;
        }
        if (next)
            st.pushBack();
        return false;
    }

    private static boolean matchWord(StreamTokenizer st, String word, boolean next) throws IOException {
        int type = next ? st.nextToken() : st.ttype;
        if (type == StreamTokenizer.TT_WORD && st.sval.equalsIgnoreCase(word)) {
            return true;
        }
        if (next)
            st.pushBack();

        return false;
    }

    private static boolean matchChar(StreamTokenizer st, int c, boolean next) throws IOException {
        int type = next ? st.nextToken() : st.ttype;
        if (type == c)
            return true;
        if (next)
            st.pushBack();
        return false;
    }

    private static void skipSingleLineComment(StreamTokenizer st) throws IOException {
        int lineNo = st.lineno();
        int lookahead = st.nextToken();
        while (lookahead != StreamTokenizer.TT_EOF && lookahead != '\n') {
            if (st.lineno() > lineNo)
                break;
            lookahead = st.nextToken();
        }
        st.pushBack();
    }

    private static void traverseMacro(Tree t, List<CommonTree> nodes, String nodeType) {
        if (t.getText().equals(nodeType)) {
            nodes.add((CommonTree) t);
        }
        int n = t.getChildCount();
        for (int i = 0; i < n; i++) {
            Tree t0 = t.getChild(i);
            traverseMacro(t0, nodes, nodeType);
        }
    }

    /*
     * Macro inline nodes have the following form:
     * 
     * (MACRO_INLINE <name> (RETURN_VAL <values>) (PARAMS <values>)) 
     * 
     * Child nodes:
     *      0: macro name
     *      1: list of return values
     *      2: list of parameters
     */
    /*
    static CommonTree macroInline(CommonTree t,
                              List<PigOutMacro> macroDefs, Set<String> macroStack,
                              PigContext pigContext)
                              throws ParserException {
    // get name
    String mn = t.getChild(0).getText();
        
    // get macroDef
    PigOutMacro macro = null;
    for (PigOutMacro pm : macroDefs) {
        if (pm.getName().equals(mn)) {
            macro = pm;
            break;
        }
    }
        
    String file = ((PigParserNode)t).getFileName();
        
    if (macro == null) {
        String msg = getErrorMessage(file, t.getLine(),
                "Cannot expand macro '" + mn + "'",
                "Macro must be defined before expansion.");
        throw new ParserException(msg);
    }
        
    if (macroStack.contains(macro.name)) {
        String msg = getErrorMessage(file, t.getLine(),
                "Cannot expand macro '" + mn + "'",
                  "Macro can't be defined circularly.");
        throw new ParserException(msg);
    }
        
    // set nested macro call stack
    Set<String> newStack = new HashSet<String>(macroStack);
    newStack.add(macro.name);
    macro.setStack(newStack);
        
    // inform the macro of the PigContext
    // so it can substitute parameters from the main pigscript
    macro.setPigContext(pigContext);
        
    // get return values
    int n = t.getChild(1).getChildCount();
    String[] rets = new String[n];
    for (int i = 0; i < n; i++) {
        rets[i] = t.getChild(1).getChild(i).getText();
    }
        
    // get parameters
    int m = t.getChild(2).getChildCount();
    String[] params = new String[m];
    for (int i = 0; i < m; i++) {
        params[i] = t.getChild(2).getChild(i).getText();
    }
        
    return macro.inline(params, rets, t, file);
    }
    */

    private static String getErrorMessage(String file, int line, String header, String reason) {
        StringBuilder sb = new StringBuilder();
        sb.append("<");
        if (file != null) {
            sb.append("file ").append(file).append(", ");
        }
        sb.append("line ").append(line).append("> ").append(header);
        if (reason != null) {
            sb.append(". Reason: ").append(reason);
        }
        return sb.toString();
    }
}