emp.cloud.pigutils.EmbeddedPigRunner.java Source code

Java tutorial

Introduction

Here is the source code for emp.cloud.pigutils.EmbeddedPigRunner.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package emp.cloud.pigutils;

import java.io.BufferedReader;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.pig.ExecType;
import org.apache.pig.Main;
import org.apache.pig.PigException;
import org.apache.pig.PigRunner.ReturnCode;
import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.util.LogUtils;
import org.apache.pig.impl.util.ObjectSerializer;
import org.apache.pig.impl.util.PropertiesUtil;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.tools.grunt.Grunt;
import org.apache.pig.tools.pigstats.PigProgressNotificationListener;
import org.apache.pig.tools.pigstats.PigStatsUtil;
import org.apache.pig.tools.pigstats.ScriptState;

/**
 * Based on {@link Main}, helper class for pig script execution.
 * <p>
 * The script is supplied as an in-memory string and is run through
 * {@link Grunt} against a {@link PigContext} built from the given Hadoop
 * configuration. All members are static; the class holds no state of its own.
 * 
 * @author <a href="https://github.com/e-m-p">Mateusz Pytel</a>
 * @since 0.1
 */
class EmbeddedPigRunner {

    private static final Log log = LogFactory.getLog(EmbeddedPigRunner.class);

    private static final String PROP_FILT_SIMPL_OPT = "pig.exec.filterLogicExpressionSimplifier";

    /**
     * Runs the given Pig script and maps the outcome to a {@link ReturnCode}
     * constant.
     * <p>
     * The effective properties are Pig's defaults overlaid with the entries of
     * {@code hadoopConfig}. The properties {@code aggregate.warning},
     * {@code opt.multiquery} and {@code stop.on.failure} are filled in with
     * {@code true}, {@code true} and {@code false} respectively when they are
     * not already set.
     * <p>
     * Exceptions are not translated or recorded here: whatever is thrown while
     * setting up or running the script propagates to the caller unchanged.
     * Temporary files are deleted through
     * {@link FileLocalizer#deleteTempFiles()} whether or not the run succeeds.
     * 
     * @param hadoopConfig
     *            Hadoop configuration whose entries are copied into the Pig
     *            properties
     * @param listener
     *            progress listener to register on the script state; may be
     *            {@code null}, in which case none is registered
     * @param taskName
     *            value stored under {@link PigContext#JOB_NAME}; must not be
     *            {@code null}
     * @param script
     *            text of the Pig script to execute; must not be {@code null}
     * @return {@link ReturnCode#SUCCESS} when no job failed,
     *         {@link ReturnCode#FAILURE} when no job succeeded, otherwise
     *         {@link ReturnCode#PARTIAL_FAILURE}
     * @throws NullPointerException
     *             if {@code taskName} or {@code script} is {@code null}
     * @throws Throwable
     *             anything raised while preparing or executing the script
     */
    static int executeScript(Configuration hadoopConfig, PigProgressNotificationListener listener, String taskName,
            String script) throws Throwable {

        // Reject null arguments before any Pig state is created. Without these
        // checks the same NullPointerException would surface much later, from
        // Properties.setProperty(...) or new StringReader(...), with no message.
        if (taskName == null) {
            throw new NullPointerException("taskName must not be null");
        }
        if (script == null) {
            throw new NullPointerException("script must not be null");
        }

        try {
            Properties properties = new Properties();
            PropertiesUtil.loadDefaultProperties(properties);
            properties.putAll(ConfigurationUtil.toProperties(hadoopConfig));

            // by default warning aggregation is on
            setIfAbsent(properties, "aggregate.warning", "true");
            // by default multiquery optimization is on
            setIfAbsent(properties, "opt.multiquery", "true");
            // by default we keep going on error on the backend
            setIfAbsent(properties, "stop.on.failure", "false");

            // set up client side system properties in UDF context
            UDFContext.getUDFContext().setClientSystemProps(properties);

            // create the context with the parameter
            PigContext pigContext = new PigContext(ExecType.MAPREDUCE, properties);

            // create the static script state object
            ScriptState scriptState = ScriptState.start("", pigContext);
            if (listener != null) {
                scriptState.registerListener(listener);
            }

            // Names collected here are handed to Pig through
            // "pig.optimizer.rules" in serialized form.
            HashSet<String> optimizerRules = new HashSet<String>();
            if (!Boolean.parseBoolean(properties.getProperty(PROP_FILT_SIMPL_OPT, "false"))) {
                // turn off if the user has not explicitly turned on this
                // optimization
                optimizerRules.add("FilterLogicExpressionSimplifier");
            }
            if (!optimizerRules.isEmpty()) {
                pigContext.getProperties().setProperty("pig.optimizer.rules",
                        ObjectSerializer.serialize(optimizerRules));
            }

            String udfImportList = properties.getProperty("udf.import.list");
            if (udfImportList != null) {
                PigContext.initializeImportList(udfImportList);
            }

            PigContext.setClassLoader(pigContext.createCl(null));
            pigContext.getProperties().setProperty(PigContext.JOB_NAME, taskName);

            scriptState.setScript(script);

            // Grunt reads the script from a reader, so wrap the in-memory text.
            BufferedReader in = new BufferedReader(new StringReader(script));
            Grunt grunt = new Grunt(in, pigContext);
            int[] results = grunt.exec();

            return getReturnCodeForStats(results);
        } finally {
            // clear temp files
            FileLocalizer.deleteTempFiles();
        }
    }

    /**
     * Stores {@code value} under {@code key} unless the property already has
     * a value.
     */
    private static void setIfAbsent(Properties properties, String key, String value) {
        if (properties.getProperty(key) == null) {
            properties.setProperty(key, value);
        }
    }

    /**
     * Translates the job counters returned by {@link Grunt#exec()} into a
     * {@link ReturnCode} constant.
     * 
     * @param stats
     *            counters where index 0 is read as the number of succeeded
     *            jobs and index 1 as the number of failed jobs
     * @return {@link ReturnCode#SUCCESS}, {@link ReturnCode#FAILURE} or
     *         {@link ReturnCode#PARTIAL_FAILURE}
     */
    private static int getReturnCodeForStats(int[] stats) {
        int succeeded = stats[0];
        int failed = stats[1];

        if (failed == 0) {
            return ReturnCode.SUCCESS; // no failed jobs
        }
        if (succeeded == 0) {
            return ReturnCode.FAILURE; // no succeeded jobs
        }
        return ReturnCode.PARTIAL_FAILURE; // some jobs have failed
    }

}