// Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package emp.cloud.pigutils; import java.io.BufferedReader; import java.io.StringReader; import java.util.HashSet; import java.util.Properties; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.pig.ExecType; import org.apache.pig.Main; import org.apache.pig.PigException; import org.apache.pig.PigRunner.ReturnCode; import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil; import org.apache.pig.impl.PigContext; import org.apache.pig.impl.io.FileLocalizer; import org.apache.pig.impl.util.LogUtils; import org.apache.pig.impl.util.ObjectSerializer; import org.apache.pig.impl.util.PropertiesUtil; import org.apache.pig.impl.util.UDFContext; import org.apache.pig.tools.grunt.Grunt; import org.apache.pig.tools.pigstats.PigProgressNotificationListener; import org.apache.pig.tools.pigstats.PigStatsUtil; import org.apache.pig.tools.pigstats.ScriptState; /** * Based on {@link Main}, helper class for pig script execution. 
* * @author <a href="https://github.com/e-m-p">Mateusz Pytel</a> * @since 0.1 */ class EmbeddedPigRunner { private final static Log log = LogFactory.getLog(EmbeddedPigRunner.class); private static final String PROP_FILT_SIMPL_OPT = "pig.exec.filterLogicExpressionSimplifier"; static int executeScript(Configuration hadoopConfig, PigProgressNotificationListener listener, String taskName, String script) throws Throwable { boolean verbose = false; boolean gruntCalled = false; String logFileName = null; try { Properties properties = new Properties(); PropertiesUtil.loadDefaultProperties(properties); properties.putAll(ConfigurationUtil.toProperties(hadoopConfig)); HashSet<String> optimizerRules = new HashSet<String>(); ExecType execType = ExecType.MAPREDUCE; if (properties.getProperty("aggregate.warning") == null) { // by default warning aggregation is on properties.setProperty("aggregate.warning", "" + true); } if (properties.getProperty("opt.multiquery") == null) { // by default multiquery optimization is on properties.setProperty("opt.multiquery", "" + true); } if (properties.getProperty("stop.on.failure") == null) { // by default we keep going on error on the backend properties.setProperty("stop.on.failure", "" + false); } // set up client side system properties in UDF context UDFContext.getUDFContext().setClientSystemProps(properties); // create the context with the parameter PigContext pigContext = new PigContext(execType, properties); // create the static script state object ScriptState scriptState = ScriptState.start("", pigContext); if (listener != null) { scriptState.registerListener(listener); } if (!Boolean.valueOf(properties.getProperty(PROP_FILT_SIMPL_OPT, "false"))) { // turn off if the user has not explicitly turned on this // optimization optimizerRules.add("FilterLogicExpressionSimplifier"); } if (optimizerRules.size() > 0) { pigContext.getProperties().setProperty("pig.optimizer.rules", ObjectSerializer.serialize(optimizerRules)); } if 
(properties.get("udf.import.list") != null) PigContext.initializeImportList((String) properties.get("udf.import.list")); PigContext.setClassLoader(pigContext.createCl(null)); pigContext.getProperties().setProperty(PigContext.JOB_NAME, taskName); Grunt grunt = null; BufferedReader in; scriptState.setScript(script); in = new BufferedReader(new StringReader(script)); grunt = new Grunt(in, pigContext); gruntCalled = true; int results[] = grunt.exec(); return getReturnCodeForStats(results); // } catch (Exception e) { // if (e instanceof PigException) { // PigException pe = (PigException) e; // int rc = (pe.retriable()) ? ReturnCode.RETRIABLE_EXCEPTION // : ReturnCode.PIG_EXCEPTION; // PigStatsUtil.setErrorCode(pe.getErrorCode()); // } // PigStatsUtil.setErrorMessage(e.getMessage()); // // if (!gruntCalled) { // LogUtils.writeLog(e, logFileName, log, verbose, // "Error before Pig is launched"); // } // FileLocalizer.deleteTempFiles(); // // if (!gruntCalled) { // LogUtils.writeLog(e, logFileName, log, verbose, // "Error before Pig is launched"); // } // throw e; // } catch (Throwable e) { // PigStatsUtil.setErrorMessage(e.getMessage()); // throw new IllegalStateException(e); } finally { // clear temp files FileLocalizer.deleteTempFiles(); } } private static int getReturnCodeForStats(int[] stats) { return (stats[1] == 0) ? ReturnCode.SUCCESS // no failed jobs : (stats[0] == 0) ? ReturnCode.FAILURE // no succeeded jobs : ReturnCode.PARTIAL_FAILURE; // some jobs have failed } }