Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.openflamingo.engine.handler; import org.openflamingo.core.exception.FileSystemException; import org.openflamingo.core.exception.WorkflowException; import org.openflamingo.engine.history.ActionHistoryService; import org.openflamingo.engine.scheduler.JobVariable; import org.openflamingo.engine.util.ManagedProcess; import org.openflamingo.model.rest.ActionHistory; import org.openflamingo.model.rest.HadoopCluster; import org.openflamingo.model.workflow.Pig; import org.openflamingo.model.workflow.Variable; import org.openflamingo.util.ExceptionUtils; import org.openflamingo.util.FileSystemUtils; import org.openflamingo.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.helpers.MessageFormatter; import java.io.*; import java.util.*; /** * Apache Pig Handler. * * @author Byoung Gon, Kim * @since 0.2 */ public class PigHandler extends ELSupportHandler<Pig> { private Logger logger = LoggerFactory.getLogger(PigHandler.class); private Pig pig; public static final int STRING_BUFFER_SIZE = 100; private static final String JOB_ID_LOG_PREFIX = "HadoopJobId: "; /** * Apache Hadoop Cluster */ private HadoopCluster hadoopCluster; public PigHandler(Object pig) { this.pig = (Pig) pig; } @Override void before() { this.hadoopCluster = (HadoopCluster) this.getJobDataMap().get(JobVariable.HADOOP_CLUSTER); } @Override void after() { } @Override void executeInternal() { log(true, "-------------------------------------------"); log(true, "Apache Pig Job"); log(true, "-------------------------------------------"); log(true, "Workflow ID : {}", this.actionContext.getWorkflowContext().getWorkflowId()); log(true, "Workflow Name : {}", this.actionContext.getWorkflowContext().getWorkflowDomain().getWorkflowName()); log(true, "Action ID : {}", this.actionContext.getActionId()); log(true, "Action Name : {}", this.actionContext.getActionDescription()); log(true, "Base Path : {}", actionBasePath); log(true, "-------------------------------------------"); if (globalVariables != null) { if (globalVariables.size() > 0) { log(true, "Global Variables :"); Set<Object> keys = globalVariables.keySet(); for (Iterator<Object> iterator = keys.iterator(); iterator.hasNext();) { String key = (String) iterator.next(); log(true, "\t{} = {}", key, globalVariables.getProperty(key)); } } } // Pig Runner? ? List<String> params = new ArrayList<String>(); StringBuilder udfBuilder = new StringBuilder(); if (pig.getUdfJar() != null) { List<String> udfJars = pig.getUdfJar(); for (String udfJar : udfJars) { ArtifactLoader artifactLoader = ArtifactLoaderFactory .getArtifactLoader(actionContext.getWorkflowContext()); String jarPath = artifactLoader.load(udfJar.trim()); udfBuilder.append("REGISTER ").append(jarPath).append(";").append("\n"); } } Properties varProps = new Properties(); if (pig.getVariables() != null && pig.getVariables().getVariable().size() > 0) { List<Variable> vars = pig.getVariables().getVariable(); for (Variable var : vars) { varProps.put(var.getName(), get(var.getValue())); } } // Pig Script evaluation. String pigScript = pig.getScript(); String evaluatedScript = udfBuilder.toString() + get(pigScript, varProps); log(true, "Pig Script : \n{}", evaluatedScript); // Pig Script ??? . String pigScriptPath = actionBasePath + "/script.pig"; FileSystemUtils.saveToFile(evaluatedScript.getBytes(), pigScriptPath); FileSystemUtils.saveToFile(getHadoopSiteXml().getBytes(), actionBasePath + "/core-site.xml"); params.add("-file"); params.add(pigScriptPath); logger.debug("Pig Script '{}'? .", pigScriptPath); String propertiesPath = actionBasePath + "/pig.properties"; String props = getPropertyFile(pig); FileSystemUtils.saveToFile(props.getBytes(), propertiesPath); params.add("-propertyFile"); params.add(propertiesPath); logger.debug("Properties ?? '{}'? .", propertiesPath); // Pig? Log4J Properties ?? ??? . String log4jPath = getLog4JPropertiesPath(actionBasePath, logPath); actionContext.setValue("basePath", actionBasePath); actionContext.setValue("logPath", logPath); actionContext.setValue("scriptPath", pigScriptPath); actionContext.setValue("propertyPath", propertiesPath); actionContext.setValue("script", pigScriptPath); /* params.add("-log4jconf"); params.add(log4jPath); params.add("-logfile"); params.add(logPath); */ logger.debug("Pig Log4J ?({})? ?.", log4jPath); if (pig.getVariables() != null && pig.getVariables().getVariable().size() > 0) { List<Variable> variables = pig.getVariables().getVariable(); log(true, "Variables :"); for (Variable var : variables) { log(true, "\t{} = {}", var.getName(), get(var.getValue())); } } log(true, "-------------------------------------------"); log(true, "Hadoop Cluster :"); log(true, "\tfs.default.name = {}", hadoopCluster.getHdfsUrl()); log(true, "\tmapred.job.tracker = {}", hadoopCluster.getJobTrackerIP() + ":" + hadoopCluster.getJobTrackerPort()); log(true, "-------------------------------------------"); log(true, "Environments :"); Map<String, String> environment = getEnv(); Set<String> envsKeySet = environment.keySet(); for (String key : envsKeySet) { log(true, "\t{} = {}", key, environment.get(key)); } log(true, "-------------------------------------------"); log(true, "Java System Properties : "); Properties properties = System.getProperties(); Set<Object> keys = properties.keySet(); for (Object key : keys) { log(true, "\t{} = {}", key, get((String) properties.get(key))); } String[] args = params.toArray(new String[params.size()]); List<String> command = buildCommand(); for (String arg : args) { command.add(arg); } log(true, "-------------------------------------------"); String commandline = StringUtils.collectionToDelimitedString(command, " "); FileSystemUtils.saveToFile(commandline.getBytes(), actionBasePath + "/command.sh"); log(true, "Command : {}", commandline); log(true, "-------------------------------------------"); // Update Action History ActionHistory actionHistory = getActionHistory(); actionHistory.setLogPath(logPath); actionHistory.setCommand(commandline); actionHistory.setScript(evaluatedScript); actionContext.getWorkflowContext().getBean(ActionHistoryService.class).update(actionHistory); actionContext.setObject(JobVariable.ACTION_HISTORY, actionHistory); try { ManagedProcess managedProcess = new ManagedProcess(command, getEnv(), actionBasePath, logger, fileWriter); managedProcess.run(); if (new File(logPath).exists()) { handleError(logPath); String jobIds = getHadoopJobIds(logPath); if (!StringUtils.isEmpty(jobIds)) { actionContext.setValue("hadoopJobIds", jobIds); log(true, " Pig Hadoop Job ID {} .", jobIds); } } } catch (Exception e) { if (new File(logPath).exists()) { handleError(logPath); String jobIds = getHadoopJobIds(logPath); if (!StringUtils.isEmpty(jobIds)) { actionContext.setValue("hadoopJobIds", jobIds); log(true, " Pig Hadoop Job ID {} .", jobIds); } } String cause = getFailedCause(logPath); throw new WorkflowException(ExceptionUtils.getMessage("{}", cause), e); } } private String getFailedCause(String logFile) { try { StringBuilder builder = new StringBuilder(STRING_BUFFER_SIZE); if (!new File(logFile).exists()) { logger.warn( "Pig ?({})? ?? ??? ? .", logFile); return builder.toString(); } BufferedReader br = new BufferedReader(new FileReader(logFile)); String line = br.readLine(); String separator = ""; while (line != null) { String prefix = "org.apache.pig.impl.logicalLayer.FrontendException:"; if (line.contains(prefix)) { int start = line.indexOf(prefix) + prefix.length(); String cause = line.substring(start).trim(); builder.append(separator).append(cause); break; } line = br.readLine(); } br.close(); return builder.toString(); } catch (Exception ex) { throw new FileSystemException( ExceptionUtils.getMessage("Cannot load a log file '{}' of Apache Pig.", logFile), ex); } } /** * Hadoop MapReduce Job? ??? . * * @return Hadoop MapReduce Job? ?? */ private List<String> buildCommand() { List<String> command = new LinkedList<String>(); command.add("/bin/bash"); command.add(getFlamingoConf("pig.home") + "/bin/pig"); return command; } private String getHadoopSiteXml() { StringBuilder builder = new StringBuilder(); builder.append("<configuration>"); builder.append(MessageFormatter.format("<property><name>fs.default.name</name><value>{}</value></property>", hadoopCluster.getHdfsUrl()).getMessage()); builder.append( MessageFormatter .format("<property><name>mapred.job.tracker</name><value>{}</value></property>", hadoopCluster.getJobTrackerIP() + ":" + hadoopCluster.getJobTrackerPort()) .getMessage()); if (pig.getConfiguration() != null && pig.getConfiguration().getVariable() != null) { List<Variable> vars = pig.getConfiguration().getVariable(); for (Variable var : vars) { builder.append(MessageFormatter.format("<property><name>{}</name><value>{}</value></property>", var.getName(), get(var.getValue())).getMessage()); } } builder.append("</configuration>"); return builder.toString(); } private String getPropertyFile(Pig pig) { Properties props = new Properties(); props.put("fs.default.name", hadoopCluster.getHdfsUrl()); props.put("mapred.job.tracker", hadoopCluster.getJobTrackerIP() + ":" + hadoopCluster.getJobTrackerPort()); if (pig.getVariables() != null && pig.getVariables().getVariable().size() > 0) { List<Variable> vars = pig.getVariables().getVariable(); for (Variable variable : vars) { props.put(variable.getName(), get(variable.getValue())); } } if (pig.getConfiguration() != null && pig.getConfiguration().getVariable() != null) { List<Variable> vars = pig.getConfiguration().getVariable(); for (Variable var : vars) { props.put(var.getName(), get(var.getValue())); } } StringWriter writer = new StringWriter(); props.list(new PrintWriter(writer)); String properties = writer.getBuffer().toString(); logger.debug("Properties ?? ? ? .\n{}", properties); return properties; } /** * Action? ? Pig ? Log4J Properties ?? . * * @param actionBasePath ? * @param logPath ?? * @return Log4J Properties ?? */ private String getLog4JPropertiesPath(String actionBasePath, String logPath) { String propertiesPath = actionBasePath + "/log4j.properties"; try { Properties props = new Properties(); props.setProperty("log4j.logger.org.apache.pig", "INFO, B, stdout"); props.setProperty("log4j.logger.org.apache.hadoop", "INFO, B, stdout"); props.setProperty("log4j.logger.org.apache.commons", "INFO, B, stdout"); props.setProperty("log4j.appender.B", "org.apache.log4j.FileAppender"); props.setProperty("log4j.appender.B.file", logPath); props.setProperty("log4j.appender.B.layout", "org.apache.log4j.PatternLayout"); props.setProperty("log4j.appender.B.layout.ConversionPattern", "%d %-5p [%c] %m%n"); props.setProperty("log4j.appender.stdout", "org.apache.log4j.ConsoleAppender"); props.setProperty("log4j.appender.stdout.layout", "org.apache.log4j.PatternLayout"); props.setProperty("log4j.appender.stdout.layout.ConversionPattern", "%d %-5p [%c] %m%n"); OutputStream os = new FileOutputStream(propertiesPath); props.store(os, ""); os.close(); return propertiesPath; } catch (Exception ex) { throw new FileSystemException(ExceptionUtils.getMessage( "Action? Log4J ? '{}'? .", actionBasePath), ex); } } /** * Pig ?? ? ? ?? ? . * * @param pigLogPath Pig Log ?? */ private void handleError(String pigLogPath) { try { String log = FileSystemUtils.loadFromFile(pigLogPath); logger.warn("Pig ?? ? ? .\n{}", log); } catch (Exception ex) { throw new FileSystemException( ExceptionUtils.getMessage("Pig ? '{}'? .", pigLogPath), ex); } } /** * Pig ?? Hadoop Job ID . * * @param logFile Pig ? * @return comma-separated ? */ public static String getHadoopJobIds(String logFile) { try { StringBuilder builder = new StringBuilder(STRING_BUFFER_SIZE); if (!new File(logFile).exists()) { return builder.toString(); } BufferedReader br = new BufferedReader(new FileReader(logFile)); String line = br.readLine(); String separator = ","; while (line != null) { if (line.contains(JOB_ID_LOG_PREFIX)) { int jobIdStarts = line.indexOf(JOB_ID_LOG_PREFIX) + JOB_ID_LOG_PREFIX.length(); String jobId = line.substring(jobIdStarts); int jobIdEnds = jobId.indexOf(" "); if (jobIdEnds > -1) { jobId = jobId.substring(0, jobId.indexOf(" ")); } if (builder.length() > 0) { builder.append(separator); } builder.append(jobId); } line = br.readLine(); } br.close(); return builder.toString(); } catch (Exception ex) { throw new FileSystemException( ExceptionUtils.getMessage("Pig ?({})? .", logFile), ex); } } @Override public Pig getModel() { return pig; } /** * Java Virtual Machine? ? Key Value . * * @return Key Value ? ? ? */ private Map<String, String> getEnv() { Map<String, String> envs = new HashMap(); envs.putAll(System.getenv()); envs.put("HADOOP_CONF_DIR", actionBasePath); if (getFlamingoConf("java.home") != null) envs.put("JAVA_HOME", getFlamingoConf("java.home")); if (getFlamingoConf("hadoop.home") != null) envs.put("HADOOP_HOME", getFlamingoConf("hadoop.home")); if (getFlamingoConf("pig.home") != null) envs.put("PIG_HOME", getFlamingoConf("pig.home")); if (getFlamingoConf("hive.home") != null) envs.put("HIVE_HOME", getFlamingoConf("hive.home")); if (getFlamingoConf("hadoop.user.name") != null && !StringUtils.isEmpty(getFlamingoConf("hadoop.user.name"))) { envs.put("HADOOP_USER_NAME", getFlamingoConf("hadoop.user.name")); } return envs; } }