Java tutorial
/* * Copyright (C) 2017 Baifendian Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.baifendian.swordfish.execserver.runner.flow; import com.baifendian.swordfish.common.config.BaseConfig; import com.baifendian.swordfish.common.enums.ExternalJobType; import com.baifendian.swordfish.common.hadoop.HdfsClient; import com.baifendian.swordfish.common.job.struct.node.BaseParam; import com.baifendian.swordfish.common.job.struct.node.BaseParamFactory; import com.baifendian.swordfish.common.mail.EmailManager; import com.baifendian.swordfish.common.utils.graph.DAGGraph; import com.baifendian.swordfish.common.utils.graph.Graph; import com.baifendian.swordfish.dao.DaoFactory; import com.baifendian.swordfish.dao.FlowDao; import com.baifendian.swordfish.dao.enums.ExecType; import com.baifendian.swordfish.dao.enums.FailurePolicyType; import com.baifendian.swordfish.dao.enums.FlowStatus; import com.baifendian.swordfish.dao.model.ExecutionFlow; import com.baifendian.swordfish.dao.model.ExecutionNode; import com.baifendian.swordfish.dao.model.FlowNode; import com.baifendian.swordfish.dao.model.FlowNodeRelation; import com.baifendian.swordfish.dao.model.flow.FlowDag; import com.baifendian.swordfish.dao.utils.json.JsonUtil; import com.baifendian.swordfish.execserver.exception.ExecTimeoutException; import com.baifendian.swordfish.execserver.job.JobContext; import com.baifendian.swordfish.execserver.parameter.ParamHelper; import com.baifendian.swordfish.execserver.parameter.SystemParamManager; import com.baifendian.swordfish.execserver.runner.node.NodeRunner; import com.baifendian.swordfish.execserver.utils.EnvHelper; import com.baifendian.swordfish.execserver.utils.LoggerUtil; import java.io.File; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.CancellationException; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * flow <p> */ public class FlowRunner implements Runnable { private final Logger logger = LoggerFactory.getLogger(getClass()); private static final String JOB_PREFIX = "FLOW"; /** * {@link FlowDao} */ private final FlowDao flowDao; /** * {@link ExecutionFlow} */ private final ExecutionFlow executionFlow; /** * {@link ExecutorService} */ private final ExecutorService nodeExecutorService; /** * ??, node name => ExecutionNode */ private final Map<String, ExecutionNode> executionNodeMap = new ConcurrentHashMap<>(); /** * ? nodeRunner */ private Map<NodeRunner, Future<Boolean>> activeNodeRunners = new ConcurrentHashMap<>(); /** * ? */ private final FailurePolicyType failurePolicyType; /** * ? */ private final int maxTryTimes; /** * ?, ??: */ private final int timeout; /** * ? (ms) */ private final long startTime; /** * ?? */ private volatile boolean shutdown; /** * @param context */ public FlowRunner(FlowRunnerContext context) { this.flowDao = DaoFactory.getDaoInstance(FlowDao.class); this.executionFlow = context.getExecutionFlow(); this.nodeExecutorService = context.getNodeExecutorService(); this.maxTryTimes = context.getMaxTryTimes(); this.timeout = context.getTimeout(); this.failurePolicyType = context.getFailurePolicyType(); this.startTime = executionFlow.getStartTime().getTime(); this.shutdown = false; } /** * ? */ @Override public void run() { // ??? ExecutionFlow newExecutionFlow = flowDao.queryExecutionFlow(executionFlow.getId()); // if (newExecutionFlow != null) { if (newExecutionFlow.getStatus().typeIsFinished()) { logger.info("flow is done: {}", executionFlow.getId()); return; } // ? // flowDao.deleteExecutionNodes(executionFlow.getId()); } else { // ? logger.info("flow is not exist: {}", executionFlow.getId()); return; } FlowStatus status = null; // String execLocalPath = BaseConfig.getFlowExecDir(executionFlow.getProjectId(), executionFlow.getFlowId(), executionFlow.getId()); logger.info("exec id:{}, current execution dir:{}, max try times:{}, timeout:{}, failure policy type:{}", executionFlow.getId(), execLocalPath, maxTryTimes, timeout, failurePolicyType); // ????, if (StringUtils.isEmpty(executionFlow.getWorkflowDataSub())) { Map<String, String> systemParamMap = SystemParamManager.buildSystemParam(executionFlow.getType(), executionFlow.getScheduleTime()); // ?, ${abc} = ${sf.system.bizdate}, $[yyyyMMdd] Map<String, String> customParamMap = executionFlow.getUserDefinedParamMap(); Map<String, String> allParamMap = new HashMap<>(); if (systemParamMap != null) { allParamMap.putAll(systemParamMap); } if (customParamMap != null) { allParamMap.putAll(customParamMap); } executionFlow.setWorkflowDataSub( ParamHelper.resolvePlaceholders(executionFlow.getWorkflowData(), allParamMap)); flowDao.updateExecutionFlowDataSub(executionFlow); } // try { // EnvHelper.workDirAndUserCreate(execLocalPath, executionFlow.getProxyUser(), logger); // ??, DAG ? FlowDag flowDag = JsonUtil.parseObject(executionFlow.getWorkflowData(), FlowDag.class); // workflow ? exec String workflowHdfsFile = BaseConfig.getHdfsWorkflowFilename(executionFlow.getProjectId(), executionFlow.getWorkflowName()); HdfsClient hdfsClient = HdfsClient.getInstance(); if (hdfsClient.exists(workflowHdfsFile)) { logger.info("get hdfs workflow file:{}", workflowHdfsFile); String destPath = execLocalPath + File.separator + executionFlow.getWorkflowName() + ".zip"; logger.info("Copy hdfs workflow: {} to local: {}", workflowHdfsFile, destPath); HdfsClient.getInstance().copyHdfsToLocal(workflowHdfsFile, destPath, false, true); // ?? workflow workflowName.zip File zipFile = new File(destPath); if (zipFile.exists()) { String cmd = String.format("unzip -o %s -d %s", destPath, execLocalPath); logger.info("call cmd:{}", cmd); Process process = Runtime.getRuntime().exec(cmd); int ret = process.waitFor(); if (ret != 0) { logger.error("run cmd error:{}", cmd); logger.error(IOUtils.toString(process.getErrorStream(), Charset.forName("UTF-8"))); } } else { logger.error("can't found workflow zip file:{}", zipFile.getPath()); } } else { logger.debug("hdfs workflow file:{} not exists", workflowHdfsFile); } // ???? "?" ? List<String> projectRes = genProjectResFiles(flowDag); // hdfs ?? EnvHelper.copyResToLocal(executionFlow.getProjectId(), execLocalPath, projectRes, logger); // ? Dag, Graph<String, FlowNode, FlowNodeRelation> dagGraph = genDagGraph(flowDag); // flow, ?? status = runFlow(dagGraph); } catch (ExecTimeoutException e) { logger.error("Exec flow timeout", e); clean(true); } catch (Exception e) { logger.error(String.format("run exec id: %s", executionFlow.getId()), e); clean(true); } finally { // if (status == null) { updateExecutionFlow(FlowStatus.FAILED); } else { // ExecutionFlow updateExecutionFlow(status); } // ?? postProcess(); } } /** * ?flow DAG <p> * * @return DAG */ private Graph<String, FlowNode, FlowNodeRelation> genDagGraph(FlowDag flowDag) { Graph<String, FlowNode, FlowNodeRelation> dagGraph = new DAGGraph<>(); if (CollectionUtils.isNotEmpty(flowDag.getNodes())) { for (FlowNode node : flowDag.getNodes()) { dagGraph.addVertex(node.getName(), node); } } if (CollectionUtils.isNotEmpty(flowDag.getEdges())) { for (FlowNodeRelation edge : flowDag.getEdges()) { dagGraph.addEdge(edge.getStartNode(), edge.getEndNode()); } } return dagGraph; } /** * ?? */ private List<String> genProjectResFiles(FlowDag flowDag) throws IllegalArgumentException, InvocationTargetException, NoSuchMethodException, InstantiationException, IllegalAccessException { List<FlowNode> nodes = flowDag.getNodes(); Set<String> projectFiles = new HashSet<>(); for (FlowNode node : nodes) { // ?? BaseParam baseParam = BaseParamFactory.getBaseParam(node.getType(), node.getParameter()); // ??? if (baseParam != null) { List<String> projectResourceFiles = baseParam.getProjectResourceFiles(); if (projectResourceFiles != null) { projectFiles.addAll(projectResourceFiles); } } } return new ArrayList<>(projectFiles); } /** * ? DAG: 1. ? start 2. , ??? 3. , ?, , END, , 4 4. * , ?, ?, ?, 5 5. ?, ??, ??, 2 6. ?, SUCCESS <p> * END: ??, ; ?? */ private FlowStatus runFlow(Graph<String, FlowNode, FlowNodeRelation> dagGraph) { // ??, ?? Semaphore semaphore = new Semaphore(0); // dagGraph ??, ?? try { for (String nodeName : dagGraph.topologicalSort()) { ExecutionNode executionNode = flowDao.queryExecutionNode(executionFlow.getId(), nodeName); // ? if (executionNode != null && executionNode.getStatus().typeIsFinished()) { dagGraph.removeVertex(nodeName); } } } catch (Exception e) { logger.error("Get topological of graph failed.", e); return FlowStatus.FAILED; } // Collection<String> startVertex = dagGraph.getStartVertex(); // ?? for (String nodeName : startVertex) { if (!executionNodeMap.containsKey(nodeName)) { // ? ExecutionNode executionNode = insertExecutionNode(executionFlow, nodeName); // executionNodeMap.put(nodeName, executionNode); // ?? submitNodeRunner(dagGraph.getVertex(nodeName), executionNode, semaphore); } } // ? FlowStatus status = FlowStatus.SUCCESS; // ?, while (!activeNodeRunners.isEmpty()) { boolean acquire = false; try { // , , acquire = semaphore.tryAcquire(calcNodeTimeout(), TimeUnit.SECONDS); } catch (InterruptedException e) { logger.error(e.getMessage(), e); } catch (ExecTimeoutException e) { logger.error(e.getMessage(), e); } // ?, ? if (!acquire) { clean(true); return FlowStatus.FAILED; } // ?, ? boolean done = false; while (!done) { // ? try { Thread.sleep(50); } catch (InterruptedException e) { logger.error(e.getMessage(), e); } // ??, ?? for (Map.Entry<NodeRunner, Future<Boolean>> entry : activeNodeRunners.entrySet()) { NodeRunner nodeRunner = entry.getKey(); Future<Boolean> future = entry.getValue(); // ? if (future.isDone()) { // ? done = true; // , activeNodeRunners.remove(nodeRunner); Boolean value = false; Date now = new Date(); try { value = future.get(); } catch (CancellationException e) { logger.error("task has been cancel"); // ? clean(true); return FlowStatus.KILL; } catch (InterruptedException e) { logger.error(e.getMessage(), e); } catch (ExecutionException e) { logger.error(e.getMessage(), e); } // if (!value) { // ?, ??? ExecutionNode executionNode = executionNodeMap.get(nodeRunner.getNodename()); // , 2, ?? 2 if (executionNode.getAttempt() < maxTryTimes) { executionNode.incAttempt(); // ? flowDao.updateExecutionNode(executionNode); // ??? submitNodeRunner(dagGraph.getVertex(nodeRunner.getNodename()), executionNode, semaphore); } else { // ?? status = FlowStatus.FAILED; executionNode.setEndTime(now); executionNode.setStatus(status); // ? flowDao.updateExecutionNode(executionNode); if (failurePolicyType == FailurePolicyType.END) { clean(true); return status; } } } else { // ? // ? ExecutionNode executionNode = executionNodeMap.get(nodeRunner.getNodename()); executionNode.setEndTime(now); executionNode.setStatus(FlowStatus.SUCCESS); flowDao.updateExecutionNode(executionNode); // ?, ?, ??? for (String nodeName : dagGraph.getPostNode(nodeRunner.getNodename())) { if (!executionNodeMap.containsKey(nodeName) && isPreNodesAllSuccess(dagGraph.getPreNode(nodeName))) { // ? ExecutionNode newExecutionNode = insertExecutionNode(executionFlow, nodeName); // executionNodeMap.put(nodeName, newExecutionNode); // ?? submitNodeRunner(dagGraph.getVertex(nodeName), newExecutionNode, semaphore); } } } break; } } } } return status; } /** * ?, , */ private ExecutionNode insertExecutionNode(ExecutionFlow executionFlow, String nodeName) { ExecutionNode executionNode = flowDao.queryExecutionNode(executionFlow.getId(), nodeName); if (executionNode != null) { return executionNode; } // ? executionNode = new ExecutionNode(); Date now = new Date(); executionNode.setExecId(executionFlow.getId()); executionNode.setName(nodeName); executionNode.setAttempt(0); executionNode.setStartTime(now); executionNode.setStatus(FlowStatus.INIT); executionNode.setJobId(LoggerUtil.genJobId(JOB_PREFIX, executionFlow.getId(), nodeName)); logger.info("insert execution node, id: {}, name: {}, start time: {}, status: {}, job id: {}", executionNode.getExecId(), nodeName, now, FlowStatus.INIT, LoggerUtil.genJobId(JOB_PREFIX, executionFlow.getId(), nodeName)); // ? flowDao.insertExecutionNode(executionNode); return executionNode; } /** * ?? NodeRunner */ private void submitNodeRunner(FlowNode flowNode, ExecutionNode executionNode, Semaphore semaphore) { JobContext jobContext = new JobContext(); jobContext.setExecutionFlow(executionFlow); jobContext.setExecutionNode(executionNode); jobContext.setFlowNode(flowNode); jobContext.setSemaphore(semaphore); // node runner NodeRunner nodeRunner = new NodeRunner(jobContext); Future<Boolean> future = nodeExecutorService.submit(nodeRunner); activeNodeRunners.putIfAbsent(nodeRunner, future); } /** * s <p> * * @return */ private int calcNodeTimeout() { int usedTime = (int) ((System.currentTimeMillis() - startTime) / 1000); int remainTime = timeout - usedTime; if (remainTime <= 0) { throw new ExecTimeoutException("workflow execution time out"); } return remainTime; } /** * ExecutionFlow <p> */ private void updateExecutionFlow(FlowStatus status) { // , shutdown , ? if (shutdown && status == FlowStatus.KILL && (executionFlow.getType() == ExecType.COMPLEMENT_DATA || executionFlow.getType() == ExecType.SCHEDULER)) { return; } Date now = new Date(); // ?? if (executionFlow.getStatus().typeIsNotFinished()) { executionFlow.setEndTime(now); executionFlow.setStatus(status); flowDao.updateExecutionFlow(executionFlow); } } /** * ExecutionFlow <p> ?, ??, ?. */ public void updateExecutionFlowToKillStatus(boolean updateKilled) { ExecutionFlow queryExecutionFlow = flowDao.queryExecutionFlow(executionFlow.getId()); if (updateKilled || (queryExecutionFlow.getType() != ExecType.SCHEDULER && queryExecutionFlow.getType() != ExecType.COMPLEMENT_DATA)) { updateToKilled(queryExecutionFlow); } } /** * kill ? * * @param executionFlow : flow */ private void updateToKilled(ExecutionFlow executionFlow) { if (executionFlow.getStatus().typeIsNotFinished()) { Date now = new Date(); executionFlow.setEndTime(now); executionFlow.setStatus(FlowStatus.KILL); flowDao.updateExecutionFlow(executionFlow); } } /** * */ private void updateNodeToKilled(ExecutionNode executionNode) { Date now = new Date(); executionNode.setStatus(FlowStatus.KILL); executionNode.setEndTime(now); flowDao.updateExecutionNode(executionNode); } /** * , ?? */ public void clean(boolean updateKilled) { // kill ? kill(); // ? updateUnfinishNodeStatus(updateKilled); } /** * */ public void shutdown() { this.shutdown = true; } /** * ?, ? */ private void updateUnfinishNodeStatus(boolean updateKilled) { Date now = new Date(); // ??? for (Map.Entry<NodeRunner, Future<Boolean>> entry : activeNodeRunners.entrySet()) { NodeRunner nodeRunner = entry.getKey(); Future<Boolean> future = entry.getValue(); // if (!future.isDone()) { // ?, ??, ??? if (updateKilled || (nodeRunner.getExecType() != ExecType.SCHEDULER && nodeRunner.getExecType() != ExecType.COMPLEMENT_DATA)) { ExecutionNode executionNode = nodeRunner.getExecutionNode(); updateNodeToKilled(executionNode); } } else { // ?? Boolean value = false; try { value = future.get(); if (value) { ExecutionNode executionNode = nodeRunner.getExecutionNode(); executionNode.setStatus(FlowStatus.SUCCESS); executionNode.setEndTime(now); flowDao.updateExecutionNode(executionNode); } } catch (InterruptedException e) { logger.error(e.getMessage(), e); } catch (ExecutionException e) { logger.error(e.getMessage(), e); } catch (CancellationException e) { // ? logger.error("task has been cancel, name:{}", nodeRunner.getNodename()); } catch (Exception e) { logger.error(e.getMessage(), e); } finally { if (!value) { if (updateKilled || (nodeRunner.getExecType() != ExecType.SCHEDULER && nodeRunner.getExecType() != ExecType.COMPLEMENT_DATA)) { ExecutionNode executionNode = nodeRunner.getExecutionNode(); updateNodeToKilled(executionNode); } } } } } } /** * kill */ private void kill() { synchronized (this) { if (activeNodeRunners.isEmpty()) { return; } logger.info("Kill has been called on exec id: {}, num: {}", executionFlow.getId(), activeNodeRunners.size()); // ? for (Map.Entry<NodeRunner, Future<Boolean>> entry : activeNodeRunners.entrySet()) { NodeRunner nodeRunner = entry.getKey(); Future<Boolean> future = entry.getValue(); if (!future.isDone()) { // kill ? logger.info("kill exec, id: {}, node: {}", executionFlow.getId(), nodeRunner.getNodename()); // ? nodeRunner.kill(); // , future.cancel(true); } } } } /** * flow ?? <p> */ private void postProcess() { logger.info("Develop mode is: {}", BaseConfig.isDevelopMode()); if (!BaseConfig.isDevelopMode()) { // ?, ?, ?? String execLocalPath = BaseConfig.getFlowExecDir(executionFlow.getProjectId(), executionFlow.getFlowId(), executionFlow.getId()); try { FileUtils.deleteDirectory(new File(execLocalPath)); } catch (IOException e) { logger.error(String.format("delete exec dir exception: %s", execLocalPath), e); } // ?, ? udf hdfsCleanUp(BaseConfig.getJobHiveUdfJarPath(executionFlow.getId(), ExternalJobType.WORKFLOW)); // ?, ? import/export hdfsCleanUp(BaseConfig.getHdfsImpExpDir(executionFlow.getProjectId(), executionFlow.getId())); } EmailManager.sendMessageOfExecutionFlow(executionFlow); } /** * ? hdfs */ private void hdfsCleanUp(String path) { try { if (HdfsClient.getInstance().exists(path)) { HdfsClient.getInstance().delete(path, true); } } catch (Exception e) { logger.error(String.format("cleanup hdfs dir exception: %s", path), e); } } /** * ?? OK */ private boolean isPreNodesAllSuccess(Set<String> preNodes) { // ?? if (CollectionUtils.isEmpty(preNodes)) { return true; } for (String preNode : preNodes) { ExecutionNode preFinishedNode = executionNodeMap.get(preNode); // if (preFinishedNode == null || preFinishedNode.getStatus().typeIsNotFinished()) { return false; } // , ? if (!preFinishedNode.getStatus().typeIsSuccess()/* && failurePolicyType == FailurePolicyType.END*/) { return false; } } return true; } }