com.baifendian.swordfish.execserver.runner.flow.FlowRunner.java Source code

Java tutorial

Introduction

Here is the source code for com.baifendian.swordfish.execserver.runner.flow.FlowRunner.java

Source

/*
 * Copyright (C) 2017 Baifendian Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *          http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.baifendian.swordfish.execserver.runner.flow;

import com.baifendian.swordfish.common.config.BaseConfig;
import com.baifendian.swordfish.common.enums.ExternalJobType;
import com.baifendian.swordfish.common.hadoop.HdfsClient;
import com.baifendian.swordfish.common.job.struct.node.BaseParam;
import com.baifendian.swordfish.common.job.struct.node.BaseParamFactory;
import com.baifendian.swordfish.common.mail.EmailManager;
import com.baifendian.swordfish.common.utils.graph.DAGGraph;
import com.baifendian.swordfish.common.utils.graph.Graph;
import com.baifendian.swordfish.dao.DaoFactory;
import com.baifendian.swordfish.dao.FlowDao;
import com.baifendian.swordfish.dao.enums.ExecType;
import com.baifendian.swordfish.dao.enums.FailurePolicyType;
import com.baifendian.swordfish.dao.enums.FlowStatus;
import com.baifendian.swordfish.dao.model.ExecutionFlow;
import com.baifendian.swordfish.dao.model.ExecutionNode;
import com.baifendian.swordfish.dao.model.FlowNode;
import com.baifendian.swordfish.dao.model.FlowNodeRelation;
import com.baifendian.swordfish.dao.model.flow.FlowDag;
import com.baifendian.swordfish.dao.utils.json.JsonUtil;
import com.baifendian.swordfish.execserver.exception.ExecTimeoutException;
import com.baifendian.swordfish.execserver.job.JobContext;
import com.baifendian.swordfish.execserver.parameter.ParamHelper;
import com.baifendian.swordfish.execserver.parameter.SystemParamManager;
import com.baifendian.swordfish.execserver.runner.node.NodeRunner;
import com.baifendian.swordfish.execserver.utils.EnvHelper;
import com.baifendian.swordfish.execserver.utils.LoggerUtil;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

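/**
 * Runs one workflow execution: prepares the local working directory, builds the DAG of flow
 * nodes and executes the nodes through {@link NodeRunner} tasks. <p>
 */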
public class FlowRunner implements Runnable {

    /**
     * Per-instance logger, named after the runtime class
     */
    private final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Prefix passed to {@link LoggerUtil#genJobId} when a node job id is generated
     */
    private static final String JOB_PREFIX = "FLOW";

    /**
     * DAO used to query and persist execution flows and execution nodes, see {@link FlowDao}
     */
    private final FlowDao flowDao;

    /**
     * The workflow execution handled by this runner, see {@link ExecutionFlow}
     */
    private final ExecutionFlow executionFlow;

    /**
     * Executor the node runners are submitted to, see {@link ExecutorService}
     */
    private final ExecutorService nodeExecutorService;

    /**
     * Execution nodes created or loaded during this run, node name => ExecutionNode
     */
    private final Map<String, ExecutionNode> executionNodeMap = new ConcurrentHashMap<>();

    /**
     * Submitted node runners that have not been processed yet, nodeRunner => its future
     */
    private Map<NodeRunner, Future<Boolean>> activeNodeRunners = new ConcurrentHashMap<>();

    /**
     * Failure policy of the flow; with {@link FailurePolicyType#END} the flow stops at the first
     * node that finally fails
     */
    private final FailurePolicyType failurePolicyType;

    /**
     * Retry limit: a failed node is resubmitted while its attempt counter is below this value
     */
    private final int maxTryTimes;

    /**
     * Timeout of the whole flow, unit: seconds (compared with the seconds elapsed since startTime)
     */
    private final int timeout;

    /**
     * Start time of the execution flow (ms)
     */
    private final long startTime;

    /**
     * Set by {@link #shutdown()}; while true, a KILL status of SCHEDULER / COMPLEMENT_DATA
     * executions is not written back by updateExecutionFlow
     */
    private volatile boolean shutdown;

    /**
     * @param context
     */
    public FlowRunner(FlowRunnerContext context) {
        this.flowDao = DaoFactory.getDaoInstance(FlowDao.class);
        this.executionFlow = context.getExecutionFlow();
        this.nodeExecutorService = context.getNodeExecutorService();
        this.maxTryTimes = context.getMaxTryTimes();
        this.timeout = context.getTimeout();
        this.failurePolicyType = context.getFailurePolicyType();
        this.startTime = executionFlow.getStartTime().getTime();
        this.shutdown = false;
    }

    /**
     * Executes the workflow: checks that the execution still exists and is unfinished, resolves
     * the parameter placeholders of the workflow data, prepares the local working directory
     * (workflow package from HDFS plus project resources), runs the DAG and finally persists the
     * resulting status and performs the post-processing.
     */
    @Override
    public void run() {
        // Re-read the execution from the database before doing any work
        ExecutionFlow newExecutionFlow = flowDao.queryExecutionFlow(executionFlow.getId());

        if (newExecutionFlow == null) {
            logger.info("flow is not exist: {}", executionFlow.getId());
            return;
        }

        if (newExecutionFlow.getStatus().typeIsFinished()) {
            logger.info("flow is done: {}", executionFlow.getId());
            return;
        }

        // Stays null when the flow could not be run to an end; reported as FAILED in that case
        FlowStatus status = null;

        // Local working directory of this execution
        String execLocalPath = BaseConfig.getFlowExecDir(executionFlow.getProjectId(), executionFlow.getFlowId(),
                executionFlow.getId());

        logger.info("exec id:{}, current execution dir:{}, max try times:{}, timeout:{}, failure policy type:{}",
                executionFlow.getId(), execLocalPath, maxTryTimes, timeout, failurePolicyType);

        // Resolve placeholders only once: skip when the substituted workflow data already exists
        if (StringUtils.isEmpty(executionFlow.getWorkflowDataSub())) {
            Map<String, String> systemParamMap = SystemParamManager.buildSystemParam(executionFlow.getType(),
                    executionFlow.getScheduleTime());

            Map<String, String> customParamMap = executionFlow.getUserDefinedParamMap();

            // Custom parameters are added last, so they override system parameters with equal keys
            Map<String, String> allParamMap = new HashMap<>();

            if (systemParamMap != null) {
                allParamMap.putAll(systemParamMap);
            }

            if (customParamMap != null) {
                allParamMap.putAll(customParamMap);
            }

            executionFlow.setWorkflowDataSub(
                    ParamHelper.resolvePlaceholders(executionFlow.getWorkflowData(), allParamMap));

            flowDao.updateExecutionFlowDataSub(executionFlow);
        }

        try {
            // Create the working directory (and the proxy user, as the helper name suggests)
            EnvHelper.workDirAndUserCreate(execLocalPath, executionFlow.getProxyUser(), logger);

            // Parse the workflow definition into its DAG description.
            // NOTE(review): this parses the raw workflow data, not the placeholder-resolved copy
            // stored above - confirm that this is intended.
            FlowDag flowDag = JsonUtil.parseObject(executionFlow.getWorkflowData(), FlowDag.class);

            // Download the workflow package into the exec directory when it exists on HDFS
            String workflowHdfsFile = BaseConfig.getHdfsWorkflowFilename(executionFlow.getProjectId(),
                    executionFlow.getWorkflowName());
            HdfsClient hdfsClient = HdfsClient.getInstance();

            if (hdfsClient.exists(workflowHdfsFile)) {
                logger.info("get hdfs workflow file:{}", workflowHdfsFile);

                String destPath = execLocalPath + File.separator + executionFlow.getWorkflowName() + ".zip";
                logger.info("Copy hdfs workflow: {} to local: {}", workflowHdfsFile, destPath);

                hdfsClient.copyHdfsToLocal(workflowHdfsFile, destPath, false, true);

                // The downloaded package is named workflowName.zip and is unpacked in place
                File zipFile = new File(destPath);
                if (zipFile.exists()) {
                    unzipWorkflowPackage(destPath, execLocalPath);
                } else {
                    logger.error("can't found workflow zip file:{}", zipFile.getPath());
                }
            } else {
                logger.debug("hdfs workflow file:{} not exists", workflowHdfsFile);
            }

            // Collect the project resource files referenced by the nodes
            List<String> projectRes = genProjectResFiles(flowDag);

            // Copy those resources into the exec directory
            EnvHelper.copyResToLocal(executionFlow.getProjectId(), execLocalPath, projectRes, logger);

            // Build the DAG
            Graph<String, FlowNode, FlowNodeRelation> dagGraph = genDagGraph(flowDag);

            // Run the flow and remember the resulting status
            status = runFlow(dagGraph);
        } catch (ExecTimeoutException e) {
            logger.error("Exec flow timeout", e);
            clean(true);
        } catch (Exception e) {
            logger.error(String.format("run exec id: %s", executionFlow.getId()), e);
            clean(true);
        } finally {
            // No status means the flow was aborted by an exception
            if (status == null) {
                updateExecutionFlow(FlowStatus.FAILED);
            } else {
                updateExecutionFlow(status);
            }

            // Clean-up and notification
            postProcess();
        }
    }

    /**
     * Unpacks the workflow package with the external {@code unzip} tool. A non-zero exit code is
     * only logged, it does not abort the flow.
     * <p>
     * The command is started from an argument list, so paths containing whitespace are passed
     * intact, and the merged stdout/stderr is drained before waiting for the exit code, so the
     * child cannot block on a full pipe buffer.
     *
     * @param zipPath   local path of the downloaded zip file
     * @param targetDir directory the archive is extracted into
     * @throws IOException          if the process cannot be started or its output cannot be read
     * @throws InterruptedException if the thread is interrupted while waiting for the process
     */
    private void unzipWorkflowPackage(String zipPath, String targetDir)
            throws IOException, InterruptedException {
        String cmd = String.format("unzip -o %s -d %s", zipPath, targetDir);

        logger.info("call cmd:{}", cmd);

        ProcessBuilder builder = new ProcessBuilder("unzip", "-o", zipPath, "-d", targetDir);
        builder.redirectErrorStream(true);

        Process process = builder.start();

        try {
            // Reading to end-of-stream returns once the child has closed its output
            String output = IOUtils.toString(process.getInputStream(), Charset.forName("UTF-8"));

            int ret = process.waitFor();
            if (ret != 0) {
                logger.error("run cmd error:{}", cmd);
                logger.error(output);
            }
        } finally {
            // Releases the process streams; also stops the child if waiting was interrupted
            process.destroy();
        }
    }

    /**
     * Builds the DAG of a flow: every node becomes a vertex keyed by its name, every relation
     * becomes an edge from its start node to its end node. <p>
     *
     * @param flowDag parsed workflow definition
     * @return the DAG, empty when the definition has neither nodes nor edges
     */
    private Graph<String, FlowNode, FlowNodeRelation> genDagGraph(FlowDag flowDag) {
        Graph<String, FlowNode, FlowNodeRelation> graph = new DAGGraph<>();

        // Vertices first, so that the edges below refer to known nodes
        List<FlowNode> flowNodes = flowDag.getNodes();
        if (!CollectionUtils.isEmpty(flowNodes)) {
            for (FlowNode flowNode : flowNodes) {
                graph.addVertex(flowNode.getName(), flowNode);
            }
        }

        if (!CollectionUtils.isEmpty(flowDag.getEdges())) {
            for (FlowNodeRelation relation : flowDag.getEdges()) {
                graph.addEdge(relation.getStartNode(), relation.getEndNode());
            }
        }

        return graph;
    }

    /**
     * Collects the project resource files referenced by the nodes of a flow, without duplicates.
     *
     * @param flowDag parsed workflow definition
     * @return the distinct resource file names; empty when the flow has no nodes or no node
     *         references a resource
     */
    private List<String> genProjectResFiles(FlowDag flowDag) throws IllegalArgumentException,
            InvocationTargetException, NoSuchMethodException, InstantiationException, IllegalAccessException {
        List<FlowNode> nodes = flowDag.getNodes();
        Set<String> projectFiles = new HashSet<>();

        // A flow without nodes is tolerated here in the same way as in genDagGraph
        if (CollectionUtils.isEmpty(nodes)) {
            return new ArrayList<>(projectFiles);
        }

        for (FlowNode node : nodes) {
            // Parse the node parameter according to the node type
            BaseParam baseParam = BaseParamFactory.getBaseParam(node.getType(), node.getParameter());

            // Node types without a parameter object contribute nothing
            if (baseParam == null) {
                continue;
            }

            List<String> projectResourceFiles = baseParam.getProjectResourceFiles();
            if (projectResourceFiles != null) {
                projectFiles.addAll(projectResourceFiles);
            }
        }

        return new ArrayList<>(projectFiles);
    }

    /**
     * Runs the DAG of the flow: 1. Nodes whose persisted execution node is already finished are
     * removed from the graph. 2. Every start vertex of the remaining graph is submitted. 3. While
     * runners are active, wait on the semaphore for at most the remaining flow timeout; if the
     * wait does not succeed, clean up and return FAILED. 4. Pick a runner whose future is done:
     * a cancelled runner ends the flow with KILL; a failed runner is resubmitted while its attempt
     * counter is below maxTryTimes, otherwise the node is marked FAILED. 5. A successful node is
     * marked SUCCESS and each successor whose predecessors have all succeeded is submitted. 6.
     * When no runner is active any more, the flow status is returned, SUCCESS unless a node
     * finally failed. <p>
     * END: with this failure policy the flow is cleaned up and returns as soon as a node finally
     * fails; otherwise the remaining active nodes keep running.
     *
     * @param dagGraph DAG of the flow; finished nodes are removed from it by this method
     * @return the resulting status of the flow
     */

    private FlowStatus runFlow(Graph<String, FlowNode, FlowNodeRelation> dagGraph) {
        // Signalling semaphore, created without permits and handed to every node runner.
        // NOTE(review): presumably a runner releases one permit when it ends - confirm in NodeRunner
        Semaphore semaphore = new Semaphore(0);

        // Walk the graph in topological order and drop the nodes that already finished earlier
        try {
            for (String nodeName : dagGraph.topologicalSort()) {
                ExecutionNode executionNode = flowDao.queryExecutionNode(executionFlow.getId(), nodeName);

                // A finished node does not have to run again
                if (executionNode != null && executionNode.getStatus().typeIsFinished()) {
                    dagGraph.removeVertex(nodeName);
                }
            }
        } catch (Exception e) {
            logger.error("Get topological of graph failed.", e);
            return FlowStatus.FAILED;
        }

        // Start vertices of the remaining graph
        Collection<String> startVertex = dagGraph.getStartVertex();

        // Submit every start vertex that is not tracked yet
        for (String nodeName : startVertex) {
            if (!executionNodeMap.containsKey(nodeName)) {
                // Load or create the execution node record
                ExecutionNode executionNode = insertExecutionNode(executionFlow, nodeName);

                // Track it by node name
                executionNodeMap.put(nodeName, executionNode);

                // Submit the node runner
                submitNodeRunner(dagGraph.getVertex(nodeName), executionNode, semaphore);
            }
        }

        // Status of the flow, downgraded to FAILED when a node finally fails
        FlowStatus status = FlowStatus.SUCCESS;

        // Keep going while there are submitted runners that have not been processed
        while (!activeNodeRunners.isEmpty()) {
            boolean acquire = false;

            try {
                // Wait for a permit, but not longer than the remaining time of the flow timeout
                acquire = semaphore.tryAcquire(calcNodeTimeout(), TimeUnit.SECONDS);
            } catch (InterruptedException e) {
                logger.error(e.getMessage(), e);
            } catch (ExecTimeoutException e) {
                logger.error(e.getMessage(), e);
            }

            // No permit (timeout, interruption or flow timeout already exceeded): abort the flow
            if (!acquire) {
                clean(true);
                return FlowStatus.FAILED;
            }

            // A permit was obtained; poll until one finished future has been found and handled
            boolean done = false;

            while (!done) {
                // Short pause between two polls
                try {
                    Thread.sleep(50);
                } catch (InterruptedException e) {
                    logger.error(e.getMessage(), e);
                }

                // Look for a finished runner; exactly one is handled per acquired permit
                for (Map.Entry<NodeRunner, Future<Boolean>> entry : activeNodeRunners.entrySet()) {
                    NodeRunner nodeRunner = entry.getKey();
                    Future<Boolean> future = entry.getValue();

                    // Only finished futures are of interest
                    if (future.isDone()) {
                        // Ends the polling loop after this entry
                        done = true;

                        // The runner is no longer active; removal during iteration is tolerated by
                        // the ConcurrentHashMap and the loop is left right after this entry
                        activeNodeRunners.remove(nodeRunner);

                        Boolean value = false;

                        Date now = new Date();

                        try {
                            value = future.get();
                        } catch (CancellationException e) {
                            logger.error("task has been cancel");

                            // A cancelled task ends the whole flow as killed
                            clean(true);
                            return FlowStatus.KILL;
                        } catch (InterruptedException e) {
                            logger.error(e.getMessage(), e);
                        } catch (ExecutionException e) {
                            logger.error(e.getMessage(), e);
                        }

                        // value stays false when get() threw, so such a node is treated as failed
                        if (!value) {
                            // Failed node: look up its execution node to decide about a retry
                            ExecutionNode executionNode = executionNodeMap.get(nodeRunner.getNodename());

                            // Retry while the attempt counter is still below maxTryTimes
                            if (executionNode.getAttempt() < maxTryTimes) {
                                executionNode.incAttempt();

                                // Persist the new attempt count
                                flowDao.updateExecutionNode(executionNode);

                                // Resubmit the same node
                                submitNodeRunner(dagGraph.getVertex(nodeRunner.getNodename()), executionNode,
                                        semaphore);
                            } else {
                                // No retries left: the node and therefore the flow have failed
                                status = FlowStatus.FAILED;

                                executionNode.setEndTime(now);
                                executionNode.setStatus(status);

                                // Persist the failed node
                                flowDao.updateExecutionNode(executionNode);

                                if (failurePolicyType == FailurePolicyType.END) {
                                    clean(true);
                                    return status;
                                }
                            }
                        } else { // the node succeeded
                            // Mark the execution node as successful
                            ExecutionNode executionNode = executionNodeMap.get(nodeRunner.getNodename());

                            executionNode.setEndTime(now);
                            executionNode.setStatus(FlowStatus.SUCCESS);

                            flowDao.updateExecutionNode(executionNode);

                            // Submit each successor that is not tracked yet and whose predecessors
                            // have all succeeded
                            for (String nodeName : dagGraph.getPostNode(nodeRunner.getNodename())) {
                                if (!executionNodeMap.containsKey(nodeName)
                                        && isPreNodesAllSuccess(dagGraph.getPreNode(nodeName))) {
                                    // Load or create the execution node record
                                    ExecutionNode newExecutionNode = insertExecutionNode(executionFlow, nodeName);

                                    // Track it by node name
                                    executionNodeMap.put(nodeName, newExecutionNode);

                                    // Submit the node runner
                                    submitNodeRunner(dagGraph.getVertex(nodeName), newExecutionNode, semaphore);
                                }
                            }
                        }

                        break;
                    }
                }
            }
        }

        return status;
    }

    /**
     * Returns the execution node of the given flow node: the persisted record when one exists,
     * otherwise a new record in INIT status that is inserted first.
     *
     * @param executionFlow the execution the node belongs to
     * @param nodeName      name of the flow node
     * @return the existing or the newly inserted execution node
     */
    private ExecutionNode insertExecutionNode(ExecutionFlow executionFlow, String nodeName) {
        ExecutionNode existing = flowDao.queryExecutionNode(executionFlow.getId(), nodeName);

        if (existing == null) {
            // Nothing persisted yet: create the record with attempt 0
            ExecutionNode created = new ExecutionNode();
            Date createdAt = new Date();

            created.setExecId(executionFlow.getId());
            created.setName(nodeName);
            created.setAttempt(0);
            created.setStartTime(createdAt);
            created.setStatus(FlowStatus.INIT);
            created.setJobId(LoggerUtil.genJobId(JOB_PREFIX, executionFlow.getId(), nodeName));

            logger.info("insert execution node, id: {}, name: {}, start time: {}, status: {}, job id: {}",
                    created.getExecId(), nodeName, createdAt, FlowStatus.INIT,
                    LoggerUtil.genJobId(JOB_PREFIX, executionFlow.getId(), nodeName));

            flowDao.insertExecutionNode(created);

            return created;
        }

        return existing;
    }

    /**
     * Wraps a node into a {@link NodeRunner}, submits it to the node executor and registers the
     * runner together with its future as active.
     *
     * @param flowNode      definition of the node to run
     * @param executionNode execution record of the node
     * @param semaphore     semaphore handed to the runner through its job context
     */
    private void submitNodeRunner(FlowNode flowNode, ExecutionNode executionNode, Semaphore semaphore) {
        // Everything the runner needs travels in the job context
        JobContext context = new JobContext();

        context.setFlowNode(flowNode);
        context.setExecutionNode(executionNode);
        context.setExecutionFlow(executionFlow);
        context.setSemaphore(semaphore);

        NodeRunner runner = new NodeRunner(context);
        Future<Boolean> pending = nodeExecutorService.submit(runner);

        activeNodeRunners.putIfAbsent(runner, pending);
    }

    /**
     * Computes how many seconds of the flow timeout are left, measured from the flow start time. <p>
     *
     * @return the remaining time in seconds, always greater than zero
     * @throws ExecTimeoutException when the timeout has already been used up
     */
    private int calcNodeTimeout() {
        long elapsedMillis = System.currentTimeMillis() - startTime;
        int elapsedSeconds = (int) (elapsedMillis / 1000);

        int remaining = timeout - elapsedSeconds;

        if (remaining > 0) {
            return remaining;
        }

        throw new ExecTimeoutException("workflow execution time out");
    }

    /**
     * Writes the final status and the end time of the execution flow, unless the flow is already
     * in a finished status. <p>
     *
     * @param status the status to persist
     */
    private void updateExecutionFlow(FlowStatus status) {
        // After shutdown() a KILL of a scheduled or complement-data execution is not persisted.
        // NOTE(review): presumably so that such executions can be picked up again - confirm
        if (shutdown && status == FlowStatus.KILL) {
            boolean keepUnfinished = executionFlow.getType() == ExecType.COMPLEMENT_DATA
                    || executionFlow.getType() == ExecType.SCHEDULER;

            if (keepUnfinished) {
                return;
            }
        }

        // A finished flow is never overwritten
        if (!executionFlow.getStatus().typeIsNotFinished()) {
            return;
        }

        executionFlow.setEndTime(new Date());
        executionFlow.setStatus(status);

        flowDao.updateExecutionFlow(executionFlow);
    }

    /**
     * Marks the persisted execution flow as killed. <p> Executions of type SCHEDULER or
     * COMPLEMENT_DATA are only marked when {@code updateKilled} is true; all other types are
     * always marked. Nothing happens when the execution no longer exists in the database.
     *
     * @param updateKilled force the KILL status also for SCHEDULER / COMPLEMENT_DATA executions
     */
    public void updateExecutionFlowToKillStatus(boolean updateKilled) {
        ExecutionFlow queryExecutionFlow = flowDao.queryExecutionFlow(executionFlow.getId());

        // The query returns null for an unknown execution, as handled in run() as well
        if (queryExecutionFlow == null) {
            logger.info("flow is not exist: {}", executionFlow.getId());
            return;
        }

        if (updateKilled || (queryExecutionFlow.getType() != ExecType.SCHEDULER
                && queryExecutionFlow.getType() != ExecType.COMPLEMENT_DATA)) {
            updateToKilled(queryExecutionFlow);
        }
    }

    /**
     * Persists the KILL status and the current time as end time, but only for a flow that is not
     * finished yet.
     *
     * @param executionFlow : the flow to mark as killed
     */
    private void updateToKilled(ExecutionFlow executionFlow) {
        // Finished flows keep their status
        if (!executionFlow.getStatus().typeIsNotFinished()) {
            return;
        }

        executionFlow.setEndTime(new Date());
        executionFlow.setStatus(FlowStatus.KILL);

        flowDao.updateExecutionFlow(executionFlow);
    }

    /**
     * Persists the KILL status of an execution node, using the current time as its end time.
     *
     * @param executionNode the node to mark as killed
     */
    private void updateNodeToKilled(ExecutionNode executionNode) {
        executionNode.setEndTime(new Date());
        executionNode.setStatus(FlowStatus.KILL);

        flowDao.updateExecutionNode(executionNode);
    }

    /**
     * Stops the flow: kills the node runners that are still running and then writes the final
     * status of the nodes that are still registered as active.
     *
     * @param updateKilled passed on to the node status update; when true, unfinished nodes are
     *                     marked as killed regardless of the execution type
     */
    public void clean(boolean updateKilled) {
        // Kill the runners that have not finished
        kill();

        // Persist the status of the active nodes
        updateUnfinishNodeStatus(updateKilled);
    }

    /**
     * Raises the shutdown flag of this runner. The flag is only read by updateExecutionFlow,
     * which then skips persisting a KILL status for SCHEDULER / COMPLEMENT_DATA executions.
     */
    public void shutdown() {
        this.shutdown = true;
    }

    /**
     * Writes the final status of every node runner that is still registered as active: a runner
     * whose future completed with {@code true} is marked SUCCESS, every other runner is marked
     * KILL when marking is allowed for its execution type.
     *
     * @param updateKilled when true, nodes are marked as killed regardless of the execution type;
     *                     otherwise SCHEDULER / COMPLEMENT_DATA nodes are left untouched
     */
    private void updateUnfinishNodeStatus(boolean updateKilled) {
        Date now = new Date();

        for (Map.Entry<NodeRunner, Future<Boolean>> entry : activeNodeRunners.entrySet()) {
            NodeRunner nodeRunner = entry.getKey();
            Future<Boolean> future = entry.getValue();

            // Stays false for running, cancelled and failed tasks
            boolean succeeded = false;

            if (future.isDone()) {
                try {
                    // Null-safe: a null result counts as "not successful" instead of failing on unboxing
                    succeeded = Boolean.TRUE.equals(future.get());

                    if (succeeded) {
                        ExecutionNode executionNode = nodeRunner.getExecutionNode();

                        executionNode.setStatus(FlowStatus.SUCCESS);
                        executionNode.setEndTime(now);

                        flowDao.updateExecutionNode(executionNode);
                    }
                } catch (CancellationException e) { // the task was cancelled, e.g. by kill()
                    logger.error("task has been cancel, name:{}", nodeRunner.getNodename());
                } catch (Exception e) {
                    logger.error(e.getMessage(), e);
                }
            }

            if (!succeeded && shouldMarkKilled(nodeRunner, updateKilled)) {
                updateNodeToKilled(nodeRunner.getExecutionNode());
            }
        }
    }

    /**
     * Tells whether a node that did not succeed may be marked as killed: always when
     * {@code updateKilled} is set, otherwise only for execution types other than SCHEDULER and
     * COMPLEMENT_DATA.
     */
    private boolean shouldMarkKilled(NodeRunner nodeRunner, boolean updateKilled) {
        return updateKilled || (nodeRunner.getExecType() != ExecType.SCHEDULER
                && nodeRunner.getExecType() != ExecType.COMPLEMENT_DATA);
    }

    /**
     * Kills every active node runner whose future has not completed and cancels that future.
     * The whole operation runs while holding the monitor of this runner.
     */
    private void kill() {
        synchronized (this) {
            if (!activeNodeRunners.isEmpty()) {
                logger.info("Kill has been called on exec id: {}, num: {}", executionFlow.getId(),
                        activeNodeRunners.size());

                for (Map.Entry<NodeRunner, Future<Boolean>> active : activeNodeRunners.entrySet()) {
                    Future<Boolean> pending = active.getValue();

                    // Finished tasks need no kill
                    if (pending.isDone()) {
                        continue;
                    }

                    NodeRunner runner = active.getKey();

                    logger.info("kill exec, id: {}, node: {}", executionFlow.getId(), runner.getNodename());

                    // Stop the node itself first, then cancel the task with interruption
                    runner.kill();
                    pending.cancel(true);
                }
            }
        }
    }

    /**
     * Post-processing of a flow: outside of develop mode the artifacts of the execution are
     * removed; in every mode the notification mail of the execution is sent. <p>
     */
    private void postProcess() {
        logger.info("Develop mode is: {}", BaseConfig.isDevelopMode());

        // In develop mode the artifacts are kept
        if (!BaseConfig.isDevelopMode()) {
            removeExecArtifacts();
        }

        EmailManager.sendMessageOfExecutionFlow(executionFlow);
    }

    /**
     * Deletes the local exec directory and the HDFS directories of the hive udf jars and of the
     * import/export data of this execution. A failed local delete is only logged.
     */
    private void removeExecArtifacts() {
        String execLocalPath = BaseConfig.getFlowExecDir(executionFlow.getProjectId(),
                executionFlow.getFlowId(), executionFlow.getId());
        File execDir = new File(execLocalPath);

        try {
            FileUtils.deleteDirectory(execDir);
        } catch (IOException e) {
            logger.error(String.format("delete exec dir exception: %s", execLocalPath), e);
        }

        String udfJarPath = BaseConfig.getJobHiveUdfJarPath(executionFlow.getId(), ExternalJobType.WORKFLOW);
        hdfsCleanUp(udfJarPath);

        String impExpDir = BaseConfig.getHdfsImpExpDir(executionFlow.getProjectId(), executionFlow.getId());
        hdfsCleanUp(impExpDir);
    }

    /**
     * Recursively deletes an HDFS path when it exists. Any failure is logged and not propagated.
     *
     * @param path HDFS path to remove
     */
    private void hdfsCleanUp(String path) {
        try {
            HdfsClient client = HdfsClient.getInstance();

            // Nothing to delete
            if (!client.exists(path)) {
                return;
            }

            client.delete(path, true);
        } catch (Exception e) {
            logger.error(String.format("cleanup hdfs dir exception: %s", path), e);
        }
    }

    /**
     * Checks whether all predecessor nodes have finished successfully.
     *
     * @param preNodes names of the predecessor nodes, may be null or empty
     * @return true when there are no predecessors or every predecessor is tracked, finished and
     *         successful
     */
    private boolean isPreNodesAllSuccess(Set<String> preNodes) {
        // No predecessors: nothing to wait for
        if (CollectionUtils.isEmpty(preNodes)) {
            return true;
        }

        for (String name : preNodes) {
            ExecutionNode predecessor = executionNodeMap.get(name);

            // Unknown, unfinished or unsuccessful predecessors all block the node; the failure
            // policy is not taken into account here
            boolean succeeded = predecessor != null
                    && !predecessor.getStatus().typeIsNotFinished()
                    && predecessor.getStatus().typeIsSuccess();

            if (!succeeded) {
                return false;
            }
        }

        return true;
    }
}