org.apache.airavata.gfac.monitor.util.CommonUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.airavata.gfac.monitor.util.CommonUtils.java

Source

/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
*/
package org.apache.airavata.gfac.monitor.util;

import org.apache.airavata.common.exception.ApplicationSettingsException;
import org.apache.airavata.common.logger.AiravataLogger;
import org.apache.airavata.common.logger.AiravataLoggerFactory;
import org.apache.airavata.common.utils.AiravataZKUtils;
import org.apache.airavata.common.utils.Constants;
import org.apache.airavata.commons.gfac.type.HostDescription;
import org.apache.airavata.gfac.GFacException;
import org.apache.airavata.gfac.core.context.JobExecutionContext;
import org.apache.airavata.gfac.core.handler.GFacHandler;
import org.apache.airavata.gfac.core.handler.GFacHandlerConfig;
import org.apache.airavata.gfac.core.monitor.MonitorID;
import org.apache.airavata.gfac.monitor.HostMonitorData;
import org.apache.airavata.gfac.monitor.UserMonitorData;
import org.apache.airavata.gfac.monitor.exception.AiravataMonitorException;
import org.apache.airavata.schemas.gfac.GsisshHostType;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CountDownLatch;

public class CommonUtils {
    private final static AiravataLogger logger = AiravataLoggerFactory.getLogger(CommonUtils.class);

    /**
     * Tells whether the given host is configured to use the PBS job manager.
     *
     * <p>A job manager value of {@code "pbs"} or the empty string counts as PBS; any other
     * value, including {@code null}, does not.
     *
     * @param host host description; its type is cast to {@link GsisshHostType}
     * @return {@code true} if the job manager is {@code "pbs"} or empty, {@code false} otherwise
     */
    public static boolean isPBSHost(HostDescription host) {
        final String jobManager = ((GsisshHostType) host.getType()).getJobManager();
        // An empty job manager string is treated the same as an explicit "pbs".
        return "pbs".equals(jobManager) || "".equals(jobManager);
    }

    /**
     * Tells whether the given host is configured to use the SLURM job manager.
     *
     * @param host host description; its type is cast to {@link GsisshHostType}
     * @return {@code true} only if the job manager is exactly {@code "slurm"}
     */
    public static boolean isSlurm(HostDescription host) {
        final GsisshHostType gsisshHost = (GsisshHostType) host.getType();
        return "slurm".equals(gsisshHost.getJobManager());
    }

    /**
     * Tells whether the given host is configured to use the SGE job manager.
     *
     * @param host host description; its type is cast to {@link GsisshHostType}
     * @return {@code true} only if the job manager is exactly {@code "sge"}
     */
    public static boolean isSGE(HostDescription host) {
        final GsisshHostType gsisshHost = (GsisshHostType) host.getType();
        return "sge".equals(gsisshHost.getJobManager());
    }

    /**
     * Builds the channel id for a monitored job as {@code <userName>-<hostName>}.
     *
     * @param monitorID job monitor id supplying the user name and the host
     * @return the user name and the host's host name joined by {@code "-"}
     */
    public static String getChannelID(MonitorID monitorID) {
        final StringBuilder channelId = new StringBuilder();
        channelId.append(monitorID.getUserName());
        channelId.append("-");
        channelId.append(monitorID.getHost().getType().getHostName());
        return channelId.toString();
    }

    /**
     * Builds the routing key for a monitored job as {@code *.<userName>.<hostAddress>}.
     *
     * @param monitorID job monitor id supplying the user name and the host
     * @return the routing key string
     */
    public static String getRoutingKey(MonitorID monitorID) {
        final StringBuilder routingKey = new StringBuilder("*.");
        routingKey.append(monitorID.getUserName());
        routingKey.append(".");
        routingKey.append(monitorID.getHost().getType().getHostAddress());
        return routingKey.toString();
    }

    /**
     * Builds a channel id as {@code <userName>-<hostAddress>}.
     *
     * @param userName user name part of the id
     * @param hostAddress host address part of the id
     * @return the two values joined by {@code "-"}
     */
    public static String getChannelID(String userName, String hostAddress) {
        final StringBuilder channelId = new StringBuilder();
        channelId.append(userName).append("-").append(hostAddress);
        return channelId.toString();
    }

    /**
     * Builds a routing key as {@code *.<userName>.<hostAddress>}.
     *
     * @param userName user name segment of the key
     * @param hostAddress host address segment of the key
     * @return the routing key string
     */
    public static String getRoutingKey(String userName, String hostAddress) {
        final StringBuilder routingKey = new StringBuilder("*.");
        routingKey.append(userName).append(".").append(hostAddress);
        return routingKey.toString();
    }

    /**
     * Registers a job for monitoring by inserting its {@link MonitorID} into the queue, which is
     * grouped by user and then by host.
     *
     * <p>The queue is searched for an entry with the same user name; within that entry the host
     * list is searched for a host whose XML form equals that of the job's host. The monitor id is
     * added to the matching host entry, and a missing host or user entry is created as needed.
     * The whole operation runs while holding the monitor of {@code queue}.
     *
     * @param queue monitoring queue; also used as the lock for this operation
     * @param monitorID job to start monitoring
     * @throws AiravataMonitorException if the thread is interrupted while putting a new user
     *         entry into the queue; the interrupt status is restored before throwing
     */
    public static void addMonitortoQueue(BlockingQueue<UserMonitorData> queue, MonitorID monitorID)
            throws AiravataMonitorException {
        synchronized (queue) {
            for (UserMonitorData userData : queue) {
                if (!userData.getUserName().equals(monitorID.getUserName())) {
                    continue;
                }
                // This user is already monitored: look for an existing entry for the job's host.
                for (HostMonitorData hostData : userData.getHostMonitorData()) {
                    if (hostData.getHost().toXML().equals(monitorID.getHost().toXML())) {
                        hostData.addMonitorIDForHost(monitorID);
                        logJobAdded(monitorID);
                        return;
                    }
                }
                // The user is known but this host is not, so create the host entry.
                HostMonitorData newHostData = new HostMonitorData(monitorID.getHost());
                newHostData.addMonitorIDForHost(monitorID);
                userData.addHostMonitorData(newHostData);
                logJobAdded(monitorID);
                return;
            }

            // No entry for this user yet: create both the user and the host entry.
            HostMonitorData hostMonitorData = new HostMonitorData(monitorID.getHost());
            hostMonitorData.addMonitorIDForHost(monitorID);

            UserMonitorData userMonitorData = new UserMonitorData(monitorID.getUserName());
            userMonitorData.addHostMonitorData(hostMonitorData);
            try {
                queue.put(userMonitorData);
                logJobAdded(monitorID);
            } catch (InterruptedException e) {
                // Restore the interrupt status so code further up the stack can still observe it.
                Thread.currentThread().interrupt();
                throw new AiravataMonitorException(e);
            }
        }
    }

    /** Logs, at debug level, that the given job was added to the monitoring queue. */
    private static void logJobAdded(MonitorID monitorID) {
        logger.debugId(monitorID.getJobID(),
                "Added new job to the monitoring queue, experiment {}, task {}",
                monitorID.getExperimentID(), monitorID.getTaskID());
    }

    /**
     * Checks that no entry in the queue belongs to the same user and host as the given job.
     *
     * @param queue queue of monitor ids to scan
     * @param monitorID job whose user name and host are compared against each queue entry
     * @return {@code false} as soon as an entry has the same user name and a host that
     *         {@link #isEqual(HostDescription, HostDescription)} considers equal; {@code true}
     *         if no such entry exists
     */
    public static boolean isTheLastJobInQueue(BlockingQueue<MonitorID> queue, MonitorID monitorID) {
        for (MonitorID queued : queue) {
            final boolean sameUser = monitorID.getUserName().equals(queued.getUserName());
            if (sameUser && CommonUtils.isEqual(monitorID.getHost(), queued.getHost())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Removes a job from the user/host grouped monitoring queue.
     *
     * <p>The queue is scanned for entries with the same user name as {@code monitorID}; within
     * those, for hosts whose XML form equals that of the job's host; within those, for the first
     * monitor id whose job ID or job name equals that of {@code monitorID}. That monitor id is
     * removed. If its host entry is left without monitor ids the host entry is removed as well,
     * and if the user entry is then left without hosts it is removed from the queue. When no
     * match is found, two info messages are logged and nothing is changed.
     *
     * <p>This method takes no lock itself. Per the original note, it doesn't have to be
     * synchronized because it is invoked by HPCPullMonitor, which is already synchronized.
     *
     * @param queue monitoring queue to remove the job from
     * @param monitorID job to remove; matched by user name, host XML, and job ID or job name
     * @throws AiravataMonitorException declared by the signature; nothing in this body throws it
     *         explicitly
     */
    public static void removeMonitorFromQueue(BlockingQueue<UserMonitorData> queue, MonitorID monitorID)
            throws AiravataMonitorException {
        Iterator<UserMonitorData> iterator = queue.iterator();
        while (iterator.hasNext()) {
            UserMonitorData next = iterator.next();
            if (next.getUserName().equals(monitorID.getUserName())) {
                // then this is the right place to update
                List<HostMonitorData> hostMonitorData = next.getHostMonitorData();
                Iterator<HostMonitorData> iterator1 = hostMonitorData.iterator();
                while (iterator1.hasNext()) {
                    HostMonitorData iHostMonitorID = iterator1.next();
                    // hosts are matched by comparing their XML representations
                    if (iHostMonitorID.getHost().toXML().equals(monitorID.getHost().toXML())) {
                        Iterator<MonitorID> iterator2 = iHostMonitorID.getMonitorIDs().iterator();
                        while (iterator2.hasNext()) {
                            MonitorID iMonitorID = iterator2.next();
                            if (iMonitorID.getJobID().equals(monitorID.getJobID())
                                    || iMonitorID.getJobName().equals(monitorID.getJobName())) {
                                // OK, we found the entry. We cannot use list.remove(object) because
                                // the states of the two objects could differ; that is why the match
                                // is made on the job ID (or job name) and the removal goes through
                                // the iterator.
                                iterator2.remove();
                                logger.infoId(monitorID.getJobID(),
                                        "Removed the jobId: {} JobName: {} from monitoring last " + "status:{}",
                                        monitorID.getJobID(), monitorID.getJobName(),
                                        monitorID.getStatus().toString());
                                // cascade the removal upwards when a container becomes empty
                                if (iHostMonitorID.getMonitorIDs().size() == 0) {
                                    iterator1.remove();
                                    logger.debug("Removed host {} from monitoring queue",
                                            iHostMonitorID.getHost().getType().getHostAddress());
                                    if (hostMonitorData.size() == 0) {
                                        // no useful data so we have to remove the element from the queue
                                        queue.remove(next);
                                        logger.debug("Removed user {} from monitoring.", next.getUserName());
                                    }
                                }
                                // only the first matching monitor id is removed
                                return;
                            }
                        }
                    }
                }
            }
        }
        // reached only when no matching monitor id was found
        logger.info("Cannot find the given MonitorID in the queue with userName " + monitorID.getUserName()
                + "  and jobID " + monitorID.getJobID());
        logger.info("This might not be an error because someone else removed this job from the queue");
    }

    /**
     * Compares two host descriptions of type {@link GsisshHostType}.
     *
     * <p>Two hosts are equal when their host address, job manager, port and monitor mode are all
     * equal. Unset ({@code null}) attributes are compared null-safely: two {@code null} values
     * are equal, and a {@code null} value never equals a non-null one. If either host is not a
     * {@code GsisshHostType} an error is logged and {@code false} is returned.
     *
     * @param host1 first host description
     * @param host2 second host description
     * @return {@code true} if both are Gsissh hosts with equal attributes, {@code false} otherwise
     */
    public static boolean isEqual(HostDescription host1, HostDescription host2) {
        if (!(host1.getType() instanceof GsisshHostType) || !(host2.getType() instanceof GsisshHostType)) {
            logger.error("This method is only impmlemented to handle Gsissh host types");
            return false;
        }
        GsisshHostType hostType1 = (GsisshHostType) host1.getType();
        GsisshHostType hostType2 = (GsisshHostType) host2.getType();
        return nullSafeEquals(hostType1.getHostAddress(), hostType2.getHostAddress())
                && nullSafeEquals(hostType1.getJobManager(), hostType2.getJobManager())
                && (hostType1.getPort() == hostType2.getPort())
                && nullSafeEquals(hostType1.getMonitorMode(), hostType2.getMonitorMode());
    }

    /** Returns whether both values are {@code null} or {@code first.equals(second)}. */
    private static boolean nullSafeEquals(Object first, Object second) {
        return first == null ? second == null : first.equals(second);
    }

    /**
     * Instantiates and invokes every configured out-flow handler, in configuration order.
     *
     * <p>For each handler configuration the handler class is loaded by name, instantiated through
     * its no-argument constructor, initialised with the configured properties and then invoked
     * with the given context. Processing stops at the first failure.
     *
     * @param jobExecutionContext context supplying the out-handler configuration; also passed to
     *        each handler
     * @throws GFacException if a handler class cannot be loaded, is not a {@link GFacHandler},
     *         cannot be instantiated, or if invoking a handler throws any exception
     */
    public static void invokeOutFlowHandlers(JobExecutionContext jobExecutionContext) throws GFacException {
        List<GFacHandlerConfig> handlers = jobExecutionContext.getGFacConfiguration().getOutHandlers();

        for (GFacHandlerConfig handlerConfig : handlers) {
            // Use the class name in error messages rather than the config object itself.
            String handlerClassName = handlerConfig.getClassName();
            GFacHandler handler;
            try {
                Class<? extends GFacHandler> handlerClass =
                        Class.forName(handlerClassName.trim()).asSubclass(GFacHandler.class);
                handler = handlerClass.newInstance();
                handler.initProperties(handlerConfig.getProperties());
            } catch (ClassNotFoundException e) {
                String message = "Cannot load handler class " + handlerClassName;
                logger.error(message, e);
                throw new GFacException(message, e);
            } catch (ClassCastException e) {
                // asSubclass rejects classes that are not GFacHandler implementations.
                String message = "Handler class " + handlerClassName + " is not a GFacHandler";
                logger.error(message, e);
                throw new GFacException(message, e);
            } catch (InstantiationException e) {
                String message = "Cannot instantiate handler class " + handlerClassName;
                logger.error(message, e);
                throw new GFacException(message, e);
            } catch (IllegalAccessException e) {
                String message = "Cannot instantiate handler class " + handlerClassName;
                logger.error(message, e);
                throw new GFacException(message, e);
            }
            try {
                handler.invoke(jobExecutionContext);
            } catch (Exception e) {
                // TODO: Better error reporting.
                throw new GFacException("Error Executing a OutFlow Handler", e);
            }
        }
    }

    /**
     * Updates the job count stored at each given znode path, then writes the colon-separated
     * list of changed paths to the stat znode.
     *
     * <p>If {@code zk} is {@code null} or not connected, a temporary ZooKeeper session is opened
     * for this call and closed again before returning. If that session cannot be opened, the
     * failure is logged and the method returns without changing anything. ZooKeeper errors and
     * interruption are logged rather than propagated; on interruption the thread's interrupt
     * status is restored.
     *
     * @param zk - zookeeper instance, may be {@code null} or disconnected
     * @param changeCountMap - map of change job count with relevant path
     * @param isAdd - Should add or reduce existing job count by the given job count.
     */
    public static void updateZkWithJobCount(ZooKeeper zk, final Map<String, Integer> changeCountMap,
            boolean isAdd) {
        StringBuilder changeZNodePaths = new StringBuilder();
        // Set only when this method opens its own session, so that it can be closed afterwards.
        ZooKeeper ownedZk = null;
        boolean interrupted = false;
        try {
            if (zk == null || !zk.getState().isConnected()) {
                try {
                    final CountDownLatch countDownLatch = new CountDownLatch(1);
                    ownedZk = new ZooKeeper(AiravataZKUtils.getZKhostPort(), 6000, new Watcher() {
                        @Override
                        public void process(WatchedEvent event) {
                            countDownLatch.countDown();
                        }
                    });
                    // Wait for the first watcher event before using the new session.
                    countDownLatch.await();
                    zk = ownedZk;
                } catch (ApplicationSettingsException e) {
                    // Without a usable connection there is nothing to update.
                    logger.error("Error while reading zookeeper hostport string", e);
                    return;
                } catch (IOException e) {
                    logger.error(
                            "Error while reconnect attempt to zookeeper where zookeeper connection loss state",
                            e);
                    return;
                }
            }

            for (Map.Entry<String, Integer> change : changeCountMap.entrySet()) {
                String path = change.getKey();
                Integer changeCount = change.getValue();
                if (isAdd) {
                    CommonUtils.checkAndCreateZNode(zk, path);
                }
                byte[] byteData = zk.getData(path, null, null);
                if (byteData == null) {
                    if (isAdd) {
                        zk.setData(path, String.valueOf(changeCount).getBytes(), -1);
                    } else {
                        // This is not possible, but we handle in case there any data zookeeper communication failure
                        logger.warn("Couldn't reduce job count in " + path
                                + " as it returns null data. Hence reset the job count to 0");
                        zk.setData(path, "0".getBytes(), -1);
                    }
                } else {
                    int previousCount = Integer.parseInt(new String(byteData));
                    if (isAdd) {
                        zk.setData(path, String.valueOf(changeCount + previousCount).getBytes(), -1);
                    } else {
                        int removeCount = changeCount;
                        if (previousCount >= removeCount) {
                            zk.setData(path, String.valueOf(previousCount - removeCount).getBytes(), -1);
                        } else {
                            // This is not possible, do we need to reset the job count to 0 ?
                            logger.error("Requested remove job count is " + removeCount
                                    + " which is higher than the existing job count " + previousCount + " in  "
                                    + path + " path.");
                        }
                    }
                }
                changeZNodePaths.append(path).append(":");
            }

            // update stat node to trigger orchestrator watchers
            if (changeCountMap.size() > 0) {
                // drop the trailing ':' separator
                changeZNodePaths.deleteCharAt(changeZNodePaths.length() - 1);
                zk.setData("/" + Constants.STAT, changeZNodePaths.toString().getBytes(), -1);
            }
        } catch (KeeperException e) {
            logger.error("Error while writing job count to zookeeper", e);
        } catch (InterruptedException e) {
            interrupted = true;
            logger.error("Error while writing job count to zookeeper", e);
        } finally {
            if (ownedZk != null) {
                try {
                    ownedZk.close();
                } catch (InterruptedException e) {
                    interrupted = true;
                    logger.error("Interrupted while closing temporary zookeeper connection", e);
                }
            }
            // Restore the interrupt status only after the close attempt, so that close() is not
            // cut short by a flag this method set itself.
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }

    }

    /**
     * Adds one to the ZooKeeper job count of the user/host that the given job belongs to.
     *
     * <p>The znode path comes from {@link #getJobCountUpdatePath(MonitorID)} and the ZooKeeper
     * handle from the job execution context of {@code monitorID}.
     *
     * @param monitorID - Job monitorId
     */
    public static void increaseZkJobCount(MonitorID monitorID) {
        final String jobCountPath = CommonUtils.getJobCountUpdatePath(monitorID);
        final Map<String, Integer> increments = new HashMap<String, Integer>();
        increments.put(jobCountPath, 1);
        updateZkWithJobCount(monitorID.getJobExecutionContext().getZk(), increments, true);
    }

    /**
     * Builds the job-count znode path for a job, in the form
     * {@code /<STAT>/<userName>/<hostAddress>/<JOB>}, where {@code STAT} and {@code JOB} are the
     * corresponding values from {@link Constants}.
     *
     * @param monitorID - Job monitorId
     * @return the znode path for the job's user and host
     */
    public static String getJobCountUpdatePath(MonitorID monitorID) {
        return "/" + Constants.STAT
                + "/" + monitorID.getUserName()
                + "/" + monitorID.getHost().getType().getHostAddress()
                + "/" + Constants.JOB;
    }

    /**
     * Ensures that a persistent znode exists at the given path, creating it and any missing
     * parent znodes (with no data and an open ACL) when necessary.
     *
     * <p>If another client creates the znode between the existence check and the create call,
     * the resulting {@code NodeExistsException} is treated as success.
     *
     * @param zk - zookeeper instance
     * @param path - path to check znode
     * @throws KeeperException if ZooKeeper reports an error other than the node already existing
     * @throws InterruptedException if the calling thread is interrupted during a ZooKeeper call
     */
    private static void checkAndCreateZNode(ZooKeeper zk, String path)
            throws KeeperException, InterruptedException {
        if (zk.exists(path, null) != null) {
            return; // znode is already there
        }
        int lastSlash = path.lastIndexOf("/");
        if (lastSlash > 1) { // recursively make sure the parent znode exists first
            checkAndCreateZNode(zk, path.substring(0, lastSlash));
        }
        try {
            zk.create(path, null, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        } catch (KeeperException.NodeExistsException ignored) {
            // Created concurrently by someone else; the znode exists, which is all we need.
            logger.debug("Znode {} already exists, skipping creation", path);
        }
    }
}