// uk.ac.sanger.cgp.wwdocker.daemon.PrimaryDaemon.java

/**
 * Copyright (c) 2015 Genome Research Ltd.
 * 
 * Author: Cancer Genome Project cgpit@sanger.ac.uk
 * 
 * This file is part of WwDocker.
 * 
 * WwDocker is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Affero General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
 * details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 * 
 * 1. The usage of a range of years within a copyright statement contained within
 * this distribution should be interpreted as being equivalent to a list of years
 * including the first and last year specified and all consecutive years between
 * them. For example, a copyright statement that reads 'Copyright (c) 2005, 2007-
 * 2009, 2011-2012' should be interpreted as being identical to a statement that
 * reads 'Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012' and a copyright
 * statement that reads "Copyright (c) 2005-2012' should be interpreted as being
 * identical to a statement that reads 'Copyright (c) 2005, 2006, 2007, 2008,
 * 2009, 2010, 2011, 2012'."
 */

package uk.ac.sanger.cgp.wwdocker.daemon;

import com.jcraft.jsch.Session;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeoutException;
import org.apache.commons.configuration.BaseConfiguration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import uk.ac.sanger.cgp.wwdocker.Config;
import uk.ac.sanger.cgp.wwdocker.actions.Local;
import uk.ac.sanger.cgp.wwdocker.actions.Remote;
import uk.ac.sanger.cgp.wwdocker.actions.Utils;
import uk.ac.sanger.cgp.wwdocker.beans.WorkerState;
import uk.ac.sanger.cgp.wwdocker.beans.WorkflowIni;
import uk.ac.sanger.cgp.wwdocker.enums.HostStatus;
import uk.ac.sanger.cgp.wwdocker.factories.WorkflowFactory;
import uk.ac.sanger.cgp.wwdocker.interfaces.Daemon;
import uk.ac.sanger.cgp.wwdocker.interfaces.Workflow;
import uk.ac.sanger.cgp.wwdocker.messages.Messaging;

/**
 *
 * @author kr2
 */
/**
 * Primary (controller) daemon for WwDocker.
 *
 * <p>Loops forever: keeps the set of worker hosts in sync with the on-disk
 * worker config (provisioning new hosts, killing removed ones), and pushes
 * workflow INI files onto the shared "{qPrefix}.PEND" queue without creating
 * duplicates.  A "KILLALL" mode shuts every worker down and exits.
 *
 * @author kr2
 */
public class PrimaryDaemon implements Daemon {
    private static final Logger logger = LogManager.getLogger();

    /** Loop iterations (~1s each) between re-checks of unresponsive hosts. */
    private static final int RETRY_INIT = 300;

    // Previously mutable static fields set from the constructor; made final
    // instance state as they are private and only ever read by this instance.
    private final PropertiesConfiguration config;
    private final Messaging messaging;

    /**
     * @param config daemon configuration; must supply "qPrefix" and "workerCfg"
     * @param rmq    messaging layer used to talk to worker hosts
     */
    public PrimaryDaemon(PropertiesConfiguration config, Messaging rmq) {
        this.config = config;
        this.messaging = rmq;
    }

    /**
     * Main control loop; never returns under normal operation.
     *
     * @param mode optional startup mode: "KILLALL" tells every known worker to
     *             shut down and then exits the JVM; any other value is passed
     *             through to host provisioning
     * @throws IOException            on messaging or file-transfer failures
     * @throws InterruptedException   if the inter-cycle sleep is interrupted
     * @throws TimeoutException       on messaging timeouts
     * @throws ConfigurationException if the config cannot be saved for shipping
     *                                to workers
     */
    @Override
    public void run(String mode)
            throws IOException, InterruptedException, TimeoutException, ConfigurationException {
        // lots of values that will be used over and over again
        String qPrefix = config.getString("qPrefix");
        File thisJar = Utils.thisJarFile();
        File tmpConf = new File(System.getProperty("java.io.tmpdir") + "/" + qPrefix + ".remote.cfg");
        tmpConf.deleteOnExit(); // contains passwords so cleanup
        config.save(tmpConf.getAbsolutePath()); // done like this so includes are pulled in
        Local.chmod(tmpConf, "go-rwx"); // passwords: owner-only access

        // setup
        Workflow workManager = new WorkflowFactory().getWorkflow(config);
        Map<String, String> envs = Config.getEnvs(config);

        /*
         * gets the list of worker hosts which CAN change during runtime.
         * There is a way to get the config to load when changed, however it only
         * matters when we loop round so we may as well just rebuild the object
         */
        Map<String, String> hosts = new LinkedHashMap<>();
        hostSet(config, hosts);
        cleanHostQueues(config, hosts);

        if (mode != null && mode.equalsIgnoreCase("KILLALL")) {
            killAll(config, hosts, thisJar, tmpConf); // never returns (System.exit)
        }

        hosts.clear(); // needs to be clean before looping starts

        // this holds md5 of this JAR and the config (which lists the workflow code to use)
        WorkerState provState = new WorkerState(thisJar, tmpConf);

        int nextRetry = RETRY_INIT;

        while (true) {
            addWorkToPend(workManager, config);

            // this is a reload as this can change during execution
            hostSet(config, hosts);

            for (Map.Entry<String, String> e : hosts.entrySet()) {

                String host = e.getKey();
                if (e.getValue().equals("KILL")) {
                    // tell the worker to die, then mark it for removal next reload
                    provState.setChangeStatusTo(HostStatus.KILL);
                    messaging.sendMessage(qPrefix.concat(".").concat(host), Utils.objectToJson(provState));
                    hosts.replace(host, "DELETE");
                    continue;
                }

                provState.setChangeStatusTo(HostStatus.CHECKIN);
                provState.setReplyToQueue(qPrefix.concat(".ACTIVE"));

                // provision new hosts immediately; re-check known hosts only when
                // the retry countdown hits zero
                if (e.getValue().equals("TO_PROVISION")
                        || ((e.getValue().equals("CURRENT") || e.getValue().equals("RETRY")) && nextRetry == 0)) {
                    if (!messaging.queryGaveResponse(qPrefix.concat(".").concat(host), provState.getReplyToQueue(),
                            Utils.objectToJson(provState), 15000)) {
                        // no response from host... but is it still up
                        // see if docker is running before reprovision
                        Session hostSession = Remote.getSession(config, host);
                        boolean dockerRunning = Remote.dockerRunning(hostSession, hostSession.getUserName());
                        boolean workerRunning = Remote.workerRunning(hostSession, hostSession.getUserName());
                        Remote.closeSsh(hostSession);
                        if (dockerRunning || workerRunning) {
                            // still busy, don't trash it - come back later
                            logger.trace("Retry host later: " + host);
                            hosts.replace(host, "RETRY");
                            continue;
                        }

                        logger.info("No response from host '".concat(host).concat("' (re)provisioning..."));
                        if (!workManager.provisionHost(host, config, thisJar, tmpConf, mode, envs)) {
                            hosts.replace(host, "BROKEN");
                            messaging.removeFromStateQueue(qPrefix.concat(".").concat("BROKEN"), host); // just incase it's already there
                            messaging.sendMessage(qPrefix.concat(".").concat("BROKEN"), "Failed to provision",
                                    host);
                            break;
                        }
                    }
                    if (!e.getValue().equals("CURRENT")) {
                        hosts.replace(host, "CURRENT");
                        break; // so we start some work on this host before provisioning more
                    }
                }
            }
            // we need a little sleep here or we'll kill the queues
            Thread.sleep(1000);
            if (nextRetry == 0) {
                nextRetry = RETRY_INIT;
            } else {
                nextRetry--;
            }
        }
    }

    /**
     * Synchronises the in-memory host map with the on-disk worker config:
     * drops hosts previously marked "DELETE", adds newly listed hosts as
     * "TO_PROVISION", and flags hosts no longer listed as "KILL".
     *
     * @param config daemon configuration supplying "workerCfg"
     * @param hosts  host name to state map, modified in place
     */
    private void hostSet(BaseConfiguration config, Map<String, String> hosts) {
        // first remove the killed off hosts
        hosts.values().removeIf("DELETE"::equals);

        // add the new hosts
        BaseConfiguration workerConf = Config.loadWorkers(config.getString("workerCfg"));
        String[] rawHosts = workerConf.getStringArray("hosts");
        // a single empty entry means the "hosts" key is present but blank
        if (rawHosts.length == 1 && rawHosts[0].isEmpty()) {
            rawHosts = new String[0];
        }
        Set<String> configured = new LinkedHashSet<>();
        for (String h : rawHosts) {
            if (!hosts.containsKey(h)) {
                hosts.put(h, "TO_PROVISION");
            }
            configured.add(h);
        }

        // identify which hosts need to be killed
        for (Map.Entry<String, String> e : hosts.entrySet()) {
            if (!configured.contains(e.getKey())) {
                e.setValue("KILL");
            }
        }
    }

    /**
     * Pushes all pending workflow INI files onto the "{qPrefix}.PEND" queue.
     * Existing queue entries are drained first and merged by file name so the
     * queue never holds duplicates; new entries also get the log-search command
     * from the workflow manager.
     *
     * @param workManager workflow implementation in use
     * @param config      daemon configuration supplying "qPrefix"
     */
    private void addWorkToPend(Workflow workManager, BaseConfiguration config)
            throws IOException, InterruptedException, TimeoutException {
        // send all work into the wwd_PEND queue, data can be added during execution, this ensures duplicates don't occur
        List<File> iniFiles = Utils.getWorkInis(config);
        if (iniFiles.isEmpty()) {
            return;
        }

        // get all of the existing iniFiles so we can generate a uniq list
        String qPrefix = config.getString("qPrefix");
        List<String> existing = messaging.getMessageStrings(qPrefix.concat(".PEND"), 500);
        Map<String, WorkflowIni> allInis = new HashMap<>();
        for (String m : existing) {
            WorkflowIni iniFile = (WorkflowIni) Utils.jsonToObject(m, WorkflowIni.class);
            allInis.put(iniFile.getIniFile().getName(), iniFile);
        }
        for (File iniFile : iniFiles) {
            if (!allInis.containsKey(iniFile.getName())) {
                WorkflowIni newIni = new WorkflowIni(iniFile);
                newIni.setLogSearchCmd(workManager.getFindLogsCmds());
                allInis.put(iniFile.getName(), newIni);
            }
        }

        // requeue the merged, de-duplicated set
        for (WorkflowIni ini : allInis.values()) {
            messaging.sendMessage(qPrefix.concat(".PEND"), ini);
        }

        workManager.iniUpdate(iniFiles, config, HostStatus.PEND);
    }

    /**
     * Sends a KILL state message to every known host, then terminates the JVM.
     *
     * @param config   daemon configuration supplying "qPrefix"
     * @param hosts    host name to state map; every key is messaged
     * @param thisJar  the running jar, used to build the kill state message
     * @param thisConf the saved config file, used to build the kill state message
     */
    private void killAll(BaseConfiguration config, Map<String, String> hosts, File thisJar, File thisConf)
            throws IOException, InterruptedException, TimeoutException {
        WorkerState killState = new WorkerState(thisJar, thisConf);
        killState.setChangeStatusTo(HostStatus.KILL);
        String killJson = Utils.objectToJson(killState);
        String qPrefix = config.getString("qPrefix");
        for (Map.Entry<String, String> e : hosts.entrySet()) {
            messaging.sendMessage(qPrefix.concat(".").concat(e.getKey()), killJson);
        }
        logger.fatal("All hosts shutting down as requested... exiting");
        System.exit(0);
    }

    /**
     * Drains any stale messages from each host's private queue so old state
     * cannot be misread on startup.  The drained messages are discarded.
     *
     * @param config daemon configuration supplying "qPrefix"
     * @param hosts  host name to state map; every key's queue is drained
     */
    private void cleanHostQueues(BaseConfiguration config, Map<String, String> hosts)
            throws IOException, InterruptedException {
        String qPrefix = config.getString("qPrefix");
        for (Map.Entry<String, String> e : hosts.entrySet()) {
            messaging.getMessageStrings(qPrefix.concat(".").concat(e.getKey()), 50);
        }
    }
}