Java tutorial
/** * Copyright (c) 2015 Genome Research Ltd. * * Author: Cancer Genome Project cgpit@sanger.ac.uk * * This file is part of WwDocker. * * WwDocker is free software: you can redistribute it and/or modify it under * the terms of the GNU Affero General Public License as published by the Free * Software Foundation; either version 3 of the License, or (at your option) any * later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more * details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * 1. The usage of a range of years within a copyright statement contained within * this distribution should be interpreted as being equivalent to a list of years * including the first and last year specified and all consecutive years between * them. For example, a copyright statement that reads 'Copyright (c) 2005, 2007- * 2009, 2011-2012' should be interpreted as being identical to a statement that * reads 'Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012' and a copyright * statement that reads "Copyright (c) 2005-2012' should be interpreted as being * identical to a statement that reads 'Copyright (c) 2005, 2006, 2007, 2008, * 2009, 2010, 2011, 2012'." */ package uk.ac.sanger.cgp.wwdocker.daemon; import com.jcraft.jsch.Session; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeoutException; import org.apache.commons.configuration.BaseConfiguration; import org.apache.commons.configuration.ConfigurationException; import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import uk.ac.sanger.cgp.wwdocker.Config; import uk.ac.sanger.cgp.wwdocker.actions.Local; import uk.ac.sanger.cgp.wwdocker.actions.Remote; import uk.ac.sanger.cgp.wwdocker.actions.Utils; import uk.ac.sanger.cgp.wwdocker.beans.WorkerState; import uk.ac.sanger.cgp.wwdocker.beans.WorkflowIni; import uk.ac.sanger.cgp.wwdocker.enums.HostStatus; import uk.ac.sanger.cgp.wwdocker.factories.WorkflowFactory; import uk.ac.sanger.cgp.wwdocker.interfaces.Daemon; import uk.ac.sanger.cgp.wwdocker.interfaces.Workflow; import uk.ac.sanger.cgp.wwdocker.messages.Messaging; /** * * @author kr2 */ public class PrimaryDaemon implements Daemon { private static final Logger logger = LogManager.getLogger(); private static PropertiesConfiguration config; private static Messaging messaging; private static final int RETRY_INIT = 300; public PrimaryDaemon(PropertiesConfiguration config, Messaging rmq) { PrimaryDaemon.config = config; PrimaryDaemon.messaging = rmq; } @Override public void run(String mode) throws IOException, InterruptedException, TimeoutException, ConfigurationException { // lots of values that will be used over and over again String qPrefix = config.getString("qPrefix"); File thisJar = Utils.thisJarFile(); File tmpConf = new File(System.getProperty("java.io.tmpdir") + "/" + qPrefix + ".remote.cfg"); tmpConf.deleteOnExit(); // contains passwords so cleanup config.save(tmpConf.getAbsolutePath()); // done like this so includes are pulled in Local.chmod(tmpConf, "go-rwx"); // setup Workflow workManager = new WorkflowFactory().getWorkflow(config); Map<String, String> envs = Config.getEnvs(config); /* * gets the list of worker hosts which CAN change during runtime * There is a way to get the config to load when changed, however it only * matters when we loop round so we may as well just rebuild the object */ Map<String, String> hosts = new LinkedHashMap<>(); hostSet(config, hosts); cleanHostQueues(config, hosts); if (mode != null && mode.equalsIgnoreCase("KILLALL")) { killAll(config, hosts, thisJar, tmpConf); } hosts.clear(); // needs to be clean before looping starts // this holds md5 of this JAR and the config (which lists the workflow code to use) WorkerState provState = new WorkerState(thisJar, tmpConf); int nextRetry = RETRY_INIT; while (true) { addWorkToPend(workManager, config); // this is a reload as this can change during execution hostSet(config, hosts); for (Map.Entry<String, String> e : hosts.entrySet()) { String host = e.getKey(); if (e.getValue().equals("KILL")) { provState.setChangeStatusTo(HostStatus.KILL); messaging.sendMessage(qPrefix.concat(".").concat(host), Utils.objectToJson(provState)); hosts.replace(host, "DELETE"); continue; } provState.setChangeStatusTo(HostStatus.CHECKIN); provState.setReplyToQueue(qPrefix.concat(".ACTIVE")); if (e.getValue().equals("TO_PROVISION") || ((e.getValue().equals("CURRENT") || e.getValue().equals("RETRY")) && nextRetry == 0)) { if (!messaging.queryGaveResponse(qPrefix.concat(".").concat(host), provState.getReplyToQueue(), Utils.objectToJson(provState), 15000)) { // no response from host... but is it still up // see if docker is running before reprovision Session hostSession = Remote.getSession(config, host); boolean dockerRunning = Remote.dockerRunning(hostSession, hostSession.getUserName()); boolean workerRunning = Remote.workerRunning(hostSession, hostSession.getUserName()); Remote.closeSsh(hostSession); if (dockerRunning || workerRunning) { logger.trace("Retry host later: " + host); hosts.replace(host, "RETRY"); continue; } logger.info("No response from host '".concat(host).concat("' (re)provisioning...")); if (!workManager.provisionHost(host, PrimaryDaemon.config, thisJar, tmpConf, mode, envs)) { hosts.replace(host, "BROKEN"); messaging.removeFromStateQueue(qPrefix.concat(".").concat("BROKEN"), host); // just incase it's already there messaging.sendMessage(qPrefix.concat(".").concat("BROKEN"), "Failed to provision", host); break; } } if (!e.getValue().equals("CURRENT")) { hosts.replace(host, "CURRENT"); break; // so we start some work on this host before provisioning more } } } // we need a little sleep here or we'll kill the queues Thread.sleep(1000); if (nextRetry == 0) { nextRetry = RETRY_INIT; } else { nextRetry--; } } } private void hostSet(BaseConfiguration config, Map<String, String> hosts) { // first remove the killed off hosts List toRemove = new ArrayList(); for (Map.Entry<String, String> e : hosts.entrySet()) { if (e.getValue().equals("DELETE")) { toRemove.add(e.getKey()); } } hosts.keySet().removeAll(toRemove); // add the new hosts BaseConfiguration workerConf = Config.loadWorkers(config.getString("workerCfg")); String[] rawHosts = workerConf.getStringArray("hosts"); if (rawHosts.length == 1 && rawHosts[0].equals(new String())) { rawHosts = new String[0]; } Set<String> tmp = new LinkedHashSet<>(); for (String h : rawHosts) { if (!hosts.containsKey(h)) { hosts.put(h, "TO_PROVISION"); } tmp.add(h); } // identify which hosts need to be killed for (Map.Entry<String, String> e : hosts.entrySet()) { if (!tmp.contains(e.getKey())) { hosts.replace(e.getKey(), "KILL"); } } } private void addWorkToPend(Workflow workManager, BaseConfiguration config) throws IOException, InterruptedException, TimeoutException { // send all work into the wwd_PEND queue, data can be added during execution, this ensures duplicates don't occur List<File> iniFiles = Utils.getWorkInis(config); if (iniFiles.isEmpty()) { return; } // get all of the existing iniFiles so we can generate a uniq list String qPrefix = config.getString("qPrefix"); List<String> existing = messaging.getMessageStrings(qPrefix.concat(".PEND"), 500); Map<String, WorkflowIni> allInis = new HashMap(); for (String m : existing) { WorkflowIni iniFile = (WorkflowIni) Utils.jsonToObject(m, WorkflowIni.class); allInis.put(iniFile.getIniFile().getName(), iniFile); } for (File iniFile : iniFiles) { if (!allInis.containsKey(iniFile.getName())) { WorkflowIni newIni = new WorkflowIni(iniFile); newIni.setLogSearchCmd(workManager.getFindLogsCmds()); allInis.put(iniFile.getName(), newIni); } } Iterator itr = allInis.values().iterator(); while (itr.hasNext()) { messaging.sendMessage(qPrefix.concat(".PEND"), (WorkflowIni) itr.next()); } workManager.iniUpdate(iniFiles, config, HostStatus.PEND); } private void killAll(BaseConfiguration config, Map<String, String> hosts, File thisJar, File thisConf) throws IOException, InterruptedException, TimeoutException { WorkerState killState = new WorkerState(thisJar, thisConf); killState.setChangeStatusTo(HostStatus.KILL); String killJson = Utils.objectToJson(killState); String qPrefix = config.getString("qPrefix"); for (Map.Entry<String, String> e : hosts.entrySet()) { messaging.sendMessage(qPrefix.concat(".").concat(e.getKey()), killJson); } logger.fatal("All hosts shutting down as requested... exiting"); System.exit(0); } private void cleanHostQueues(BaseConfiguration config, Map<String, String> hosts) throws IOException, InterruptedException { String qPrefix = config.getString("qPrefix"); for (Map.Entry<String, String> e : hosts.entrySet()) { messaging.getMessageStrings(qPrefix.concat(".").concat(e.getKey()), 50); } } // private static final ExecutorService pushExecutor = Executors.newSingleThreadExecutor(); // private static FutureTask<Integer> pushTask = null; // private static FutureTask<Integer> pullTask = null; // private static PushWork pushThread = null; // private static PullWork pullThread = null; // private static WorkerState pushToWorker = null; // private static WorkerState pullFromWorker = null; // // private void startPush(Workflow workManager, WorkerState wsIn, Map<String,String> envs) throws IOException, InterruptedException { // if(pushTask != null) { // return; // } // // pushToWorker = wsIn; // String host = pushToWorker.getResource().getHostName(); // // logger.debug("Should be starting to look for data"); // // // okay get some work if it exists // String message = messaging.getMessageString("wwd_PEND", 50); // // if(message == null) { // return; // } // // // // File iniFile = (File) Utils.jsonToObject(message, File.class); // this is the original path before loading // String iniFileName = workManager.iniPathByState(config, iniFile.getAbsolutePath(), HostStatus.PEND); // iniFile = new File(iniFileName); // // logger.debug("FOUND:" + iniFile.getAbsolutePath()); // // // // tell worker to change state // pushToWorker.setChangeStatusTo(HostStatus.RECEIVE); // pushToWorker.setWorkflowIni(iniFile); // messaging.sendMessage("wwd_"+host, Utils.objectToJson(pushToWorker)); // messaging.getMessageString("wwd-active", -1); // clean the response // // pushThread = new PushWork(iniFile.getName(), config, host, workManager.filesToPush(iniFile), envs); // pushTask = new FutureTask<Integer>(pushThread); // pushExecutor.execute(pushTask); // } // // private void checkPush() throws IOException, InterruptedException { // if(pushTask == null) { // logger.trace("checkPush: nothing"); // return; // } // logger.trace("checkPush: something"); // if(pushTask.isDone()) { // logger.trace("checkPush: done"); // String sentTo = pushThread.getHost(); // int pushExitCode; // try { // pushExitCode = pushTask.get(); // } catch(ExecutionException e) { // pushExitCode = 1; // pushToWorker.setError(e.getMessage()); // } // if(pushExitCode == 0) { // pushToWorker.setChangeStatusTo(HostStatus.RUNNING); // messaging.sendMessage("wwd_"+sentTo, Utils.objectToJson(pushToWorker)); // } else { // pushToWorker.setChangeStatusTo(HostStatus.ERROR); // messaging.sendMessage("wwd_"+sentTo, Utils.objectToJson(pushToWorker)); // } // messaging.getMessageString("wwd-active", -1); // clean the response // pushTask = null; // pushThread = null; // pushToWorker = null; // } else { // logger.trace("checkPush: NOT done"); // } // } }