// Java tutorial
/* * Copyright (C) 2011 SeqWare * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package net.sourceforge.seqware.pipeline.plugins; import io.seqware.Engines; import java.io.File; import java.io.IOException; import java.io.StringBufferInputStream; import java.text.MessageFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.regex.Matcher; import java.util.regex.Pattern; import net.sourceforge.seqware.common.metadata.Metadata; import net.sourceforge.seqware.common.model.WorkflowRun; import net.sourceforge.seqware.common.model.WorkflowRunStatus; import net.sourceforge.seqware.common.module.ReturnValue; import net.sourceforge.seqware.common.util.Log; import net.sourceforge.seqware.common.util.filetools.FileTools; import net.sourceforge.seqware.common.util.filetools.FileTools.LocalhostPair; import net.sourceforge.seqware.common.util.workflowtools.WorkflowTools; import net.sourceforge.seqware.pipeline.plugin.Plugin; import net.sourceforge.seqware.pipeline.plugin.PluginInterface; import 
net.sourceforge.seqware.pipeline.tools.RunLock; import net.sourceforge.seqware.pipeline.workflowV2.engine.oozie.object.OozieJob; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.oozie.client.OozieClient; import org.apache.oozie.client.WorkflowAction; import org.apache.oozie.client.WorkflowJob; import org.apache.xerces.util.XMLChar; import org.openide.util.lookup.ServiceProvider; /** * This plugin lets you monitor the status of running workflows and updates the * metadata object with their status. * * @author boconnor * @version $Id: $Id */ @ServiceProvider(service = PluginInterface.class) public class WorkflowStatusChecker extends Plugin { public static final String WORKFLOW_RUN_ACCESSION = "workflow-run-accession"; private ReturnValue classReturnValue = new ReturnValue(); // NOTE: this is shared with WorkflowLauncher so only one can run at a time public static final String appID = "net.sourceforge.seqware.pipeline.plugins.WorkflowStatusCheckerOrLauncher"; private static final String metadata_sync = "synch_for_metadata"; // variables for use in the app private String hostname = null; private String username = null; /** * <p> * Constructor for WorkflowStatusChecker. * </p> */ public WorkflowStatusChecker() { super(); parser.acceptsAll(Arrays.asList("status-cmd", "s"), "Optional: the Pegasus status command, if you specify this option the command will be run, potentially displaying the summarized/parsed errors, but the database will not be updated.") .withRequiredArg(); parser.acceptsAll(Arrays.asList(WORKFLOW_RUN_ACCESSION, "wra"), "Optional: this will cause the program to only check the status of workflow run(s). 
For multiple runs, comma-separate with no spaces") .withRequiredArg().withValuesSeparatedBy(',').ofType(Integer.class); parser.acceptsAll(Arrays.asList("workflow-accession", "wa"), "Optional: this will cause the program to only check the status of workflow runs that are this type of workflow.") .withRequiredArg(); parser.acceptsAll(Arrays.asList("force-host", "fh"), "Optional: if specified, workflow runs scheduled to this specified host will be checked even if this is not the current host (a dangerous option).") .withRequiredArg(); parser.acceptsAll(Arrays.asList("check-failed", "cf"), "Optional: if specified, workflow runs that have previously failed will be re-checked."); parser.acceptsAll(Arrays.asList("threads-in-thread-pool", "tp"), "Optional: this will determine the number of threads to run with. Default: 1").withRequiredArg() .ofType(Integer.class); classReturnValue.setExitStatus(ReturnValue.SUCCESS); } /** * {@inheritDoc} * @return */ @Override public ReturnValue init() { RunLock.acquire(); // bail out if failed if (classReturnValue.getExitStatus() != ReturnValue.SUCCESS) { return (classReturnValue); } LocalhostPair localhost = FileTools.getLocalhost(options); // returnValue can be null if we use forcehost if (localhost.returnValue != null && localhost.returnValue.getExitStatus() != ReturnValue.SUCCESS) { return (localhost.returnValue); } else { this.hostname = localhost.hostname; } // figure out the username if (this.config.get("SW_REST_USER") == null || "".equals(this.config.get("SW_REST_USER"))) { Log.error("You must define SW_REST_USER in your SeqWare settings file!"); classReturnValue.setExitStatus(ReturnValue.FAILURE); } this.username = this.config.get("SW_REST_USER"); classReturnValue.setExitStatus(ReturnValue.SUCCESS); return classReturnValue; } /** * {@inheritDoc} * @return */ @Override public ReturnValue do_test() { return classReturnValue; } /** * {@inheritDoc} * @return */ @Override public ReturnValue do_run() { ReturnValue ret = new 
ReturnValue(ReturnValue.SUCCESS); // lets you just check a given workflow without metadata if (options.has("status-cmd") && options.valueOf("status-cmd") != null) { ret = checkWorkflow((String) options.valueOf("status-cmd")); } else { // this checks workflows and writes their status back to the DB Set<WorkflowRun> runningWorkflows = new HashSet<>(); if (options.has(WORKFLOW_RUN_ACCESSION)) { List<Integer> swids = (List<Integer>) options.valuesOf(WORKFLOW_RUN_ACCESSION); for (Integer swid : swids) { WorkflowRun wr = this.metadata.getWorkflowRun(swid); runningWorkflows.add(wr); } } else { runningWorkflows.addAll(this.metadata.getWorkflowRunsByStatus(WorkflowRunStatus.running)); runningWorkflows.addAll(this.metadata.getWorkflowRunsByStatus(WorkflowRunStatus.pending)); runningWorkflows.addAll(this.metadata.getWorkflowRunsByStatus(WorkflowRunStatus.submitted_cancel)); runningWorkflows.addAll(this.metadata.getWorkflowRunsByStatus(WorkflowRunStatus.submitted_retry)); if (options.has("check-failed")) { runningWorkflows.addAll(this.metadata.getWorkflowRunsByStatus(WorkflowRunStatus.failed)); } } // setup thread pool ExecutorService pool; // Executors.newFixedThreadPool(4); if (options.has("threads-in-thread-pool")) { int threads = (Integer) options.valueOf("threads-in-thread-pool"); if (threads <= 0) { Log.fatal("Inappropriate number of threads selected"); ret = new ReturnValue(ReturnValue.FAILURE); return ret; } pool = Executors.newFixedThreadPool(threads); } else { pool = Executors.newSingleThreadExecutor(); } List<Future<?>> futures = new ArrayList<>(runningWorkflows.size()); // loop over running workflows and check their status for (WorkflowRun wr : runningWorkflows) { futures.add(pool.submit(new CheckerThread(wr))); } for (Future<?> future : futures) { try { future.get(); } catch (InterruptedException ex) { Log.fatal(ex); } catch (ExecutionException ex) { Log.fatal(ex); } } pool.shutdown(); } return ret; } /** * {@inheritDoc} * @return */ @Override public ReturnValue 
clean_up() { return classReturnValue; } /** * {@inheritDoc} * @return */ @Override public String get_description() { return "This plugin lets you monitor the status of running workflows and updates " + "the metadata object with their status. Keep in mind a few things: 1) if the status command is specified no data " + "will be saved to the DB, this tool is just useful for gathering error reports, 2) status commands that are malformed " + "or whose status directory is not present on the filesystem will be skipped and an error noted, 3) by default every running or unknown " + "workflow_run in the database will be checked if they are owned by the username in your .seqware/settings file " + "and the hostname is the same as 'hostname --long', and 4) you can force the checking of workflows with a particular " + "host value but be careful with that."; } /** * This takes like 15 seconds per check! * * @param statusCmd * @return */ private ReturnValue checkWorkflow(String statusCmd) { Log.info("Checking the status using " + statusCmd); WorkflowTools workflowTools = new WorkflowTools(); String statusDir = findStatusDir(statusCmd); ReturnValue ret = workflowTools.watchWorkflow(statusCmd, statusDir, 1); Log.info("OUT: " + ret.getStdout()); Log.info("ERR: " + ret.getStderr()); Log.info("STATUS: " + ret.getExitStatus()); return (ret); } private String findStatusDir(String statusCmd) { String statusDir = null; if (statusCmd == null || "".equals(statusCmd)) { return (statusDir); } Pattern p = Pattern.compile("pegasus-status -l (\\S+)"); Matcher m = p.matcher(statusCmd); if (m.find()) { statusDir = m.group(1); } return (statusDir); } protected Metadata getMetadata() { return metadata; } private final class CheckerThread implements Runnable { private final WorkflowRun wr; protected CheckerThread(WorkflowRun wr) { this.wr = wr; } @Override public void run() { Log.info("ownerUserName: " + wr.getOwnerUserName()); Log.info("workflowAccession: " + wr.getWorkflowAccession()); 
Log.info("workflowRunID: " + wr.getWorkflowRunId()); // check that this workflow run matches the specified workflow if provided if (options.has("workflow-accession") && options.valueOf("workflow-accession") != null && !((String) options.valueOf("workflow-accession")) .equals(wr.getWorkflowAccession().toString())) { return; } // ignore host matching when run accession is specified if (options.has(WORKFLOW_RUN_ACCESSION) == false) { // check the host is either overridden or this is the same host the // workflow was launched from if (options.has("force-host") && options.valueOf("force-host") != null && !((String) options.valueOf("force-host")).equals(wr.getHost())) { return; } else if (!options.has("force-host") && WorkflowStatusChecker.this.hostname != null && !WorkflowStatusChecker.this.hostname.equals(wr.getHost())) { return; } } // check the rest API username from SeqWare settings is the same username // in the DB if (WorkflowStatusChecker.this.username == null || wr.getOwnerUserName() == null || !WorkflowStatusChecker.this.username.equals(wr.getOwnerUserName())) { return; } if (Engines.isOozie(wr.getWorkflowEngine())) { checkOozie(); } else { checkPegasus(); } } private void checkOozie() { try { OozieClient oc = new OozieClient((String) config.get("OOZIE_URL")); String jobId = wr.getStatusCmd(); if (jobId == null) { handlePreLaunch(); return; } WorkflowJob wfJob = oc.getJobInfo(jobId); if (wfJob == null) { throw new IllegalStateException("No Oozie job found for WorkflowRun: swid=" + wr.getSwAccession() + " oozie-id=" + jobId); } WorkflowRunStatus curSqwStatus = wr.getStatus(); WorkflowRunStatus nextSqwStatus; if (curSqwStatus == null) { nextSqwStatus = convertOozieToSeqware(wfJob.getStatus()); } else { switch (curSqwStatus) { case submitted_cancel: { switch (wfJob.getStatus()) { case PREP: case RUNNING: case SUSPENDED: // Note: here we treat SUSPENDED as running, so that it can be killed oc.kill(jobId); nextSqwStatus = WorkflowRunStatus.cancelled; break; default: 
// Let others propagate as normal nextSqwStatus = convertOozieToSeqware(wfJob.getStatus()); } break; } case submitted_retry: { switch (wfJob.getStatus()) { case SUSPENDED: oc.resume(jobId); nextSqwStatus = WorkflowRunStatus.pending; break; case FAILED: case KILLED: Properties conf = getCurrentConf(wfJob); conf.setProperty(OozieClient.RERUN_FAIL_NODES, "true"); oc.reRun(jobId, conf); nextSqwStatus = WorkflowRunStatus.pending; break; default: // Let others propagate as normal nextSqwStatus = convertOozieToSeqware(wfJob.getStatus()); } break; } default: nextSqwStatus = convertOozieToSeqware(wfJob.getStatus()); } } String err; String out; if (wr.getWorkflowEngine().equals("oozie-sge")) { File dir = OozieJob.scriptsDir(wr.getCurrentWorkingDir()); if (dir.exists()) { Set<String> extIds = sgeIds(wfJob); out = sgeConcat(sgeFiles(SGE_OUT_FILE, dir, extIds)); err = sgeConcat(sgeFiles(SGE_ERR_FILE, dir, extIds)); } else { // working dir has been deleted, do not wipe-out the stored output out = wr.getStdOut(); err = wr.getStdErr(); } } else { StringBuilder sb = new StringBuilder(); for (WorkflowAction action : wfJob.getActions()) { if (action.getErrorMessage() != null) { sb.append(MessageFormat.format(" Name: {0} Type: {1} ErrorMessage: {2}\n", action.getName(), action.getType(), action.getErrorMessage())); } } out = ""; err = sb.toString(); } synchronized (metadata_sync) { wr.setStatus(nextSqwStatus); wr.setStdErr(err); wr.setStdOut(out); WorkflowStatusChecker.this.metadata.updateWorkflowRun(wr); } } catch (RuntimeException e) { throw e; } catch (Exception e) { throw new RuntimeException(e); } } private void handlePreLaunch() { switch (wr.getStatus()) { case submitted_cancel: // run cancelled before launching wr.setStatus(WorkflowRunStatus.cancelled); synchronized (metadata_sync) { WorkflowStatusChecker.this.metadata.updateWorkflowRun(wr); } break; case submitted_retry: // retrying a pre-launch cancellation wr.setStatus(WorkflowRunStatus.submitted); synchronized 
(metadata_sync) { WorkflowStatusChecker.this.metadata.updateWorkflowRun(wr); } break; default: throw new IllegalStateException("No Oozie job ID found for WorkflowRun: swid=" + wr.getSwAccession() + " status=" + wr.getStatus().name()); } } @SuppressWarnings("deprecation") private Properties getCurrentConf(WorkflowJob wfJob) { /* * Why this method is needed: * * To rerun an oozie job, one must pass in a Properties instance. * * The current conf of a WorkflowJob is only exposed via getConf() which * does not return a Properties instance, but rather a String of XML. * * The XML is not of a Properties, but rather of a hadoop Configuration! * * A hadoop Configuration instance cannot be loaded from a String, but * only from resources or an input stream. * * Further, a hadoop Configuration instance does not expose a public * method for obtaining a Properties representation. * * It does expose an iterator of Map.Entry objects (which is internally * obtained from a Properties instance!). * * It'd be swell if these guys could just pick one representation, or at * least an easy way to convert between them. */ Configuration conf = new Configuration(false); conf.addResource(new StringBufferInputStream(wfJob.getConf())); Properties props = new Properties(); for (Map.Entry<String, String> e : conf) { props.setProperty(e.getKey(), e.getValue()); } return props; } private WorkflowRunStatus convertOozieToSeqware(WorkflowJob.Status oozieStatus) { WorkflowRunStatus sqwStatus; /* * There's no analog to SUSPENDED on the seware side, treating as failed so it can be picked up for retry */ switch (oozieStatus) { case PREP: case RUNNING: case SUSPENDED: sqwStatus = WorkflowRunStatus.running; break; case FAILED: sqwStatus = WorkflowRunStatus.failed; break; case KILLED: /* * NOTE: At the time of this writing, Oozie workflows that fail due to * an error have an oozie status of KILLED. This would result in failed * workflows appearing in seqware as 'cancelled'. 
* * To compensate for this idiosyncrasy, we will treat KILLED workflow * runs as FAILED. Workflow runs that are cancelled via seqware will * correctly have their status properly set to 'cancelled', since we are * aware of the intent. * * The drawback is that workflow runs killed via other means, e.g., HUE, * will be propagated back to seqware as 'failed'. I feel this is the * best of the bad options. */ // sqwStatus = WorkflowRunStatus.cancelled; sqwStatus = WorkflowRunStatus.failed; break; case SUCCEEDED: sqwStatus = WorkflowRunStatus.completed; break; default: throw new RuntimeException("Unexpected oozie status value: " + oozieStatus); } return sqwStatus; } private void checkPegasus() { if (wr.getStatus() != null) { switch (wr.getStatus()) { case submitted_cancel: case submitted_retry: // This should be prevented from ever happening on the submit-side. throw new RuntimeException("cancel/retry not supported with pegasus engine."); default: // continue } } // check the owner of the status dir boolean dirOwner = true; String statusDir = findStatusDir(wr.getStatusCmd()); if (statusDir != null && !FileTools.isFileOwner(statusDir)) { dirOwner = false; Log.info("You don't own the status directory: " + wr.getStatusCmd()); } else if (statusDir == null) { dirOwner = false; Log.info("The status directory can't be parsed!: " + wr.getStatusCmd()); } if (dirOwner) { ReturnValue currRet = checkWorkflow(wr.getStatusCmd()); if (currRet.getExitStatus() == ReturnValue.SUCCESS) { synchronized (metadata_sync) { WorkflowStatusChecker.this.metadata.update_workflow_run(wr.getWorkflowRunId(), wr.getCommand(), wr.getTemplate(), WorkflowRunStatus.completed, wr.getStatusCmd(), wr.getCurrentWorkingDir(), wr.getDax(), wr.getIniFile(), wr.getHost(), currRet.getStderr(), currRet.getStdout(), wr.getWorkflowEngine(), wr.getInputFileAccessions()); } } else if (currRet.getExitStatus() == ReturnValue.PROCESSING) { synchronized (metadata_sync) { 
WorkflowStatusChecker.this.metadata.update_workflow_run(wr.getWorkflowRunId(), wr.getCommand(), wr.getTemplate(), WorkflowRunStatus.running, wr.getStatusCmd(), wr.getCurrentWorkingDir(), wr.getDax(), wr.getIniFile(), wr.getHost(), currRet.getStderr(), currRet.getStdout(), wr.getWorkflowEngine(), wr.getInputFileAccessions()); } } else if (currRet.getExitStatus() == ReturnValue.FAILURE) { Log.error( "WORKFLOW FAILURE: this workflow has failed and this status will be saved to the DB."); synchronized (metadata_sync) { WorkflowStatusChecker.this.metadata.update_workflow_run(wr.getWorkflowRunId(), wr.getCommand(), wr.getTemplate(), WorkflowRunStatus.failed, wr.getStatusCmd(), wr.getCurrentWorkingDir(), wr.getDax(), wr.getIniFile(), wr.getHost(), currRet.getStderr(), currRet.getStdout(), wr.getWorkflowEngine(), wr.getInputFileAccessions()); } } else if (currRet.getExitStatus() == ReturnValue.UNKNOWN) { Log.error( "ERROR: the workflow status has returned UNKNOWN, this is typically if the workflow status command points" + "to a non-existant directory or a directory that is not writable or owned by you. 
No information will be saved to the" + "DB since the workflow state cannot be determined!"); } } } } private static final Pattern SGE_OUT_FILE = Pattern.compile(".+\\.o(\\d+)"); private static final Pattern SGE_ERR_FILE = Pattern.compile(".+\\.e(\\d+)"); private static SortedMap<Integer, File> sgeFiles(Pattern p, File dir, final Set<String> extIds) { SortedMap<Integer, File> idFiles = new TreeMap<>(); for (File f : dir.listFiles()) { Matcher m = p.matcher(f.getName()); if (m.find()) { String id = m.group(1); if (extIds.contains(id)) { idFiles.put(Integer.parseInt(id), f); } } } return idFiles; } private static final Pattern SGE_FILE = Pattern.compile("(.+)\\.[eo]\\d+"); private static String sgeConcat(SortedMap<Integer, File> idFiles) { StringBuilder sb = new StringBuilder(); for (Map.Entry<Integer, File> e : idFiles.entrySet()) { File f = e.getValue(); Matcher m = SGE_FILE.matcher(f.getName()); m.find(); String jobName = m.group(1); sb.append("-----------------------------------------------------------------------"); sb.append("\nJob Name: "); sb.append(jobName); sb.append("\nJob ID: "); sb.append(e.getKey()); sb.append("\nFile: "); sb.append(f.getAbsolutePath()); sb.append("\nUpdated: "); sb.append(new Date(f.lastModified())); sb.append("\nContents:\n"); try { sb.append(stripInvalidXmlCharacters(FileUtils.readFileToString(f))); } catch (IOException ex) { sb.append(" *** ERROR READING FILE: "); sb.append(ex.getMessage()); sb.append(" ***"); } if (sb.charAt(sb.length() - 1) != '\n') { sb.append("\n"); } sb.append("-----------------------------------------------------------------------\n\n"); } return sb.toString(); } private static Set<String> sgeIds(WorkflowJob wf) { List<WorkflowAction> actions = wf.getActions(); final Set<String> extIds = new HashSet<>(); for (WorkflowAction a : actions) { String extId = a.getExternalId(); if (a != null) { extIds.add(extId); } } return extIds; } /** * Stolen from * 
https://stackoverflow.com/questions/93655/stripping-invalid-xml-characters-in-java/9635310#9635310 * * @param input * @return */ public static String stripInvalidXmlCharacters(String input) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < input.length(); i++) { char c = input.charAt(i); if (XMLChar.isValid(c)) { sb.append(c); } } return sb.toString(); } }