net.sourceforge.seqware.pipeline.deciders.BasicDecider.java Source code


Introduction

Here is the source code for net.sourceforge.seqware.pipeline.deciders.BasicDecider.java

Source

/*
 * Copyright (C) 2012 SeqWare
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package net.sourceforge.seqware.pipeline.deciders;

import java.io.*;
import java.util.*;
import java.util.Map.Entry;
import java.util.logging.Level;
import java.util.logging.Logger;
import net.sourceforge.seqware.common.hibernate.FindAllTheFiles;
import net.sourceforge.seqware.common.hibernate.FindAllTheFiles.Header;
import net.sourceforge.seqware.common.metadata.Metadata;
import net.sourceforge.seqware.common.metadata.MetadataDB;
import net.sourceforge.seqware.common.metadata.MetadataWS;
import net.sourceforge.seqware.common.model.Study;
import net.sourceforge.seqware.common.model.WorkflowParam;
import net.sourceforge.seqware.common.model.WorkflowRun;
import net.sourceforge.seqware.common.module.FileMetadata;
import net.sourceforge.seqware.common.module.ReturnValue;
import net.sourceforge.seqware.common.util.Log;
import net.sourceforge.seqware.common.util.filetools.FileTools;
import net.sourceforge.seqware.common.util.filetools.FileTools.LocalhostPair;
import net.sourceforge.seqware.pipeline.decider.DeciderInterface;
import net.sourceforge.seqware.pipeline.plugin.Plugin;
import net.sourceforge.seqware.pipeline.plugin.PluginInterface;
import net.sourceforge.seqware.pipeline.runner.PluginRunner;
import net.sourceforge.seqware.pipeline.tools.SetOperations;
import org.apache.commons.lang.StringUtils;
import org.openide.util.lookup.ServiceProvider;

/**
 *
 * @author mtaschuk
 */
@ServiceProvider(service = PluginInterface.class)
public class BasicDecider extends Plugin implements DeciderInterface {

    protected ReturnValue ret = new ReturnValue();
    private MetadataWS metaws;
    private Header header = Header.FILE_SWA;
    private Set<String> parentWorkflowAccessions = new TreeSet<String>();
    private Set<String> workflowAccessionsToCheck = new TreeSet<String>();
    private List<String> metaTypes = null;
    private Boolean forceRunAll = null;
    private Boolean test = null;
    private String workflowAccession = null;
    protected Random random = new Random(System.currentTimeMillis());
    private Boolean metadataWriteback = null;
    private Collection<String> parentAccessionsToRun;
    private Collection<String> filesToRun;
    private Collection<String> workflowParentAccessionsToRun;
    private Set<String> studyReporterOutput;
    private ArrayList<String> iniFiles;
    private Boolean runNow = null;
    private Boolean skipStuff = null;
    private int launchMax = Integer.MAX_VALUE, launched = 0;
    private int rerunMax = 5;
    private String host = null;

    public BasicDecider() {
        super();
        parser.acceptsAll(Arrays.asList("wf-accession"), "The workflow accession of the workflow")
                .withRequiredArg();
        parser.acceptsAll(Arrays.asList("study-name"),
                "Full study name. One of sample-name, study-name, sequencer-run-name or all is required.")
                .withRequiredArg();
        parser.acceptsAll(Arrays.asList("sample-name"),
                "Full sample name. One of sample-name, study-name, sequencer-run-name or all is required.")
                .withRequiredArg();
        parser.acceptsAll(Arrays.asList("sequencer-run-name"),
                "Full sequencer run name. One of sample-name, study-name, sequencer-run-name or all is required.")
                .withRequiredArg();
        parser.accepts("all",
                "Run everything. One of sample-name, study-name, sequencer-run-name or all is required.");
        parser.acceptsAll(Arrays.asList("group-by"),
                "Optional: Group by one of the headings in FindAllTheFiles. Default: FILE_SWA. One of LANE_SWA or IUS_SWA.")
                .withRequiredArg();
        parser.acceptsAll(Arrays.asList("parent-wf-accessions"),
                "The workflow accessions of the parent workflows, comma-separated with no spaces. May also specify the meta-type.")
                .withRequiredArg();
        parser.acceptsAll(Arrays.asList("meta-types"),
                "The comma-separated meta-type(s) of the files to run this workflow with. Alternatively, use parent-wf-accessions.")
                .withRequiredArg();
        parser.acceptsAll(Arrays.asList("check-wf-accessions"),
                "The comma-separated, no spaces, workflow accessions of the workflow that perform the same function (e.g. older versions). Any files that have been processed with these workflows will be skipped.")
                .withRequiredArg();
        parser.acceptsAll(Arrays.asList("force-run-all"),
                "Forces the decider to run all matches regardless of whether they've been run before or not");
        parser.acceptsAll(Arrays.asList("test"),
                "Testing mode. Prints the INI files to standard out and does not submit the workflow.");
        parser.acceptsAll(Arrays.asList("no-meta-db", "no-metadata"),
                "Optional: a flag that prevents metadata writeback (which is done "
                        + "by default) by the Decider and that is subsequently "
                        + "passed to the called workflow which can use it to determine if "
                        + "they should write metadata at runtime on the cluster.");
        parser.acceptsAll(Arrays.asList("schedule"),
                "Schedule this workflow to be run rather than running it immediately. See also: --run");
        parser.acceptsAll(Arrays.asList("run"),
                "Run this workflow now. This is the default behaviour. See also: --schedule");
        parser.acceptsAll(Arrays.asList("ignore-skip-flag"),
                "Ignores any 'skip' flags on lanes, IUSes, sequencer runs, samples, etc. Use caution.");
        parser.acceptsAll(Arrays.asList("launch-max"),
                "The maximum number of jobs to launch at once. Default: infinite.").withRequiredArg();
        parser.acceptsAll(Arrays.asList("rerun-max"),
                "The maximum number of times to re-launch a workflowrun if failed. Default: 5.").withRequiredArg();
        parser.acceptsAll(Arrays.asList("host", "ho"),
                "Used only in combination with --schedule to schedule onto a specific host. If not provided, the default is the local host")
                .withRequiredArg();
        ret.setExitStatus(ReturnValue.SUCCESS);
    }
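
    // Example invocation (not part of the original source): a sketch of how this
    // decider is typically run through the SeqWare plugin runner, following the same
    // "java -jar ... --plugin <class> --" pattern that do_summary_command() assembles
    // for the WorkflowLauncher further down. The accessions, study name and meta-type
    // are hypothetical placeholders.
    //
    //   java -jar seqware-distribution-<version>-full.jar \
    //     --plugin net.sourceforge.seqware.pipeline.deciders.BasicDecider -- \
    //     --wf-accession 12345 \
    //     --study-name MyStudy \
    //     --parent-wf-accessions 6789 \
    //     --meta-types application/bam \
    //     --schedule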

    /**
     * {@inheritDoc}
     */
    @Override
    public String get_description() {
        return "The decider from which all other deciders came";
    }

    /**
     * This method is intended to be called AFTER any implementing class's init
     * method.
     */
    @Override
    public ReturnValue init() {

        if (!(options.has("study-name") ^ options.has("sample-name") ^ options.has("sequencer-run-name")
                ^ options.has("all"))) {
            Log.stdout(this.get_syntax());
            Log.error("Please provide one of sample-name, study-name, sequencer-run-name or all");
            ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
        }

        try {
            ResourceBundle rb = PropertyResourceBundle.getBundle("decider");
            String parents = rb.getString("parent-workflow-accessions");
            String checks = rb.getString("check-wf-accessions");
            String wfa = rb.getString("workflow-accession");
            if (wfa != null && !wfa.trim().isEmpty()) {
                this.setWorkflowAccession(wfa);
            }
            if (parents != null && !parents.trim().isEmpty()) {
                List<String> pas = Arrays.asList(parents.split(","));
                this.setParentWorkflowAccessions(new TreeSet<String>(pas));
            }
            if (checks != null && !checks.trim().isEmpty()) {
                List<String> cwa = Arrays.asList(checks.split(","));
                this.setWorkflowAccessionsToCheck(new TreeSet<String>(cwa));

            }
        } catch (MissingResourceException e) {
            Log.debug("No decider resource found: ", e);
        }
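
        // Example (not part of the original source): the resource bundle read above maps
        // to a decider.properties file on the classpath. A minimal sketch, with
        // hypothetical accession values:
        //
        //   workflow-accession=12345
        //   parent-workflow-accessions=6789,6790
        //   check-wf-accessions=11111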

        //Group-by allows you to group processing events based on one characteristic.
        //Normally, this allows you to run on a group of samples (for example, all
        //of the IUS-level BAM files). The default is no grouping, so the workflow
        //will be run independently on every file it finds
        if (options.has("group-by")) {
            String headerString = (String) options.valueOf("group-by");
            try {
                header = Header.valueOf(headerString);
            } catch (IllegalArgumentException e) {
                e.printStackTrace();
                StringBuilder sb = new StringBuilder();
                sb.append("group-by attribute must be one of the following: \n");
                for (Header h : Header.values()) {
                    sb.append("\t").append(h.name()).append("\n");

                }
                Log.stdout(sb.toString());
                ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
            }
        }

        if (options.has("wf-accession")) {
            workflowAccession = (String) options.valueOf("wf-accession");
        } else if (workflowAccession == null) {
            Log.error("Must specify the workflow-accession of the workflow to run");
            ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
        }

        // Check for filtering on the files. Either parent workflow accessions
        // or file meta-types, or both
        boolean hasFilter = false;
        if (options.has("parent-wf-accessions")) {
            String pas = (String) options.valueOf("parent-wf-accessions");
            for (String p : pas.split(",")) {
                parentWorkflowAccessions.add(p.trim());
                hasFilter = true;
            }
        }
        if (options.has("meta-types")) {
            String mt = (String) options.valueOf("meta-types");
            metaTypes = Arrays.asList(mt.split(","));
            hasFilter = true;
        }

        if (!hasFilter && parentWorkflowAccessions.isEmpty() && metaTypes == null) {
            Log.error("You must run a decider with parent-wf-accessions or meta-types (or both).");
            ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
        }

        //Repeat-checking options. if present, check-wf-accessions will check to
        //see if the samples have been run through a particular workflow before.
        //These workflows will not be launched again
        //Optionally you can force the decider to re-run all possibilities in
        //the database with force-run-all.
        if (options.has("check-wf-accessions")) {

            String pas = (String) options.valueOf("check-wf-accessions");
            Log.debug("Pas = " + pas);
            if (pas.contains(",")) {
                for (String p : pas.split(",")) {
                    workflowAccessionsToCheck.add(p.trim());
                }
            } else {
                workflowAccessionsToCheck.add(pas.trim());
            }
            //Separate out this logic
            //workflowAccessionsToCheck.add(workflowAccession);
        }
        if (forceRunAll == null) {
            forceRunAll = options.has("force-run-all");
        }

        //test turns off all of the submission functions and just prints to stdout
        if (test == null) {
            test = options.has("test");
        }
        if (test) {
            StringWriter writer = new StringWriter();
            try {
                FindAllTheFiles.printHeader(writer, true);
                Log.stdout(writer.toString());
            } catch (IOException ex) {
                Log.error(ex);
            }
        }

        if (skipStuff == null) {
            skipStuff = !options.has("ignore-skip-flag");
        }

        if (metadataWriteback == null) {
            metadataWriteback = !(options.has("no-metadata") || options.has("no-meta-db"));
        }

        if (runNow == null) {
            if (options.has("schedule")) {
                runNow = false;
            } else {
                runNow = true;
            }
        }

        LocalhostPair localhostPair = FileTools.getLocalhost(options);
        String localhost = localhostPair.hostname;
        if (host == null) {
            if (options.has("host") || options.has("ho")) {
                host = (String) options.valueOf("host");
            } else {
                host = localhost;
            }
        }
        if (localhostPair.returnValue.getExitStatus() != ReturnValue.SUCCESS && host == null) {
            Log.error("Could not determine localhost: Return value " + localhostPair.returnValue.getExitStatus());
            Log.error("Please supply it on the command line with --host");
            ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
        } else if (!host.equals(localhost)) {
            Log.warn("The localhost and the scheduling host are not the same: " + localhost + " and " + host
                    + ". Proceeding anyway.");
        }

        if (options.has("launch-max")) {
            try {
                launchMax = Integer.parseInt(options.valueOf("launch-max").toString());
            } catch (NumberFormatException e) {
                Log.error("The launch-max parameter must be an integer. Unparseable integer: "
                        + options.valueOf("launch-max").toString());
                ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
            }
        }

        if (options.has("rerun-max")) {
            try {
                rerunMax = Integer.parseInt(options.valueOf("rerun-max").toString());
            } catch (NumberFormatException e) {
                Log.error("The rerun-max parameter must be an integer. Unparseable integer: "
                        + options.valueOf("rerun-max").toString());
                ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
            }
        }

        if (workflowAccession == null || "".equals(workflowAccession)) {
            Log.error("The wf-accession must be defined.");
            ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
        }

        if (metadata instanceof MetadataDB) {
            metaws = new MetadataWS();
            ret = metaws.init(config.get("SW_REST_URL"), config.get("SW_REST_USER"), config.get("SW_REST_PASS"));
        } else if (metadata instanceof MetadataWS) {
            metaws = (MetadataWS) metadata;
            //metadb = new MetadataDB();
            //String connection = "jdbc:postgresql://" + config.get("SW_DB_SERVER") + "/" + config.get("SW_DB");
            //ret = metadb.init(connection, config.get("SW_DB_USER"), config.get("SW_DB_PASS"));
        } else {
            ret.setExitStatus(ReturnValue.DBCOULDNOTINITIALIZE);
        }
        return ret;
    }

    @Override
    public ReturnValue do_test() {
        return ReturnValue.featureNotImplemented();
    }

    @Override
    public ReturnValue do_run() {
        String groupBy = header.getTitle();
        Map<String, List<ReturnValue>> mappedFiles = null;
        List<ReturnValue> vals = null;

        if (options.has("all")) {
            List<ReturnValue> rv;
            List<Study> studies = metadata.getAllStudies();
            for (Study study : studies) {
                String name = study.getTitle();
                Log.stdout("Retrieving study " + name);
                rv = metadata.findFilesAssociatedWithAStudy(name, true);
                mappedFiles = separateFiles(rv, groupBy);
                ret = launchWorkflows(mappedFiles);
                if (ret.getExitStatus() != ReturnValue.SUCCESS) {
                    break;
                }
            }
            return ret;
        } else if (options.has("study-name")) {
            String studyName = (String) options.valueOf("study-name");
            vals = metaws.findFilesAssociatedWithAStudy(studyName, true);
        } else if (options.has("sample-name")) {
            String sampleName = (String) options.valueOf("sample-name");
            vals = metaws.findFilesAssociatedWithASample(sampleName, true);
        } else if (options.has("sequencer-run-name")) {
            String runName = (String) options.valueOf("sequencer-run-name");
            vals = metaws.findFilesAssociatedWithASequencerRun(runName, true);
        } else {
            Log.error("Unknown option");
        }

        mappedFiles = separateFiles(vals, groupBy);
        ret = launchWorkflows(mappedFiles);
        return ret;
    }

    private ReturnValue launchWorkflows(Map<String, List<ReturnValue>> mappedFiles) {
        if (mappedFiles != null) {

            for (Entry<String, List<ReturnValue>> entry : mappedFiles.entrySet()) {
                Log.info("Considering key:" + entry.getKey());

                parentAccessionsToRun = new HashSet<String>();
                filesToRun = new HashSet<String>();
                workflowParentAccessionsToRun = new HashSet<String>();
                studyReporterOutput = new HashSet<String>();

                //for each grouping (e.g. sample), iterate through the files
                List<ReturnValue> files = entry.getValue();
                Log.info("key:" + entry.getKey() + " consists of " + files.size() + " files");
                List<Integer> fileSWIDs = new ArrayList<Integer>();

                for (ReturnValue file : files) {
                    String wfAcc = file.getAttribute(Header.WORKFLOW_SWA.getTitle());
                    String fileSWID = file.getAttribute(Header.FILE_SWA.getTitle());
                    fileSWIDs.add(Integer.valueOf(fileSWID));
                    Log.debug(Header.WORKFLOW_SWA.getTitle() + ": WF accession is " + wfAcc);

                    //if there are no parent accessions, or if the parent accession is correct.
                    //This assumes that if wfAcc is null then parentWorkflowAccessions will be empty,
                    //so we can still find files of a particular metatype with no wfAcc
                    if (parentWorkflowAccessions.isEmpty()
                            || (wfAcc != null && parentWorkflowAccessions.contains(wfAcc))) {

                        //check for each file if the metatype is correct (if it exists), 
                        //or just add it
                        for (FileMetadata fm : file.getFiles()) {
                            if (metaTypes != null) {
                                if (metaTypes.contains(fm.getMetaType())) {
                                    addFileToSets(file, fm, workflowParentAccessionsToRun, parentAccessionsToRun,
                                            filesToRun);
                                }
                            } else {
                                addFileToSets(file, fm, workflowParentAccessionsToRun, parentAccessionsToRun,
                                        filesToRun);
                            }
                        }

                    }
                } //end iterate through files

                if (!parentAccessionsToRun.isEmpty() && !filesToRun.isEmpty()
                        && !workflowParentAccessionsToRun.isEmpty()) {
                    final String parentAccessionString = commaSeparateMy(parentAccessionsToRun);
                    final String fileString = commaSeparateMy(filesToRun);
                    Log.debug("FileString: " + fileString);
                    boolean rerun = rerunWorkflowRun(filesToRun, fileSWIDs);

                    // SEQWARE-1728 - move creation of ini to launches (and test launches) to conserve disk space 
                    iniFiles = new ArrayList<String>();

                    ReturnValue newRet = this.doFinalCheck(fileString, parentAccessionString);
                    if (newRet.getExitStatus() != ReturnValue.SUCCESS) {
                        Log.warn("Final check failed. Return value was: " + newRet.getExitStatus());
                        rerun = false;
                    }

                    if (forceRunAll) {
                        Log.debug("Forcing the running of this workflow because --force-run-all was enabled");
                        rerun = true;
                    }
                    //if we're in testing mode or we don't want to rerun and we don't want to force the re-processing
                    if (test || !rerun) {
                        // we need to simplify the logic and make it more readable here for testing
                        if (rerun) {
                            iniFiles.add(createIniFile(fileString, parentAccessionString));
                            for (String line : studyReporterOutput) {
                                Log.stdout(line);
                            }
                            Log.debug("NOT RUNNING (but would have ran). test=" + test + " or !rerun=" + !rerun);
                            reportLaunch();
                            // SEQWARE-1642 - output to stdout only whether a decider would launch
                            ret = do_summary();
                            launched++;
                        } else {
                            for (String line : studyReporterOutput) {
                                Log.debug(line);
                            }
                            Log.debug(
                                    "NOT RUNNING (and would not have run). test=" + test + " or !rerun=" + !rerun);
                        }
                    } else if (launched < launchMax) {
                        iniFiles.add(createIniFile(fileString, parentAccessionString));
                        launched++;
                        //construct the INI and run it
                        for (String line : studyReporterOutput) {
                            Log.stdout(line);
                        }
                        Log.debug("RUNNING");
                        //                        // setup workflow object
                        //                        Workflow w = new Workflow(metadata, config);
                        //                        if (runNow) {
                        //                            ret = w.launchInstalledBundle(workflowAccession, null,
                        //                                    iniFiles, metadataWriteback, new ArrayList(parentAccessionsToRun),
                        //                                    new ArrayList(workflowParentAccessionsToRun), false, options.nonOptionArguments());
                        //                        } else {
                        //                            ret = w.scheduleInstalledBundle(workflowAccession, null,
                        //                                    iniFiles, metadataWriteback, new ArrayList(parentAccessionsToRun),
                        //                                    new ArrayList(workflowParentAccessionsToRun), false, options.nonOptionArguments());
                        //                        }
                        //construct the INI and run it                     
                        ArrayList<String> runArgs = constructCommand();
                        PluginRunner.main(runArgs.toArray(new String[runArgs.size()]));
                        Log.stdout("Launching.");
                        do_summary();

                    }
                    // separate this out so that it is reachable when in --test
                    if (launched >= launchMax) {
                        Log.info("The maximum number of jobs has been " + (runNow ? "launched" : "scheduled")
                                + ". The next jobs will be launched when the decider runs again.");
                        ret.setExitStatus(ReturnValue.QUEUED);
                        // SEQWARE-1666 - short-circuit and exit when the maximum number of jobs have been launched
                        return ret;
                    }
                } else {
                    Log.debug("Cannot run: parentAccessions: " + parentAccessionsToRun.size() + " filesToRun: "
                            + filesToRun.size() + " workflowParentAccessions: "
                            + workflowParentAccessionsToRun.size());
                }

            }
        } else {
            Log.stdout("There are no files");
        }
        return ret;
    }

    protected ArrayList<String> constructCommand() {
        ArrayList<String> runArgs = new ArrayList<String>();
        runArgs.add("--plugin");
        runArgs.add("net.sourceforge.seqware.pipeline.plugins.WorkflowLauncher");
        runArgs.add("--");
        runArgs.add("--workflow-accession");
        runArgs.add(workflowAccession);
        runArgs.add("--ini-files");
        runArgs.add(commaSeparateMy(iniFiles));
        if (!metadataWriteback) {
            runArgs.add("--no-metadata");
        }
        runArgs.add("--parent-accessions");
        runArgs.add(commaSeparateMy(parentAccessionsToRun));
        runArgs.add("--link-workflow-run-to-parents");
        runArgs.add(commaSeparateMy(workflowParentAccessionsToRun));
        if (!runNow) {
            runArgs.add("--schedule");
        }
        runArgs.add("--host");
        runArgs.add(host);

        runArgs.add("--");
        for (String s : options.nonOptionArguments()) {
            runArgs.add(s);
        }
        return runArgs;
    }
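
    // Example (not part of the original source): for one grouping, the argument list
    // built above resolves to a WorkflowLauncher invocation roughly like the following
    // (accessions, host and INI path are hypothetical; --schedule and --no-metadata
    // appear only when the corresponding flags were set):
    //
    //   --plugin net.sourceforge.seqware.pipeline.plugins.WorkflowLauncher --
    //     --workflow-accession 12345 --ini-files /tmp/123456789.ini
    //     --parent-accessions 111,222 --link-workflow-run-to-parents 333,444
    //     --schedule --host my.host.example.org -- <non-option arguments>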

    /**
     * Returns true only if there are more files to run than have been run on
     * any workflow so far, or if the filesToRun have different filepaths than
     * those that have been run before.
     * @param filesToRun
     * @param fileSWIDs
     * @return 
     */
    protected boolean rerunWorkflowRun(final Collection<String> filesToRun, List<Integer> fileSWIDs) {

        boolean rerun;
        List<Boolean> failures = new ArrayList<Boolean>();
        List<WorkflowRun> runs1 = produceAccessionListWithFileList(fileSWIDs,
                "CHILDREN_VIA_PROCESSING_RELATIONSHIP");
        rerun = processWorkflowRuns(filesToRun, failures, runs1);
        if (!rerun) {
            Log.debug("This workflow has failed to launch based on workflow runs found via Processing");
            return rerun;
        }
        List<WorkflowRun> runs2 = produceAccessionListWithFileList(fileSWIDs, "CHILDREN_VIA_IUS_WORKFLOW_RUN");
        rerun = processWorkflowRuns(filesToRun, failures, runs2);
        if (!rerun) {
            Log.debug("This workflow has failed to launch based on workflow runs found via IUS");
            return rerun;
        }
        List<WorkflowRun> runs3 = produceAccessionListWithFileList(fileSWIDs, "CHILDREN_VIA_LANE_WORKFLOW_RUN");
        rerun = processWorkflowRuns(filesToRun, failures, runs3);
        if (!rerun) {
            Log.debug("This workflow has failed to launch based on workflow runs found via Lane");
            return rerun;
        }
        if (failures.size() >= this.rerunMax) {
            Log.debug("This workflow has failed " + rerunMax + " times: not running");
            rerun = false;
        }
        return rerun;
    }

    /**
     * Determines whether a workflow run completed, failed, or other (submitted, pending, etc.)
     * @param workflowRunAcc
     * @return 
     */
    protected PREVIOUS_RUN_STATUS determineStatus(int workflowRunAcc) {
        String generateStatus = this.generateStatus(workflowRunAcc);
        if (generateStatus.equals(Metadata.COMPLETED)) {
            return PREVIOUS_RUN_STATUS.COMPLETED;
        } else if (generateStatus.equals(Metadata.FAILED)) {
            return PREVIOUS_RUN_STATUS.FAILED;
        } else {
            return PREVIOUS_RUN_STATUS.OTHER;
        }
    }

    /**
     * Returns true if the filesToRun are totally contained by the files associated
     * with a given workflow run (workflowRunAcc).
     * @param workflowRunAcc accession of the workflow run to compare against
     * @param filesToRun the files to check to see if they are contained by the past run
     * @return
     */
    protected boolean isToRunContained(int workflowRunAcc, Collection<String> filesToRun) {
        List<String> ranOnList = getListOfFiles(workflowRunAcc);
        Log.info("Files to run: " + StringUtils.join(filesToRun, ','));
        // use set operations to be more explicit about our cases
        Set<String> setToRun = new HashSet<String>(filesToRun);
        Set<String> setHasRun = new HashSet<String>(ranOnList);
        return SetOperations.isSuperset(setHasRun, setToRun);
    }

    /**
     * Compares the files from a previous workflow run (workflowRunAcc) against those
     * found in the database (filesToRun) and reports how the two sets relate:
     * SAME_FILES if both contain exactly the same file paths, PAST_SUPERSET if the
     * previous run used a superset of the files to run, PAST_SUBSET_OR_INTERSECTION
     * if the previous run's files are a subset of, or merely overlap with, the files
     * to run, and DISJOINT_SETS if the two sets share no files.
     */
    protected FILE_STATUS compareWorkflowRunFiles(int workflowRunAcc, Collection<String> filesToRun) {
        List<String> ranOnList = getListOfFiles(workflowRunAcc);
        Log.info("Files to run: " + StringUtils.join(filesToRun, ','));
        Log.info("Files has run: " + StringUtils.join(ranOnList, ','));

        // use set operations to be more explicit about our cases
        Set<String> setToRun = new HashSet<String>(filesToRun);
        Set<String> setHasRun = new HashSet<String>(ranOnList);
        if (setToRun.equals(setHasRun)) {
            return FILE_STATUS.SAME_FILES;
        }
        if (SetOperations.isSubset(setHasRun, setToRun)) {
            return FILE_STATUS.PAST_SUBSET_OR_INTERSECTION;
        }
        if (SetOperations.isSuperset(setHasRun, setToRun)) {
            return FILE_STATUS.PAST_SUPERSET;
        }
        if (SetOperations.intersection(setToRun, setHasRun).size() > 0) {
            return FILE_STATUS.PAST_SUBSET_OR_INTERSECTION;
        }
        return FILE_STATUS.DISJOINT_SETS;
    }
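
    // Worked examples (not part of the original source), assuming SetOperations.isSubset
    // and isSuperset follow their names; "past" is the previous run's files (setHasRun)
    // and "now" is filesToRun (setToRun), with hypothetical paths:
    //
    //   past = {a.bam, b.bam}, now = {a.bam, b.bam}  -> SAME_FILES
    //   past = {a.bam},        now = {a.bam, b.bam}  -> PAST_SUBSET_OR_INTERSECTION
    //   past = {a.bam, b.bam}, now = {a.bam}         -> PAST_SUPERSET
    //   past = {a.bam, c.bam}, now = {a.bam, b.bam}  -> PAST_SUBSET_OR_INTERSECTION
    //   past = {c.bam},        now = {a.bam, b.bam}  -> DISJOINT_SETS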

    private void addFileToSets(ReturnValue file, FileMetadata fm, Collection<String> workflowParentAccessionsToRun,
            Collection<String> parentAccessionsToRun, Collection<String> filesToRun) {
        if (checkFileDetails(file, fm)) {
            if (skipStuff) {
                for (String key : file.getAttributes().keySet()) {
                    if (key.contains("skip")) {
                        Log.warn("File SWID:" + fm.getDescription() + " path " + fm.getFilePath() + " is skipped: "
                                + key + ">" + file.getAttribute(key));
                        return;
                    }
                }
            }
            if (test) {
                printFileMetadata(file, fm);
            }

            filesToRun.add(fm.getFilePath());
            parentAccessionsToRun.add(file.getAttribute(Header.PROCESSING_SWID.getTitle()));

            String swid = file.getAttribute(Header.IUS_SWA.getTitle());
            if (swid == null || swid.trim().isEmpty()) {
                swid = file.getAttribute(Header.LANE_SWA.getTitle());
            }
            workflowParentAccessionsToRun.add(swid);

        }
    }

    protected void printFileMetadata(ReturnValue file, FileMetadata fm) {
        String studyName = (String) options.valueOf("study-name");
        try {
            StringWriter writer = new StringWriter();
            FindAllTheFiles.print(writer, file, studyName, true, fm);
            studyReporterOutput.add(writer.getBuffer().toString().trim());
        } catch (IOException ex) {
            Log.error("Error printing file metadata", ex);
        }
    }

    protected String commaSeparateMy(Collection<String> list) {
        return separateMy(list, ",");
    }

    protected String spaceSeparateMy(Collection<String> list) {
        return separateMy(list, " ");
    }

    private String separateMy(Collection<String> list, String delimiter) {
        StringBuilder sb = new StringBuilder();
        for (String s : list) {
            if (sb.length() != 0) {
                sb.append(delimiter);
            }
            sb.append(s);
        }
        return sb.toString();
    }

    private String createIniFile(String commaSeparatedFilePaths, String commaSeparatedParentAccessions) {
        String iniPath = "";

        Map<String, String> iniFileMap = new TreeMap<String, String>();
        SortedSet<WorkflowParam> wps = metaws.getWorkflowParams(workflowAccession);
        for (WorkflowParam param : wps) {
            iniFileMap.put(param.getKey(), param.getDefaultValue());
        }

        Map<String, String> iniParameters = modifyIniFile(commaSeparatedFilePaths, commaSeparatedParentAccessions);

        for (String param : iniParameters.keySet()) {
            iniFileMap.put(param, iniParameters.get(param));
        }

        PrintWriter writer = null;
        File file = null;
        try {
            file = File.createTempFile("" + random.nextInt(), ".ini");
            writer = new PrintWriter(new FileWriter(file), true);

            for (String key : iniFileMap.keySet()) {
                StringBuilder sb = new StringBuilder();
                sb.append(key).append("=").append(iniFileMap.get(key));
                writer.println(sb.toString());
            }

        } catch (IOException ex) {
            Logger.getLogger(BasicDecider.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (writer != null) {
                writer.close();
            }
        }
        if (file != null) {
            iniPath = file.getAbsolutePath();
        }
        return iniPath;

    }

    /**
     * Performs any additional checks on the file before adding it to the list
     * of files to incorporate. This method should be extended for future
     * deciders for custom behaviour. You can also pull any details out of the
     * file metadata here.
     *
     * @param returnValue The ReturnValue representing the Processing event. May
     * have one or more files. The attributes table contains the information
     * from FindAllTheFiles.Header.
     * @param fm the particular file that will be added
     * @return true if the file can be added to the list, false otherwise
     */
    protected boolean checkFileDetails(ReturnValue returnValue, FileMetadata fm) {
        return true;
    }
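
    // Example (not part of the original source): a subclass might narrow the accepted
    // files here, e.g. by meta-type. A minimal sketch, with a hypothetical meta-type:
    //
    //   @Override
    //   protected boolean checkFileDetails(ReturnValue returnValue, FileMetadata fm) {
    //       // keep only BAM files; everything else is skipped by this decider
    //       return "application/bam".equals(fm.getMetaType());
    //   }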

    protected Map<String, String> modifyIniFile(String commaSeparatedFilePaths,
            String commaSeparatedParentAccessions) {
        Map<String, String> iniFileMap = new TreeMap<String, String>();
        iniFileMap.put("input_files", commaSeparatedFilePaths);
        return iniFileMap;
    }

    protected String handleGroupByAttribute(String attribute) {
        return attribute;
    }

    //    protected
    public Map<String, List<ReturnValue>> separateFiles(List<ReturnValue> vals, String groupBy) {
        //get files from study
        Map<String, List<ReturnValue>> map = new HashMap<String, List<ReturnValue>>();

        //group files according to the designated header (e.g. sample SWID)
        for (ReturnValue r : vals) {
            String currVal = r.getAttributes().get(groupBy);

            if (currVal != null) {
                currVal = handleGroupByAttribute(currVal);
            }

            List<ReturnValue> vs = map.get(currVal);
            if (vs == null) {
                vs = new ArrayList<ReturnValue>();
            }
            vs.add(r);
            map.put(currVal, vs);
        }
        return map;

    }
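
    // Example (not part of the original source): with --group-by LANE_SWA every
    // ReturnValue carrying the same lane SWID lands in the same list, so
    // launchWorkflows() considers those files together; with the FILE_SWA default each
    // file forms its own group. A hypothetical sketch of the resulting map:
    //
    //   group by lane SWID: { "100" -> [fileA, fileB], "200" -> [fileC] }
    //   group by file SWID: { "11" -> [fileA], "12" -> [fileB], "13" -> [fileC] }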

    @Override
    public ReturnValue clean_up() {
        return ReturnValue.featureNotImplemented();
    }

    @Override
    public ReturnValue do_summary() {
        String command = do_summary_command();
        Log.stdout(command);
        return ret;
    }

    public Boolean getForceRunAll() {
        return forceRunAll;
    }

    public void setForceRunAll(Boolean forceRunAll) {
        this.forceRunAll = forceRunAll;
    }

    /**
     * use getGroupingStrategy
     *
     * @return
     */
    @Deprecated
    public Header getHeader() {
        return header;
    }

    public Header getGroupingStrategy() {
        return this.header;
    }

    /**
     * use setGroupingStrategy
     *
     * @param header
     */
    @Deprecated
    public void setHeader(Header header) {
        this.header = header;
    }

    public void setGroupingStrategy(Header strategy) {
        this.header = strategy;
    }

    public List<String> getMetaType() {
        return metaTypes;
    }

    public void setMetaType(List<String> metaType) {
        this.metaTypes = metaType;
    }

    public Boolean getMetadataWriteback() {
        return metadataWriteback;
    }

    public void setMetadataWriteback(Boolean metadataWriteback) {
        this.metadataWriteback = metadataWriteback;
    }

    public Set<String> getParentWorkflowAccessions() {
        return parentWorkflowAccessions;
    }

    public void setParentWorkflowAccessions(Set<String> parentWorkflowAccessions) {
        this.parentWorkflowAccessions = parentWorkflowAccessions;
    }

    public Boolean getTest() {
        return test;
    }

    public void setTest(Boolean test) {
        this.test = test;
    }

    public String getWorkflowAccession() {
        return workflowAccession;
    }

    public void setWorkflowAccession(String workflowAccession) {
        this.workflowAccession = workflowAccession;
    }

    public Set<String> getWorkflowAccessionsToCheck() {
        return workflowAccessionsToCheck;
    }

    public void setWorkflowAccessionsToCheck(Set<String> workflowAccessions) {
        this.workflowAccessionsToCheck = workflowAccessions;
    }

    /**
     * Allows the user to perform a final check and decide whether to run or cancel
     * the launch, e.g. check that all files are present.
     *
     * @param commaSeparatedFilePaths
     * @param commaSeparatedParentAccessions
     * @return
     */
    protected ReturnValue doFinalCheck(String commaSeparatedFilePaths, String commaSeparatedParentAccessions) {
        ReturnValue checkReturnValue = new ReturnValue(ReturnValue.SUCCESS);
        return checkReturnValue;
    }
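
    // Example (not part of the original source): a minimal sketch of a final check that
    // cancels the launch when an input file is missing from the local filesystem (only
    // sensible where the decider host can see those paths). Any non-SUCCESS exit status
    // makes launchWorkflows() skip the group:
    //
    //   @Override
    //   protected ReturnValue doFinalCheck(String commaSeparatedFilePaths, String commaSeparatedParentAccessions) {
    //       for (String path : commaSeparatedFilePaths.split(",")) {
    //           if (!new File(path).exists()) {
    //               Log.error("Missing input file: " + path);
    //               return new ReturnValue(ReturnValue.INVALIDPARAMETERS);
    //           }
    //       }
    //       return new ReturnValue(ReturnValue.SUCCESS);
    //   }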

    /**
     * Reports an actual launch of a workflow, for testing purposes.
     * @return false iff we don't actually want to launch
     */
    protected boolean reportLaunch() {
        return true;
    }

    private Map<String, String> generateWorkflowRunMap(int workflowRunAcc) {
        String report = metaws.getWorkflowRunReport(workflowRunAcc);
        String[] lines = report.split("\n");
        String[] reportHeader = lines[0].split("\t");
        String[] data = lines[1].split("\t");
        Map<String, String> map = new TreeMap<String, String>();
        for (int i = 0; i < reportHeader.length; i++) {
            map.put(reportHeader[i].trim(), data[i].trim());
        }
        return map;
    }

    public void setMetaws(MetadataWS metaws) {
        this.metaws = metaws;
    }

    protected String generateStatus(int workflowRunAcc) {
        Map<String, String> map = generateWorkflowRunMap(workflowRunAcc);
        String status = map.get("Workflow Run Status");
        Log.debug("Status is " + status);
        return status;
    }

    /**
     * We now use the guideline that we only count failures when they occurred on the
     * same set of files (with the same paths).
     * @param fileStatus
     * @param previousStatus
     * @return
     */
    protected static boolean isCountAsFail(FILE_STATUS fileStatus, PREVIOUS_RUN_STATUS previousStatus) {
        return (fileStatus == FILE_STATUS.SAME_FILES && previousStatus == PREVIOUS_RUN_STATUS.FAILED);
    }

    /**
     * See https://wiki.oicr.on.ca/display/SEQWARE/BasicDecider+logic
     * @param fileStatus
     * @param previousStatus
     * @return
     */
    protected static boolean isDoRerun(FILE_STATUS fileStatus, PREVIOUS_RUN_STATUS previousStatus) {
        Log.info("Considering match with " + fileStatus.name() + " status:" + previousStatus.name());
        boolean strangeCondition = fileStatus == FILE_STATUS.PAST_SUPERSET
                && previousStatus == PREVIOUS_RUN_STATUS.FAILED;
        if (strangeCondition) {
            Log.stderr(
                    "****** Workflow run has more files in the past but failed. We will try to re-run, but you should investigate!!!! *******");
        }
        boolean doRerun = true;
        if (fileStatus == FILE_STATUS.PAST_SUBSET_OR_INTERSECTION) {
            if (previousStatus == PREVIOUS_RUN_STATUS.OTHER) {
                doRerun = false;
            } else {
                doRerun = true;
            }
        } else if (fileStatus == FILE_STATUS.SAME_FILES || fileStatus == FILE_STATUS.PAST_SUPERSET) {
            if (previousStatus == PREVIOUS_RUN_STATUS.FAILED) {
                doRerun = true;
            } else {
                doRerun = false;
            }
        }
        return doRerun;
    }
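
    // Decision summary (not part of the original source), restating the logic above:
    //   SAME_FILES or PAST_SUPERSET  -> rerun only if the previous run FAILED
    //                                   (PAST_SUPERSET + FAILED also triggers the warning)
    //   PAST_SUBSET_OR_INTERSECTION  -> rerun unless the previous run's status is OTHER
    //   DISJOINT_SETS                -> always rerun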

    private List<WorkflowRun> produceAccessionListWithFileList(List<Integer> fileSWIDs, String searchType) {
        // find relevant workflow runs for this group of files
        List<WorkflowRun> wrFiles1 = this.metadata.getWorkflowRunsAssociatedWithFiles(fileSWIDs, searchType);
        Log.debug("Found " + wrFiles1.size() + " workflow runs via " + searchType);
        return wrFiles1;
    }

    /**
     * For a given set of file SWIDs in filesToRun, we will count up the number of previous workflow runs that 
     * failed and return whether or not we think the workflow should be rerun
     * @param filesToRun
     * @param failures
     * @param previousWorkflowRuns
     * @return 
     */
    private boolean processWorkflowRuns(Collection<String> filesToRun, List<Boolean> failures,
            List<WorkflowRun> previousWorkflowRuns) {
        int count = 0;
        boolean rerun = true;
        for (WorkflowRun previousWorkflowRun : previousWorkflowRuns) {
            count++;
            // only consider previous runs of the same workflow
            if (workflowAccession.equals(previousWorkflowRun.getWorkflowAccession().toString())) {
                FILE_STATUS fileStatus = compareWorkflowRunFiles(previousWorkflowRun.getSwAccession(), filesToRun);
                Log.info("Workflow run " + previousWorkflowRun.getSwAccession() + " has a file status of "
                        + fileStatus);
                PREVIOUS_RUN_STATUS previousStatus = determineStatus(previousWorkflowRun.getSwAccession());
                Log.info("Workflow run " + previousWorkflowRun.getSwAccession() + " has a status of "
                        + previousStatus);

                boolean countAsFail = isCountAsFail(fileStatus, previousStatus);
                boolean doRerun = isDoRerun(fileStatus, previousStatus);

                if (countAsFail) {
                    Log.info("Workflow run " + previousWorkflowRun.getSwAccession()
                            + " counted as a failure with a file status of " + fileStatus);
                    Log.info("The failing run was workflow_run " + count + "/" + previousWorkflowRuns.size()
                            + " out of " + previousWorkflowRuns.size());
                    failures.add(true);
                }
                if (!doRerun) {
                    Log.info("Workflow run " + previousWorkflowRun.getSwAccession()
                            + " blocking re-run with a status of: " + previousStatus + "  file status of: "
                            + fileStatus);
                    Log.info("The blocking run was workflow_run " + count + "/" + previousWorkflowRuns.size()
                            + " out of " + previousWorkflowRuns.size());
                    rerun = false;
                    break;
                }
            } else if (this.workflowAccessionsToCheck
                    .contains(previousWorkflowRun.getWorkflowAccession().toString())) {
                Log.debug("Workflow run " + previousWorkflowRun.getWorkflowAccession() + " has a workflow "
                        + previousWorkflowRun.getWorkflowAccession()
                        + " on the list of workflow accessions to check");
                // we will check whether all the files to run are contained within the previous run of the workflow, if so we will not re-run
                FILE_STATUS fileStatus = compareWorkflowRunFiles(previousWorkflowRun.getSwAccession(), filesToRun);
                Log.info("Workflow run " + previousWorkflowRun.getSwAccession() + " has a file status of "
                        + fileStatus);
                if (this.isToRunContained(previousWorkflowRun.getSwAccession(), filesToRun)) {
                    Log.info("Previous workflow run contained the all of the files that we want to run");
                    rerun = false;
                }
            } else {
                Log.info("Workflow run " + previousWorkflowRun.getSwAccession()
                        + " was neither a workflow to check nor a previous run of " + workflowAccession
                        + " , ignored");
            }
        }
        return rerun;
    }

    /**
     * Given a workflowRunAcc, returns the list of file paths that were used in that
     * run, restricted to the configured meta-types.
     * @param workflowRunAcc
     * @return
     */
    private List<String> getListOfFiles(int workflowRunAcc) {
        Map<String, String> map = generateWorkflowRunMap(workflowRunAcc);
        String ranOnString = map.get("Immediate Input File Meta-Types");
        List<String> ranOnList = Arrays.asList(ranOnString.split(","));
        List<Integer> indices = new ArrayList<Integer>();
        for (int i = 0; i < ranOnList.size(); i++) {
            if (metaTypes.contains(ranOnList.get(i).trim())) {
                indices.add(i);
            }
        }
        ranOnString = map.get("Immediate Input File Paths");
        String[] ranOnArr = ranOnString.split(",");
        ranOnList = new ArrayList<String>();
        for (Integer i : indices) {
            ranOnList.add(ranOnArr[i].trim());
            Log.trace("Adding item: " + ranOnArr[i]);
        }
        Log.debug("Got list of files: " + StringUtils.join(ranOnList, ','));
        return ranOnList;
    }

    private String do_summary_command() {
        StringBuilder command = new StringBuilder();
        // SEQWARE-1612 Change test command to actual jar name
        String SEQWARE_VERSION = this.metadata.getClass().getPackage().getImplementationVersion();
        command.append("\njava -jar seqware-distribution-").append(SEQWARE_VERSION).append("-full.jar ");
        command.append(spaceSeparateMy(constructCommand()));
        command.append("\n");
        return command.toString();
    }

    /**
     * These file statuses reflect the discussion at  
     * https://wiki.oicr.on.ca/display/SEQWARE/BasicDecider+logic     
     */
    protected enum FILE_STATUS {
        /**
         * Two sets of files have no relationship
         */
        DISJOINT_SETS,
        /**
         * The two sets of files partially overlap, i.e. the past set intersects with
         * or is a subset of the current set (the set of files in the past was smaller)
         */
        PAST_SUBSET_OR_INTERSECTION,
        /**
         * the same files are found at the same paths
         */
        SAME_FILES,
        /**
         * The set of files in the past was strictly larger than the current
         * files under consideration
         */
        PAST_SUPERSET
    }

    /**
     * We care about three types of status: an outright fail, other (pending, running, submitted, etc.), and completed
     */
    protected enum PREVIOUS_RUN_STATUS {
        FAILED, OTHER, COMPLETED
    }
}
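
Example

A minimal sketch, not part of the original source, of how a custom decider typically extends BasicDecider: it registers itself as a plugin, restricts the meta-type filter before delegating to BasicDecider.init(), and overrides modifyIniFile to add its own INI parameters. The class name, meta-type and INI key below are hypothetical placeholders; such a subclass would then be invoked with the same --plugin pattern shown near the top of the listing.

package net.sourceforge.seqware.pipeline.deciders;

import java.util.Arrays;
import java.util.Map;
import net.sourceforge.seqware.common.module.ReturnValue;
import net.sourceforge.seqware.pipeline.plugin.PluginInterface;
import org.openide.util.lookup.ServiceProvider;

@ServiceProvider(service = PluginInterface.class)
public class MyBamDecider extends BasicDecider {

    @Override
    public ReturnValue init() {
        // hypothetical meta-type: only consider BAM files
        this.setMetaType(Arrays.asList("application/bam"));
        // let BasicDecider parse the command line and finish its own setup
        return super.init();
    }

    @Override
    protected Map<String, String> modifyIniFile(String commaSeparatedFilePaths, String commaSeparatedParentAccessions) {
        // start from the default map (which already carries input_files)
        Map<String, String> ini = super.modifyIniFile(commaSeparatedFilePaths, commaSeparatedParentAccessions);
        // hypothetical workflow parameter
        ini.put("output_prefix", "./provisioned/");
        return ini;
    }

    @Override
    public String get_description() {
        return "A hypothetical decider that launches a workflow on BAM files";
    }
}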