de.huberlin.wbi.hiway.am.galaxy.GalaxyApplicationMaster.java Source code

Java tutorial

Introduction

Here is the source code for de.huberlin.wbi.hiway.am.galaxy.GalaxyApplicationMaster.java

Source

/*******************************************************************************
 * In the Hi-WAY project we propose a novel approach of executing scientific
 * workflows processing Big Data, as found in NGS applications, on distributed
 * computational infrastructures. The Hi-WAY software stack comprises the func-
 * tional workflow language Cuneiform as well as the Hi-WAY ApplicationMaster
 * for Apache Hadoop 2.x (YARN).
 *
 * List of Contributors:
 *
 * Marc Bux (HU Berlin)
 * Jürgen Brandt (HU Berlin)
 * Hannes Schuh (HU Berlin)
 * Ulf Leser (HU Berlin)
 *
 * Jürgen Brandt is funded by the European Commission through the BiobankCloud
 * project. Marc Bux is funded by the Deutsche Forschungsgemeinschaft through
 * research training group SOAMED (GRK 1651).
 *
 * Copyright 2014 Humboldt-Universität zu Berlin
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.huberlin.wbi.hiway.am.galaxy;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.cli.ParseException;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import de.huberlin.wbi.cuneiform.core.semanticmodel.JsonReportEntry;
import de.huberlin.wbi.hiway.am.HiWay;
import de.huberlin.wbi.hiway.common.HiWayConfiguration;
import de.huberlin.wbi.hiway.common.TaskInstance;
import de.huberlin.wbi.hiway.common.WorkflowStructureUnknownException;

public class GalaxyApplicationMaster extends HiWay {

    /**
     * Entry point of the Galaxy application master; creates a new application master instance and passes it, together with the command line arguments, to
     * {@link HiWay#loop}.
     * 
     * @param args
     *            the command line arguments, handed on unchanged
     */
    public static void main(String[] args) {
        GalaxyApplicationMaster applicationMaster = new GalaxyApplicationMaster();
        HiWay.loop(applicationMaster, args);
    }

    /**
     * A helper function for processing the loc file of a single Galaxy data table; the loc file stores information on any registered data (e.g., genomic
     * indices). Every line that is neither empty nor a comment is split at tab characters and added to the data table. A non-existing loc file is silently
     * ignored; an I/O error while reading terminates the JVM.
     * 
     * @param file
     *            a data table's loc file
     * @param galaxyDataTable
     *            the data table object corresponding to this loc file
     */
    private static void processLocFile(File file, GalaxyDataTable galaxyDataTable) {
        if (!file.exists())
            return;
        // an empty comment character would make startsWith() match every single line, silently dropping the whole table
        String commentChar = galaxyDataTable.getComment_char();
        boolean hasCommentChar = commentChar != null && !commentChar.isEmpty();
        try (BufferedReader locBr = new BufferedReader(new FileReader(file))) {
            System.out.println("Processing Galaxy data table loc file " + file.getCanonicalPath());
            String line;
            while ((line = locBr.readLine()) != null) {
                // blank lines carry no record; splitting them would register a bogus one-column entry
                if (line.isEmpty())
                    continue;
                if (hasCommentChar && line.startsWith(commentChar))
                    continue;
                String[] content = line.split("\t");
                galaxyDataTable.addContent(content);
            }
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /* a data structure that stores the data tables of the local Galaxy installation; data tables contain references to installed data sets (e.g., genome
     * indices) */
    private Map<String, GalaxyDataTable> galaxyDataTables;
    /* a data structure that stores the data types of the local Galaxy installation; data types are associated with metadata information that is required to
     * invoke the tools provided by Galaxy */
    private Map<String, GalaxyDataType> galaxyDataTypes;
    /* the path of the local Galaxy installation, as specified in the hiway-site.xml */
    public final String galaxyPath;
    /* a data structure that stores the tools of the local Galaxy installation; these tools include the library tools pre-installed in Galaxy as well as
     * additional tools installed via Galaxy's tool shed functionality */
    private Map<String, Map<String, GalaxyTool>> galaxyTools;

    /**
     * Creates the application master, reads the path of the local Galaxy installation from the configuration and initializes the (still empty) data
     * structures for Galaxy's data tables, data types and tools.
     * 
     * @throws IllegalStateException
     *             if the Galaxy path is not set in the configuration
     */
    public GalaxyApplicationMaster() {
        super();
        galaxyPath = getConf().get(HiWayConfiguration.HIWAY_GALAXY_PATH);
        if (galaxyPath == null) {
            String message = HiWayConfiguration.HIWAY_GALAXY_PATH + " not set in  " + HiWayConfiguration.HIWAY_SITE_XML;
            System.err.println(message);
            // carry the reason in the exception itself, so it is not lost when stderr is not captured
            throw new IllegalStateException(message);
        }
        galaxyDataTables = new HashMap<>();
        galaxyDataTypes = new HashMap<>();
        galaxyTools = new HashMap<>();
        setDetermineFileSizes();
    }

    /**
     * Looks up the version map of a tool and lazily registers an empty one if the tool has not been seen before
     * 
     * @param id
     *            the identifier of the tool, as specified in the root element of the tool's description XML
     * @return a map storing instances of the tool by their version number
     */
    private Map<String, GalaxyTool> addAndGetToolMap(String id) {
        // fast path: the tool is already known
        if (galaxyTools.containsKey(id)) {
            return galaxyTools.get(id);
        }
        Map<String, GalaxyTool> toolsByVersion = new HashMap<>();
        galaxyTools.put(id, toolsByVersion);
        return toolsByVersion;
    }

    /**
     * A (recursive) helper function for parsing the parameters of a tool from their XML specification. Only the direct "param", "conditional" and "repeat"
     * children of the passed element are considered; nested parameters are reached by recursing into the "when" elements of conditionals and into repeats.
     * 
     * @param el
     *            the XML element from which to commence the parsing
     * @param tool
     *            the tool whose parameters are parsed; the name of every parameter of type "data" is passed to its setPath method
     * @return the set of parameters under this element
     * @throws XPathExpressionException
     *             if one of the XPath expressions can not be evaluated
     */
    private Set<GalaxyParam> getParams(Element el, GalaxyTool tool) throws XPathExpressionException {
        Set<GalaxyParam> params = new HashSet<>();
        XPath xpath = XPathFactory.newInstance().newXPath();

        // there are three different types of parameters in Galaxy's tool descriptions: atomic parameters, conditionals and repeats
        NodeList paramNds = (NodeList) xpath.evaluate("param", el, XPathConstants.NODESET);
        NodeList conditionalNds = (NodeList) xpath.evaluate("conditional", el, XPathConstants.NODESET);
        NodeList repeatNds = (NodeList) xpath.evaluate("repeat", el, XPathConstants.NODESET);

        // (1) parse atomic parameters
        for (int i = 0; i < paramNds.getLength(); i++) {
            Element paramEl = (Element) paramNds.item(i);
            String name = paramEl.getAttribute("name");
            GalaxyParamValue param = new GalaxyParamValue(name);
            params.add(param);

            // (a) determine default values and mappings of values
            String type = paramEl.getAttribute("type");
            switch (type) {
            case "data":
                param.addMapping("", "{\"path\": \"\"}");
                tool.setPath(name);
                break;
            case "boolean":
                String trueValue = paramEl.getAttribute("truevalue");
                param.addMapping("True", trueValue);
                String falseValue = paramEl.getAttribute("falsevalue");
                param.addMapping("False", falseValue);
                break;
            case "select":
                param.addMapping("", "None");
                break;
            default:
            }

            // (b) resolve references to Galaxy data tables; both "option" and "options" children may carry such a reference
            NodeList optionNds = (NodeList) xpath.evaluate("option", paramEl, XPathConstants.NODESET);
            NodeList optionsNds = (NodeList) xpath.evaluate("options", paramEl, XPathConstants.NODESET);
            for (int j = 0; j < optionNds.getLength() + optionsNds.getLength(); j++) {
                Element optionEl = j < optionNds.getLength() ? (Element) optionNds.item(j)
                        : (Element) optionsNds.item(j - optionNds.getLength());
                if (optionEl.hasAttribute("from_data_table")) {
                    String tableName = optionEl.getAttribute("from_data_table");
                    GalaxyDataTable galaxyDataTable = galaxyDataTables.get(tableName);
                    // a tool may reference a data table that is not registered in the local installation; skip it instead of failing with a
                    // NullPointerException
                    if (galaxyDataTable == null) {
                        System.err.println("Galaxy data table " + tableName + " referenced by parameter " + name
                                + " could not be found.");
                        continue;
                    }
                    for (String value : galaxyDataTable.getValues()) {
                        param.addMapping(value, galaxyDataTable.getContent(value));
                    }
                }
            }
        }

        // (2) parse conditionals, which consist of a single condition parameter and several "when condition equals" parameters
        for (int i = 0; i < conditionalNds.getLength(); i++) {
            Element conditionalEl = (Element) conditionalNds.item(i);
            String name = conditionalEl.getAttribute("name");
            GalaxyConditional conditional = new GalaxyConditional(name);

            NodeList conditionNds = (NodeList) xpath.evaluate("param", conditionalEl, XPathConstants.NODESET);
            NodeList whenNds = (NodeList) xpath.evaluate("when", conditionalEl, XPathConstants.NODESET);
            // a conditional without a condition parameter or without any "when" branch is ignored
            if (conditionNds.getLength() == 0 || whenNds.getLength() == 0)
                continue;

            Element conditionEl = (Element) conditionNds.item(0);
            name = conditionEl.getAttribute("name");
            GalaxyParamValue condition = new GalaxyParamValue(name);
            conditional.setCondition(condition);

            for (int j = 0; j < whenNds.getLength(); j++) {
                Element whenEl = (Element) whenNds.item(j);
                String conditionValue = whenEl.getAttribute("value");
                conditional.setConditionalParams(conditionValue, getParams(whenEl, tool));
            }

            params.add(conditional);
        }

        // (3) parse repeats, which consist of a list of parameters
        for (int i = 0; i < repeatNds.getLength(); i++) {
            Element repeatEl = (Element) repeatNds.item(i);
            String name = repeatEl.getAttribute("name");
            GalaxyRepeat repeat = new GalaxyRepeat(name);
            params.add(repeat);

            repeat.setParams(getParams(repeatEl, tool));
        }

        return params;
    }

    /**
     * Initializes the application master and subsequently parses the configuration of the local Galaxy installation: the locations of the relevant config
     * files are read from config/galaxy.ini (falling back to the sample files for settings that are not present), and the data types, data tables and tool
     * libraries described therein are registered. Any I/O or parser factory error terminates the JVM.
     * 
     * @param args
     *            the command line arguments, passed on to the initialization of the super class
     * @return always true
     * @throws ParseException
     *             if the super class fails to parse the command line arguments
     */
    @Override
    public boolean init(String[] args) throws ParseException {
        // NOTE(review): the result of super.init(args) is not evaluated here
        super.init(args);

        // (1) determine the config files that are to be parsed
        String tool_data_table_config_path = "config/tool_data_table_conf.xml.sample";
        String shed_tool_data_table_config = "config/shed_tool_data_table_conf.xml.sample";
        String tool_dependency_dir = "dependencies";
        String tool_path = "tools";
        String tool_config_file = "config/tool_conf.xml.sample";
        String datatypes_config_file = "config/datatypes_conf.xml.sample";
        try (BufferedReader iniBr = new BufferedReader(
                new FileReader(new File(galaxyPath + "/config/galaxy.ini")))) {
            String line;
            while ((line = iniBr.readLine()) != null) {
                // split at the first '=' only, such that values containing '=' are not truncated and lines without a value do not raise an
                // ArrayIndexOutOfBoundsException
                int separator = line.indexOf('=');
                if (separator < 0)
                    continue;
                String key = line.substring(0, separator).trim();
                String value = line.substring(separator + 1).trim();
                // an empty value keeps the default
                if (value.isEmpty())
                    continue;
                // compare the whole key, such that settings merely sharing a prefix with one of the keys below are not mistaken for it
                switch (key) {
                case "tool_data_table_config_path":
                    tool_data_table_config_path = value;
                    break;
                case "shed_tool_data_table_config":
                    shed_tool_data_table_config = value;
                    break;
                case "tool_dependency_dir":
                    tool_dependency_dir = value;
                    break;
                case "tool_path":
                    tool_path = value;
                    break;
                case "tool_config_file":
                    tool_config_file = value;
                    break;
                case "datatypes_config_file":
                    datatypes_config_file = value;
                    break;
                default:
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(-1);
        }
        // several tool config files may be specified as a comma-separated list
        String[] tool_config_files = tool_config_file.split(",");

        // (2) parse the config files for Galaxy's data types, data tables, and tool libraries
        try {
            processDataTypes(new File(galaxyPath + "/" + datatypes_config_file));
            processDataTables(new File(galaxyPath + "/" + tool_data_table_config_path));
            processDataTables(new File(galaxyPath + "/" + shed_tool_data_table_config));
            for (String config_file : tool_config_files) {
                processToolLibraries(new File(galaxyPath + "/" + config_file.trim()), tool_path,
                        tool_dependency_dir);
            }
        } catch (FactoryConfigurationError e) {
            e.printStackTrace();
            System.exit(-1);
        }

        return true;
    }

    /**
     * A helper function for parsing a Galaxy tool's XML file. The tool description is serialized to a string, references to macros are textually replaced
     * by the macro content, and the resulting document is parsed for the tool's requirements, command template, input parameters and outputs. Tools that
     * specify a command are registered in the tool data structure by id and version. Any parsing or I/O error terminates the JVM.
     * 
     * @param file
     *            the XML file to be parsed
     * @return the Galaxy tool described in the XML file
     */
    private GalaxyTool parseToolFile(File file) {
        System.out.println("Parsing Galaxy tool file " + file);
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            String path = file.getCanonicalPath();
            String dir = path.substring(0, path.lastIndexOf("/"));
            Document doc = builder.parse(file);
            Element rootEl = doc.getDocumentElement();
            // serialize the document to a string, such that macros can be expanded by means of plain text replacement
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
            StreamResult result = new StreamResult(new StringWriter());
            DOMSource source = new DOMSource(rootEl);
            transformer.transform(source, result);
            String toolDescription = result.getWriter().toString();

            // (1) parse macros, if any
            NodeList macrosNds = rootEl.getElementsByTagName("macros");
            Map<String, String> macrosByName = new HashMap<>();
            for (int i = 0; i < macrosNds.getLength(); i++) {
                Node macrosNd = macrosNds.item(i);
                macrosByName.putAll(processMacros(macrosNd, dir));
            }

            // (2) insert macros into the XML and parse the document
            Pattern p = Pattern.compile("<expand macro=\"([^\"]*)\"(>.*?</expand>|/>)", Pattern.DOTALL);
            Matcher m = p.matcher(toolDescription);
            while (m.find()) {
                String name = m.group(1);
                String replace = m.group(0);
                String with = macrosByName.get(name);
                // references to unknown macros are left untouched; this has to be checked before the macro content is accessed
                if (with == null)
                    continue;
                if (m.group(2).startsWith(">")) {
                    String yieldContent = m.group(2).substring(1, m.group(2).indexOf("</expand>"));
                    // literal replacement: the yielded content may contain '$' or '\', which replaceAll would interpret as group references
                    with = with.replace("<yield/>", yieldContent.trim());
                }
                toolDescription = toolDescription.replace(replace, with);
            }

            doc = builder.parse(new InputSource(new StringReader(toolDescription)));
            rootEl = doc.getDocumentElement();
            String version = rootEl.hasAttribute("version") ? rootEl.getAttribute("version") : "1.0.0";
            String id = rootEl.getAttribute("id");
            GalaxyTool tool = new GalaxyTool(id, version, dir, galaxyPath);

            // (3) determine requirements (libraries and executables) of this tool; requirements have to be parsed such that the environment of the task can be
            // set to include them
            NodeList requirementNds = rootEl.getElementsByTagName("requirement");
            for (int i = 0; i < requirementNds.getLength(); i++) {
                Element requirementEl = (Element) requirementNds.item(i);
                String requirementName = requirementEl.getChildNodes().item(0).getNodeValue().trim();
                String requirementVersion = requirementEl.getAttribute("version");
                tool.addRequirement(requirementName, requirementVersion);
            }

            // (4) determine and set the template for the command of the task; this template will be compiled at runtime by Cheetah
            Element commandEl = (Element) rootEl.getElementsByTagName("command").item(0);
            if (commandEl != null) {
                String command = commandEl.getChildNodes().item(0).getNodeValue().trim();
                String script = command.split(" ")[0];
                String interpreter = commandEl.getAttribute("interpreter");
                // if an interpreter is given, the first token of the command is a script residing in the tool's directory
                if (interpreter.length() > 0) {
                    command = command.replace(script, dir + "/" + script);
                    command = interpreter + " " + command;
                }
                command = command.replaceAll("\\.value", "");
                command = command.replaceAll("\\.dataset", "");
                tool.setTemplate(command);
            }

            // (5) determine the parameters (atomic, conditional and repeat) of this tool
            Element inputsEl = (Element) rootEl.getElementsByTagName("inputs").item(0);
            if (inputsEl != null)
                tool.setParams(getParams(inputsEl, tool));

            // (6) determine the output files produced by this tool
            Element outputsEl = (Element) rootEl.getElementsByTagName("outputs").item(0);
            if (outputsEl != null) {
                NodeList dataNds = outputsEl.getElementsByTagName("data");
                for (int i = 0; i < dataNds.getLength(); i++) {
                    Element dataEl = (Element) dataNds.item(i);
                    String name = dataEl.getAttribute("name");
                    GalaxyParamValue param = new GalaxyParamValue(name);
                    tool.setPath(name);
                    tool.addParam(param);

                    // outputs of format "input" take their data type from the input named in metadata_source, if any
                    String format = dataEl.getAttribute("format");
                    String metadata_source = dataEl.getAttribute("metadata_source");
                    if (format.equals("input") && metadata_source != null && metadata_source.length() > 0) {
                        param.setDataType(metadata_source);
                    } else {
                        param.setDataType(format);
                    }

                    String from_work_dir = dataEl.getAttribute("from_work_dir");
                    param.setFrom_work_dir(from_work_dir);
                }
            }

            // (7) register the tool in the Galaxy tool data structure
            if (tool.getTemplate() != null) {
                Map<String, GalaxyTool> toolMap = addAndGetToolMap(id);
                toolMap.put(version, tool);
            }

            return tool;
        } catch (SAXException | IOException | TransformerException | XPathExpressionException
                | ParserConfigurationException e) {
            e.printStackTrace();
            System.exit(-1);
            return null;
        }
    }

    /**
     * Parses the Galaxy workflow file (JSON) in two passes: the first pass creates a data object for every input node and a task instance, along with its
     * output data, for every tool node; the second pass connects the tasks according to the input connections of the tool nodes and registers their input
     * data. Finally, all tasks are handed to the scheduler. Any I/O or JSON error terminates the JVM.
     * 
     * @throws RuntimeException
     *             if a tool (or the requested version of a tool) can not be found in the local Galaxy installation
     */
    @Override
    public void parseWorkflow() {
        System.out.println("Parsing Galaxy workflow " + getWorkflowFile());
        Map<Long, TaskInstance> tasks = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(
                new FileReader(getWorkflowFile().getLocalPath().toString()))) {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append("\n");
            }
            JSONObject workflow = new JSONObject(sb.toString());
            JSONObject steps = workflow.optJSONObject("steps");
            if (steps == null)
                throw new JSONException("Galaxy workflow does not contain a \"steps\" object.");

            // (1) First pass: Parse Nodes
            for (int i = 0; i < steps.length(); i++) {
                JSONObject step = steps.optJSONObject(Integer.toString(i));
                long id = step.getLong("id");
                String type = step.getString("type");

                // (a) input nodes are nodes that do not invoke a task, but simply specify where an input file is located
                if (type.equals("data_input")) {
                    JSONArray inputs = step.optJSONArray("inputs");
                    for (int j = 0; j < inputs.length(); j++) {
                        JSONObject input = inputs.optJSONObject(j);
                        String name = input.getString("name");
                        GalaxyData data = new GalaxyData(name);

                        // everything following the first dot of the name is looked up as a data type extension
                        if (name.contains(".")) {
                            String extension = name.substring(name.indexOf(".") + 1);
                            if (galaxyDataTypes.containsKey(extension)) {
                                data.setDataType(galaxyDataTypes.get(extension));
                            }
                        }

                        String idName = id + "_output";
                        data.setInput(true);
                        getFiles().put(idName, data);
                    }

                    // (b) tool nodes are the actual nodes comprising the workflow
                } else if (type.equals("tool")) {
                    // (i) obtain the tool description and generate a task instance object
                    String toolVersion = step.getString("tool_version");
                    String toolId = step.getString("tool_id");
                    // for identifiers with more than two path segments, the second to last segment is used as tool identifier
                    String[] splitId = toolId.split("/");
                    if (splitId.length > 2)
                        toolId = splitId[splitId.length - 2];
                    Map<String, GalaxyTool> tools = galaxyTools.get(toolId);
                    if (tools == null) {
                        System.err
                                .println("Tool " + toolId + " could not be located in local Galaxy installation.");
                        throw new RuntimeException();
                    }
                    GalaxyTool tool = tools.get(toolVersion);
                    if (tool == null) {
                        System.err.println("Tool version " + toolVersion + " of tool " + toolId
                                + " could not be located in local Galaxy installation.");
                        throw new RuntimeException();
                    }
                    GalaxyTaskInstance task = new GalaxyTaskInstance(id, getRunId(), tool.getId(), tool,
                            galaxyPath);
                    tasks.put(id, task);

                    // (ii) determine and incorporate the post job actions specified in the workflow (e.g., renaming the task's output data)
                    Map<String, String> renameOutputs = new HashMap<>();
                    Set<String> hideOutputs = new HashSet<>();
                    if (step.has("post_job_actions")) {
                        JSONObject post_job_actions = step.optJSONObject("post_job_actions");
                        for (Iterator<?> it = post_job_actions.keys(); it.hasNext();) {
                            JSONObject post_job_action = post_job_actions.optJSONObject((String) it.next());
                            String action_type = post_job_action.getString("action_type");
                            if (action_type.equals("RenameDatasetAction")) {
                                String output_name = post_job_action.getString("output_name");
                                JSONObject action_arguments = post_job_action.optJSONObject("action_arguments");
                                if (action_arguments != null) {
                                    String newname = action_arguments.getString("newname");
                                    if (newname.contains(" "))
                                        newname = newname.replaceAll("\\s", "_");
                                    renameOutputs.put(output_name, newname);
                                }
                            } else if (action_type.equals("HideDatasetAction")) {
                                String output_name = post_job_action.getString("output_name");
                                hideOutputs.add(output_name);
                            }
                        }
                    }

                    // (iii) set the tool state (i.e., the parameter settings) of the task
                    task.addToolState(step.getString("tool_state"));

                    // (iv) resolve the file names of input data; the keys are iterated directly, since JSONObject.getNames returns null for tools
                    // without any input connection; the parent id is read as a number, as in the second pass
                    Map<String, String> inputNameToIdName = new HashMap<>();
                    JSONObject input_connections = step.optJSONObject("input_connections");
                    if (input_connections != null) {
                        for (Iterator<?> it = input_connections.keys(); it.hasNext();) {
                            String input_name = (String) it.next();
                            JSONObject input_connection = input_connections.optJSONObject(input_name);
                            inputNameToIdName.put(input_name,
                                    input_connection.getLong("id") + "_" + input_connection.getString("output_name"));
                        }
                    }

                    // (v) handle output data
                    JSONArray outputs = step.optJSONArray("outputs");
                    List<String> outputFiles = new LinkedList<>();
                    for (int j = 0; j < outputs.length(); j++) {
                        JSONObject output = outputs.optJSONObject(j);
                        String outputName = output.getString("name");

                        // determine the output file's data type: either a registered data type, the data type of the input referenced by name, or
                        // (for type "input") the data type of one of the task's inputs; the data type remains null if none can be determined
                        GalaxyDataType dataType = null;
                        GalaxyParamValue param = tool.getFirstMatchingParamByName(outputName);
                        String outputTypeString = param.getDataType();
                        if (galaxyDataTypes.containsKey(outputTypeString)) {
                            dataType = galaxyDataTypes.get(outputTypeString);
                        } else if (inputNameToIdName.containsKey(outputTypeString)) {
                            GalaxyData sourceData = (GalaxyData) getFiles()
                                    .get(inputNameToIdName.get(outputTypeString));
                            if (sourceData != null)
                                dataType = sourceData.getDataType();
                        } else if ("input".equals(outputTypeString) && !inputNameToIdName.isEmpty()) {
                            GalaxyData sourceData = (GalaxyData) getFiles()
                                    .get(inputNameToIdName.values().iterator().next());
                            if (sourceData != null)
                                dataType = sourceData.getDataType();
                        }

                        // determine the output file's name
                        String fileName = id + "_" + outputName;
                        if (dataType != null) {
                            String extension = dataType.getExtension();
                            if (extension != null && extension.length() > 0) {
                                fileName = fileName + "." + extension;
                            }
                        }
                        if (renameOutputs.containsKey(outputName))
                            fileName = renameOutputs.get(outputName);

                        // if the output file is to be moved from a (temporary) working directory, append a command in the task's post script
                        if (param.hasFrom_work_dir())
                            task.addToPostScript("mv " + param.getFrom_work_dir() + " " + fileName);

                        // create the data object and add it to the task object and data structures
                        GalaxyData data = new GalaxyData(fileName);
                        data.setDataType(dataType);
                        String idName = id + "_" + outputName;
                        if (!hideOutputs.contains(outputName)) {
                            data.setOutput(true);
                        }
                        getFiles().put(idName, data);
                        task.addOutputData(data);
                        task.addFile(outputName, false, data);
                        outputFiles.add(fileName);
                    }

                    task.getReport()
                            .add(new JsonReportEntry(task.getWorkflowId(), task.getTaskId(), task.getTaskName(),
                                    task.getLanguageLabel(), task.getId(), null, JsonReportEntry.KEY_INVOC_OUTPUT,
                                    new JSONObject().put("output", outputFiles)));

                }
            }

            // (2) Second pass: Parse Edges
            for (int i = 0; i < steps.length(); i++) {
                JSONObject step = steps.optJSONObject(Integer.toString(i));
                long id = step.getLong("id");
                String type = step.getString("type");
                if (type.equals("tool")) {
                    GalaxyTaskInstance task = (GalaxyTaskInstance) tasks.get(id);
                    JSONObject input_connections = step.optJSONObject("input_connections");
                    if (input_connections != null) {
                        for (Iterator<?> it = input_connections.keys(); it.hasNext();) {
                            String input_connection_key = (String) it.next();
                            JSONObject input_connection = input_connections.optJSONObject(input_connection_key);
                            long parentId = input_connection.getLong("id");
                            String idName = parentId + "_" + input_connection.getString("output_name");

                            // (a) register workflow edges; input nodes have no task, hence no edge is registered for them
                            TaskInstance parentTask = tasks.get(parentId);
                            if (parentTask != null) {
                                task.addParentTask(parentTask);
                                parentTask.addChildTask(task);
                            }

                            // (b) obtain the data object and add it to the task object
                            task.addInputData(getFiles().get(idName));
                            task.addFile(input_connection_key, true, (GalaxyData) getFiles().get(idName));
                        }
                    }

                    // (c) Prepare the python script that populates the tool state with remaining parameter settings required to invoke the tool
                    task.prepareParamScript();
                }
            }

        } catch (IOException | JSONException | WorkflowStructureUnknownException e) {
            e.printStackTrace();
            System.exit(-1);
        }

        getScheduler().addTasks(tasks.values());
    }

    /**
     * A helper function for processing the Galaxy config file that specifies metadata for data tables along with the location of their loc files. Tables
     * lacking a "columns" element or a "file" element with a "path" attribute are skipped. Any parsing or I/O error terminates the JVM.
     * 
     * @param file
     *            the Galaxy data table config file
     */
    private void processDataTables(File file) {
        try {
            System.out.println("Processing Galaxy data table config file " + file.getCanonicalPath());
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document doc = builder.parse(file);
            NodeList tables = doc.getElementsByTagName("table");
            for (int i = 0; i < tables.getLength(); i++) {
                Element tableEl = (Element) tables.item(i);
                Element columnsEl = (Element) tableEl.getElementsByTagName("columns").item(0);
                Element fileEl = (Element) tableEl.getElementsByTagName("file").item(0);
                // check for incomplete table definitions before any of the elements is accessed
                if (columnsEl == null || fileEl == null || !fileEl.hasAttribute("path"))
                    continue;
                String name = tableEl.getAttribute("name");
                // getAttribute returns an empty string for missing attributes, which would match every line of the loc file; fall back to '#'
                // NOTE(review): '#' is assumed to be Galaxy's default comment character -- confirm against the Galaxy documentation
                String comment_char = tableEl.hasAttribute("comment_char") ? tableEl.getAttribute("comment_char")
                        : "#";
                // tolerate arbitrary whitespace around the commas separating the column names
                String[] columns = columnsEl.getTextContent().trim().split("\\s*,\\s*");
                String path = fileEl.getAttribute("path");
                // relative paths are resolved against the Galaxy installation directory
                if (!path.startsWith("/"))
                    path = galaxyPath + "/" + path;
                GalaxyDataTable galaxyDataTable = new GalaxyDataTable(name, comment_char, columns, path);
                processLocFile(new File(path), galaxyDataTable);
                galaxyDataTables.put(name, galaxyDataTable);
            }

        } catch (SAXException | IOException | ParserConfigurationException e) {
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * A helper function for processing the Galaxy config file that specifies the extensions and python script locations for Galaxy's data types
     * 
     * @param file
     *            the Galaxy data type config file
     */
    private void processDataTypes(File file) {
        // Registers one GalaxyDataType per <datatype> element that has both an extension and a type attribute and is not marked as a
        // subclass. Parsing or I/O failures terminate the JVM.
        try {
            System.out.println("Processing Galaxy data type config file " + file.getCanonicalPath());
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document doc = builder.parse(file);
            NodeList datatypeNds = doc.getElementsByTagName("datatype");
            for (int i = 0; i < datatypeNds.getLength(); i++) {
                Element datatypeEl = (Element) datatypeNds.item(i);
                if (!datatypeEl.hasAttribute("extension") || !datatypeEl.hasAttribute("type")
                        || datatypeEl.hasAttribute("subclass"))
                    continue;
                String extension = datatypeEl.getAttribute("extension");
                String type = datatypeEl.getAttribute("type");

                // the type attribute is split at ':' into two parts (presumably python module and class name - confirm against
                // GalaxyDataType); an entry without a second part is skipped instead of raising an ArrayIndexOutOfBoundsException
                String[] splitType = type.split(":");
                if (splitType.length < 2) {
                    System.err.println("Skipping Galaxy data type " + extension + " with malformed type attribute " + type);
                    continue;
                }
                galaxyDataTypes.put(extension, new GalaxyDataType(splitType[0], splitType[1], extension));
            }
        } catch (SAXException | IOException | ParserConfigurationException e) {
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * A (recursive) helper function for parsing the macros used by the XML files describing Galaxy's tools
     * 
     * @param macrosNd
     *            an XML node that specifies a set of macros
     * @param dir
     *            the directory in which the currently processed macros are located
     * @return processed macros accessible by their name
     */
    private Map<String, String> processMacros(Node macrosNd, String dir) {
        // Collects the serialized content of every <macro> below macrosNd, keyed by the macro's name attribute. Macros pulled in
        // through <import> elements are gathered first, so a macro defined locally replaces an imported one of the same name.
        Map<String, String> macrosByName = new HashMap<>();
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Element macrosEl = (Element) macrosNd;

            // (1) if additional macro files are to be imported, open the files and recursively invoke this method
            NodeList importNds = macrosEl.getElementsByTagName("import");
            for (int j = 0; j < importNds.getLength(); j++) {
                Element importEl = (Element) importNds.item(j);
                String importFileName = importEl.getChildNodes().item(0).getNodeValue().trim();
                File file = new File(dir, importFileName);
                Document doc = builder.parse(file);
                macrosByName.putAll(processMacros(doc.getDocumentElement(), dir));
            }

            // (2) parse all macros in this set
            NodeList macroNds = macrosEl.getElementsByTagName("macro");

            // one transformer serves all macros; its output properties are kept from one transformation to the next
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");

            for (int j = 0; j < macroNds.getLength(); j++) {
                Element macroEl = (Element) macroNds.item(j);
                String name = macroEl.getAttribute("name");

                StringWriter writer = new StringWriter();
                transformer.transform(new DOMSource(macroEl), new StreamResult(writer));
                String macro = writer.toString();

                int firstNewline = macro.indexOf('\n');
                int lastNewline = macro.lastIndexOf('\n');
                if (firstNewline < lastNewline) {
                    // usual layout: drop the first and the last line of the serialized element, i.e. the enclosing <macro> tags when
                    // they sit on lines of their own
                    macro = macro.substring(firstNewline + 1, lastNewline);
                } else {
                    // fewer than two line breaks (e.g. a macro written on a single line): the line-based cut would throw a
                    // StringIndexOutOfBoundsException, so cut between the end of the opening tag and the start of the closing tag;
                    // assumes the opening tag contains no literal '>' inside an attribute value
                    int openingTagEnd = macro.indexOf('>');
                    int closingTagStart = macro.lastIndexOf("</");
                    macro = closingTagStart > openingTagEnd && openingTagEnd >= 0
                            ? macro.substring(openingTagEnd + 1, closingTagStart)
                            : "";
                }
                macrosByName.put(name, macro);
            }
        } catch (SAXException | IOException | TransformerException | ParserConfigurationException e) {
            e.printStackTrace();
            System.exit(-1);
        }
        return macrosByName;
    }

    /**
     * A helper function for processing a Galaxy config file that lists the tools within a single library
     * 
     * @param file
     *            the Galaxy tool library config file
     * @param defaultPath
     *            the directory in which pre-installed Galaxy tools are located
     * @param dependencyDir
     *            the directory in which the tool's dependencies are located
     */
    private void processToolLibraries(File file, String defaultPath, String dependencyDir) {
        // Hands every <tool> entry of the library config to parseToolFile. For tools that declare repository name, owner and installed
        // changeset revision, the lines of each requirement's env.sh (if the file exists) are added to the tool via addEnv.
        // Parsing or I/O failures terminate the JVM.
        try {
            System.out.println("Processing Galaxy tool library config file " + file.getCanonicalPath());
            File galaxyPathFile = new File(galaxyPath);
            File dir = new File(galaxyPathFile, defaultPath);
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document doc = builder.parse(file);
            Element toolboxEl = doc.getDocumentElement();
            if (toolboxEl.hasAttribute("tool_path")) {
                // an absolute tool_path is used as given; only relative ones are resolved against the Galaxy installation directory
                String toolPath = toolboxEl.getAttribute("tool_path");
                File toolPathFile = new File(toolPath);
                dir = toolPathFile.isAbsolute() ? toolPathFile : new File(galaxyPathFile, toolPath);
            }

            NodeList tools = toolboxEl.getElementsByTagName("tool");
            for (int i = 0; i < tools.getLength(); i++) {
                // (1) parse a single XML tool file
                Element toolEl = (Element) tools.item(i);
                String toolFile = toolEl.getAttribute("file");
                GalaxyTool tool = parseToolFile(new File(dir, toolFile));

                // (2) go over the tool's dependencies and determine the environment-setting pre-script accordingly
                String repositoryName = firstDescendantText(toolEl, "repository_name");
                String owner = firstDescendantText(toolEl, "repository_owner");
                String revision = firstDescendantText(toolEl, "installed_changeset_revision");

                // without complete repository information the location of the env.sh files cannot be derived
                if (repositoryName.isEmpty() || owner.isEmpty() || revision.isEmpty())
                    continue;

                for (String requirementName : tool.getRequirements()) {
                    File envFile = new File(galaxyPath + "/" + dependencyDir,
                            requirementName + "/" + tool.getRequirementVersion(requirementName) + "/" + owner
                                    + "/" + repositoryName + "/" + revision + "/env.sh");
                    if (envFile.exists()) {
                        try (BufferedReader br = new BufferedReader(new FileReader(envFile))) {
                            String line;
                            while ((line = br.readLine()) != null) {
                                tool.addEnv(line);
                            }
                        }
                    }
                }
            }
        } catch (SAXException | IOException | ParserConfigurationException e) {
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * A helper function for reading the text of the first element with a given tag name below a parent element
     * 
     * @param parent
     *            the element whose descendants are searched
     * @param tagName
     *            the tag name of the element to look for
     * @return the trimmed text content of the first matching element, or the empty string if there is no such element or it has no text
     */
    private static String firstDescendantText(Element parent, String tagName) {
        NodeList matches = parent.getElementsByTagName(tagName);
        if (matches.getLength() == 0)
            return "";
        return matches.item(0).getTextContent().trim();
    }
}