de.huberlin.wbi.hiway.am.dax.DaxApplicationMaster.java Source code

Java tutorial

Introduction

Here is the source code for de.huberlin.wbi.hiway.am.dax.DaxApplicationMaster.java

Source

/*******************************************************************************
 * In the Hi-WAY project we propose a novel approach of executing scientific
 * workflows processing Big Data, as found in NGS applications, on distributed
 * computational infrastructures. The Hi-WAY software stack comprises the func-
 * tional workflow language Cuneiform as well as the Hi-WAY ApplicationMaster
 * for Apache Hadoop 2.x (YARN).
 *
 * List of Contributors:
 *
 * Marc Bux (HU Berlin)
 * Jörgen Brandt (HU Berlin)
 * Hannes Schuh (HU Berlin)
 * Ulf Leser (HU Berlin)
 *
 * Jörgen Brandt is funded by the European Commission through the BiobankCloud
 * project. Marc Bux is funded by the Deutsche Forschungsgemeinschaft through
 * research training group SOAMED (GRK 1651).
 *
 * Copyright 2014 Humboldt-Universität zu Berlin
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.huberlin.wbi.hiway.am.dax;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.json.JSONException;
import org.json.JSONObject;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import de.huberlin.wbi.cuneiform.core.semanticmodel.JsonReportEntry;
import de.huberlin.wbi.hiway.am.HiWay;
import de.huberlin.wbi.hiway.common.Data;
import de.huberlin.wbi.hiway.common.TaskInstance;
import de.huberlin.wbi.hiway.common.WorkflowStructureUnknownException;

/**
 * Hi-WAY application master for workflows given as a Pegasus DAX (XML) document.
 *
 * <p>
 * The class reads the {@code <job>}, {@code <argument>}, {@code <uses>} and {@code <child>}/{@code <parent>}
 * elements of the DAX file, turns every job into a {@link DaxTaskInstance} and hands the resulting tasks to the
 * scheduler.
 */
public class DaxApplicationMaster extends HiWay {

    /**
     * Entry point: creates the application master and passes it, together with the command line, to
     * {@link HiWay#loop}.
     *
     * @param args command line arguments, forwarded unchanged
     */
    public static void main(String[] args) {
        HiWay.loop(new DaxApplicationMaster(), args);
    }

    /**
     * Creates the application master and calls {@code setDetermineFileSizes()} on it.
     */
    public DaxApplicationMaster() {
        super();
        setDetermineFileSizes();
    }

    /**
     * Parses the DAX workflow file and registers all of its tasks with the scheduler.
     *
     * <p>
     * For every {@code <job>} a task is created whose command is the job name followed by its arguments; every
     * {@code <uses>} element is registered as input or output data of that task, and one
     * {@code KEY_INVOC_OUTPUT} report entry listing all output file names is added per job. Afterwards the
     * {@code <child>}/{@code <parent>} elements are used to wire up task dependencies. Output data of tasks
     * without children is flagged via {@code setOutput(true)}, and a {@code KEY_INVOC_SCRIPT} report entry
     * holding the command is added to every task.
     *
     * <p>
     * If the file cannot be read or parsed (or one of the other caught checked exceptions occurs) the stack
     * trace is printed and the JVM exits with status {@code -1}.
     *
     * @throws IllegalArgumentException if a dependency refers to a job id that is not declared in the file
     * @throws NumberFormatException    if a {@code runtime} or {@code size} attribute is not a valid number
     */
    @Override
    public void parseWorkflow() {
        Map<Object, TaskInstance> tasks = new HashMap<>();
        System.out.println("Parsing Pegasus DAX " + getWorkflowFile());

        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document doc = builder.parse(new File(getWorkflowFile().getLocalPath().toString()));

            // Pass 1: one task per <job>, including its command line and its input/output files.
            NodeList jobNds = doc.getElementsByTagName("job");
            for (int i = 0; i < jobNds.getLength(); i++) {
                Element jobEl = (Element) jobNds.item(i);
                String id = jobEl.getAttribute("id");
                String taskName = jobEl.getAttribute("name");
                DaxTaskInstance task = new DaxTaskInstance(getRunId(), taskName);
                task.setRuntime(
                        jobEl.hasAttribute("runtime") ? Double.parseDouble(jobEl.getAttribute("runtime")) : 0d);
                tasks.put(id, task);

                // Collected across ALL <uses> elements of this job so that the report entry below
                // lists every output file of the task exactly once.
                List<String> outputs = new LinkedList<>();

                NodeList usesNds = jobEl.getElementsByTagName("uses");
                for (int j = 0; j < usesNds.getLength(); j++) {
                    Element usesEl = (Element) usesNds.item(j);
                    String link = usesEl.getAttribute("link");
                    String fileName = usesEl.getAttribute("file");
                    long size = usesEl.hasAttribute("size") ? Long.parseLong(usesEl.getAttribute("size")) : 0L;

                    switch (link) {
                    case "input": {
                        // A file seen for the first time as an input is marked as input data.
                        if (!getFiles().containsKey(fileName)) {
                            Data newInput = new Data(fileName);
                            newInput.setInput(true);
                            getFiles().put(fileName, newInput);
                        }
                        Data consumed = getFiles().get(fileName);
                        task.addInputData(consumed, size);
                        break;
                    }
                    case "output": {
                        if (!getFiles().containsKey(fileName)) {
                            getFiles().put(fileName, new Data(fileName));
                        }
                        Data produced = getFiles().get(fileName);
                        task.addOutputData(produced, size);
                        // A file produced by some job is cleared of the input flag, even if another
                        // job registered it as input earlier.
                        produced.setInput(false);
                        outputs.add(fileName);
                        break;
                    }
                    default:
                        // Other link types are not handled.
                        break;
                    }
                }

                // One output report entry per job (previously one entry was emitted per <uses> element,
                // each carrying at most a single file name and empty lists for inputs).
                task.getReport()
                        .add(new JsonReportEntry(task.getWorkflowId(), task.getTaskId(), task.getTaskName(),
                                task.getLanguageLabel(), Long.valueOf(task.getId()), null,
                                JsonReportEntry.KEY_INVOC_OUTPUT, new JSONObject().put("output", outputs)));

                task.setCommand(taskName + collectArguments(jobEl));
                System.out.println(
                        "Adding task " + task + ": " + task.getInputData() + " -> " + task.getOutputData());
            }

            // Pass 2: wire up dependencies declared as <child ref="..."><parent ref="..."/></child>.
            NodeList childNds = doc.getElementsByTagName("child");
            for (int i = 0; i < childNds.getLength(); i++) {
                Element childEl = (Element) childNds.item(i);
                NodeList parentNds = childEl.getElementsByTagName("parent");
                if (parentNds.getLength() == 0) {
                    // Nothing to link; the child reference is not resolved in this case.
                    continue;
                }

                TaskInstance child = requireTask(tasks, childEl.getAttribute("ref"), "child");
                for (int j = 0; j < parentNds.getLength(); j++) {
                    Element parentEl = (Element) parentNds.item(j);
                    // Resolve before mutating either side so a dangling reference cannot leave a
                    // half-linked dependency behind.
                    TaskInstance parent = requireTask(tasks, parentEl.getAttribute("ref"), "parent");

                    child.addParentTask(parent);
                    parent.addChildTask(child);
                }
            }

            // Pass 3: flag the output data of tasks without children and record every command.
            for (TaskInstance task : tasks.values()) {
                if (task.getChildTasks().size() == 0) {
                    for (Data data : task.getOutputData()) {
                        data.setOutput(true);
                    }
                }

                task.getReport()
                        .add(new JsonReportEntry(task.getWorkflowId(), task.getTaskId(), task.getTaskName(),
                                task.getLanguageLabel(), Long.valueOf(task.getId()), null,
                                JsonReportEntry.KEY_INVOC_SCRIPT, task.getCommand()));
            }

        } catch (WorkflowStructureUnknownException | IOException | JSONException | ParserConfigurationException
                | SAXException e) {
            e.printStackTrace();
            System.exit(-1);
        }

        getScheduler().addTasks(tasks.values());
    }

    /**
     * Builds the argument part of a job's command line from its {@code <argument>} elements.
     *
     * <p>
     * Text nodes contribute their whitespace-normalized, trimmed content; {@code <file name="..."/>} and
     * {@code <filename file="..."/>} children contribute the respective attribute value. Empty pieces are
     * skipped.
     *
     * @param jobEl the {@code <job>} element
     * @return the arguments, each preceded by a single space (empty string if there are none)
     */
    private static String collectArguments(Element jobEl) {
        StringBuilder arguments = new StringBuilder();
        NodeList argumentNds = jobEl.getElementsByTagName("argument");
        for (int j = 0; j < argumentNds.getLength(); j++) {
            NodeList pieces = argumentNds.item(j).getChildNodes();
            for (int k = 0; k < pieces.getLength(); k++) {
                Node piece = pieces.item(k);
                String argument = "";

                switch (piece.getNodeType()) {
                case Node.ELEMENT_NODE:
                    Element pieceEl = (Element) piece;
                    if (pieceEl.getNodeName().equals("file")) {
                        if (pieceEl.hasAttribute("name")) {
                            argument = pieceEl.getAttribute("name");
                        }
                    } else if (pieceEl.getNodeName().equals("filename")) {
                        if (pieceEl.hasAttribute("file")) {
                            argument = pieceEl.getAttribute("file");
                        }
                    }
                    break;
                case Node.TEXT_NODE:
                    argument = piece.getNodeValue().replaceAll("\\s+", " ").trim();
                    break;
                default:
                    break;
                }

                if (argument.length() > 0) {
                    arguments.append(" ").append(argument);
                }
            }
        }
        return arguments.toString();
    }

    /**
     * Looks up the task registered for a job id referenced by a dependency element.
     *
     * @param tasks the tasks parsed so far, keyed by job id
     * @param ref   the job id taken from the {@code ref} attribute
     * @param role  {@code "child"} or {@code "parent"}; only used in the error message
     * @return the task registered under {@code ref}
     * @throws IllegalArgumentException if no job with that id was declared
     */
    private static TaskInstance requireTask(Map<Object, TaskInstance> tasks, String ref, String role) {
        TaskInstance task = tasks.get(ref);
        if (task == null) {
            throw new IllegalArgumentException(
                    "DAX dependency references unknown " + role + " job '" + ref + "'");
        }
        return task;
    }

}