de.huberlin.wbi.hiway.scheduler.Scheduler.java Source code

Java tutorial

Introduction

Here is the source code for de.huberlin.wbi.hiway.scheduler.Scheduler.java

Source

/*******************************************************************************
 * In the Hi-WAY project we propose a novel approach of executing scientific
 * workflows processing Big Data, as found in NGS applications, on distributed
 * computational infrastructures. The Hi-WAY software stack comprises the func-
 * tional workflow language Cuneiform as well as the Hi-WAY ApplicationMaster
 * for Apache Hadoop 2.x (YARN).
 *
 * List of Contributors:
 *
 * Marc Bux (HU Berlin)
 * Jrgen Brandt (HU Berlin)
 * Hannes Schuh (HU Berlin)
 * Ulf Leser (HU Berlin)
 *
 * Jrgen Brandt is funded by the European Commission through the BiobankCloud
 * project. Marc Bux is funded by the Deutsche Forschungsgemeinschaft through
 * research training group SOAMED (GRK 1651).
 *
 * Copyright 2014 Humboldt-Universitt zu Berlin
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.huberlin.wbi.hiway.scheduler;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.json.JSONException;

import de.huberlin.hiwaydb.useDB.HiwayDB;
import de.huberlin.hiwaydb.useDB.HiwayDBI;
import de.huberlin.hiwaydb.useDB.HiwayDBNoSQL;
import de.huberlin.hiwaydb.useDB.InvocStat;
import de.huberlin.wbi.cuneiform.core.semanticmodel.JsonReportEntry;
import de.huberlin.wbi.hiway.common.HiWayConfiguration;
import de.huberlin.wbi.hiway.common.LogParser;
import de.huberlin.wbi.hiway.common.TaskInstance;

/**
 * An abstract implementation of a workflow scheduler.
 * 
 * @author Marc Bux
 * 
 */
public abstract class Scheduler {

    protected HiWayConfiguration conf;
    protected HiwayDBI dbInterface;
    protected final FileSystem hdfs;
    protected int maxRetries = 0;
    protected Map<String, Long> maxTimestampPerHost;
    protected int numberOfFinishedTasks = 0;
    protected int numberOfPreviousRunTasks = 0;
    protected int numberOfRemainingTasks = 0;
    protected int numberOfRunningTasks = 0;
    protected boolean relaxLocality = true;
    protected Map<String, Map<Long, RuntimeEstimate>> runtimeEstimatesPerNode;
    protected Set<Long> taskIds;
    // a queue of nodes on which containers are to be requested
    protected Queue<String[]> unissuedNodeRequests;
    protected String workflowName;

    public Scheduler(String workflowName, HiWayConfiguration conf, FileSystem hdfs) {
        this.workflowName = workflowName;

        this.conf = conf;
        this.hdfs = hdfs;
        unissuedNodeRequests = new LinkedList<>();

        taskIds = new HashSet<>();
        runtimeEstimatesPerNode = new HashMap<>();
        maxTimestampPerHost = new HashMap<>();
    }

    public void addEntryToDB(JsonReportEntry entry) {
        System.out.println("HiwayDB: Adding entry " + entry + " to database.");
        dbInterface.logToDB(entry);
        System.out.println("HiwayDB: Added entry to database.");
    }

    protected abstract void addTask(TaskInstance task);

    public void addTasks(Collection<TaskInstance> tasks) {
        for (TaskInstance task : tasks) {
            addTask(task);
        }
    }

    public abstract void addTaskToQueue(TaskInstance task);

    public String[] getNextNodeRequest() {
        return unissuedNodeRequests.remove();
    }

    public abstract TaskInstance getNextTask(Container container);

    protected Set<String> getNodeIds() {
        return new HashSet<>(runtimeEstimatesPerNode.keySet());
    }

    public int getNumberOfFinishedTasks() {
        return numberOfFinishedTasks - numberOfPreviousRunTasks;
    }

    public abstract int getNumberOfReadyTasks();

    public int getNumberOfRunningTasks() {
        return numberOfRunningTasks;
    }

    public int getNumberOfTotalTasks() {
        int fin = getNumberOfFinishedTasks();
        int run = getNumberOfRunningTasks();
        int rem = numberOfRemainingTasks;

        System.out.println("Scheduled Containers Finished: " + fin);
        System.out.println("Scheduled Containers Running: " + run);
        System.out.println("Scheduled Containers Remaining: " + rem);

        return fin + run + rem;
    }

    protected Set<Long> getTaskIds() {
        return new HashSet<>(taskIds);
    }

    public boolean hasNextNodeRequest() {
        return !unissuedNodeRequests.isEmpty();
    }

    public void initialize() {
        maxRetries = conf.getInt(HiWayConfiguration.HIWAY_AM_TASK_RETRIES,
                HiWayConfiguration.HIWAY_AM_TASK_RETRIES_DEFAULT);

        HiWayConfiguration.HIWAY_DB_TYPE_OPTS dbType = HiWayConfiguration.HIWAY_DB_TYPE_OPTS.valueOf(
                conf.get(HiWayConfiguration.HIWAY_DB_TYPE, HiWayConfiguration.HIWAY_DB_TYPE_DEFAULT.toString()));
        switch (dbType) {
        case SQL:
            String sqlUser = conf.get(HiWayConfiguration.HIWAY_DB_SQL_USER);
            if (sqlUser == null) {
                System.err.println(
                        HiWayConfiguration.HIWAY_DB_SQL_USER + " not set in  " + HiWayConfiguration.HIWAY_SITE_XML);
                throw new RuntimeException();
            }
            String sqlPassword = conf.get(HiWayConfiguration.HIWAY_DB_SQL_PASSWORD);
            if (sqlPassword == null) {
                System.err.println(HiWayConfiguration.HIWAY_DB_SQL_PASSWORD + " not set in  "
                        + HiWayConfiguration.HIWAY_SITE_XML);
                throw new RuntimeException();
            }
            String sqlURL = conf.get(HiWayConfiguration.HIWAY_DB_SQL_URL);
            if (sqlURL == null) {
                System.err.println(
                        HiWayConfiguration.HIWAY_DB_SQL_URL + " not set in  " + HiWayConfiguration.HIWAY_SITE_XML);
                throw new RuntimeException();
            }
            dbInterface = new HiwayDB(sqlUser, sqlPassword, sqlURL);
            break;
        case NoSQL:
            sqlUser = conf.get(HiWayConfiguration.HIWAY_DB_SQL_USER);
            if (sqlUser == null) {
                System.err.println(
                        HiWayConfiguration.HIWAY_DB_SQL_USER + " not set in  " + HiWayConfiguration.HIWAY_SITE_XML);
                throw new RuntimeException();
            }
            sqlPassword = conf.get(HiWayConfiguration.HIWAY_DB_SQL_PASSWORD);
            if (sqlPassword == null) {
                System.err.println(HiWayConfiguration.HIWAY_DB_SQL_PASSWORD + " not set in  "
                        + HiWayConfiguration.HIWAY_SITE_XML);
                throw new RuntimeException();
            }
            sqlURL = conf.get(HiWayConfiguration.HIWAY_DB_SQL_URL);
            if (sqlURL == null) {
                System.err.println(
                        HiWayConfiguration.HIWAY_DB_SQL_URL + " not set in  " + HiWayConfiguration.HIWAY_SITE_XML);
                throw new RuntimeException();
            }
            String noSqlBucket = conf.get(HiWayConfiguration.HIWAY_DB_NOSQL_BUCKET);
            if (noSqlBucket == null) {
                System.err.println(HiWayConfiguration.HIWAY_DB_NOSQL_BUCKET + " not set in  "
                        + HiWayConfiguration.HIWAY_SITE_XML);
                throw new RuntimeException();
            }
            String noSqlPassword = conf.get(HiWayConfiguration.HIWAY_DB_NOSQL_PASSWORD);
            if (noSqlPassword == null) {
                System.err.println(HiWayConfiguration.HIWAY_DB_NOSQL_PASSWORD + " not set in  "
                        + HiWayConfiguration.HIWAY_SITE_XML);
                throw new RuntimeException();
            }
            String noSqlURIs = conf.get(HiWayConfiguration.HIWAY_DB_NOSQL_URLS);
            if (noSqlURIs == null) {
                System.err.println(HiWayConfiguration.HIWAY_DB_NOSQL_URLS + " not set in  "
                        + HiWayConfiguration.HIWAY_SITE_XML);
                throw new RuntimeException();
            }
            List<URI> noSqlURIList = new ArrayList<>();
            for (String uri : noSqlURIs.split(",")) {
                noSqlURIList.add(URI.create(uri));
            }
            dbInterface = new HiwayDBNoSQL(noSqlBucket, noSqlPassword, noSqlURIList, sqlUser, sqlPassword, sqlURL);

            break;
        default:
            dbInterface = new LogParser();
            parseLogs();
        }
    }

    protected void newHost(String nodeId) {
        Map<Long, RuntimeEstimate> runtimeEstimates = new HashMap<>();
        for (long taskId : getTaskIds()) {
            runtimeEstimates.put(taskId, new RuntimeEstimate());
        }
        runtimeEstimatesPerNode.put(nodeId, runtimeEstimates);
        maxTimestampPerHost.put(nodeId, 0l);
    }

    protected void newTask(long taskId) {
        taskIds.add(taskId);
        for (Map<Long, RuntimeEstimate> runtimeEstimates : runtimeEstimatesPerNode.values()) {
            runtimeEstimates.put(taskId, new RuntimeEstimate());
        }
    }

    public boolean nothingToSchedule() {
        return getNumberOfReadyTasks() == 0;
    }

    protected void parseLogs() {
        String hdfsBaseDirectoryName = conf.get(HiWayConfiguration.HIWAY_AM_DIRECTORY_BASE,
                HiWayConfiguration.HIWAY_AM_DIRECTORY_BASE_DEFAULT);
        String hdfsSandboxDirectoryName = conf.get(HiWayConfiguration.HIWAY_AM_DIRECTORY_CACHE,
                HiWayConfiguration.HIWAY_AM_DIRECTORY_CACHE_DEFAULT);
        Path hdfsBaseDirectory = new Path(new Path(hdfs.getUri()), hdfsBaseDirectoryName);
        Path hdfsSandboxDirectory = new Path(hdfsBaseDirectory, hdfsSandboxDirectoryName);
        try {
            for (FileStatus appDirStatus : hdfs.listStatus(hdfsSandboxDirectory)) {
                if (appDirStatus.isDirectory()) {
                    Path appDir = appDirStatus.getPath();
                    for (FileStatus srcStatus : hdfs.listStatus(appDir)) {
                        Path src = srcStatus.getPath();
                        String srcName = src.getName();
                        if (srcName.endsWith(".log")) {
                            Path dest = new Path(appDir.getName());
                            System.out.println("Parsing log " + dest.toString());
                            hdfs.copyToLocalFile(false, src, dest);

                            try (BufferedReader reader = new BufferedReader(
                                    new FileReader(new File(dest.toString())))) {
                                String line;
                                while ((line = reader.readLine()) != null) {
                                    JsonReportEntry entry = new JsonReportEntry(line);
                                    addEntryToDB(entry);
                                }
                            }
                        }
                    }
                }
            }
        } catch (IOException | JSONException e) {
            e.printStackTrace();
            System.exit(-1);
        }
    }

    public boolean relaxLocality() {
        return relaxLocality;
    }

    public Collection<ContainerId> taskCompleted(TaskInstance task, ContainerStatus containerStatus,
            long runtimeInMs) {

        numberOfRunningTasks--;
        numberOfFinishedTasks++;

        System.out.println("Task " + task + " in container " + containerStatus.getContainerId().getContainerId()
                + " finished after " + runtimeInMs + " ms");

        return new ArrayList<>();
    }

    public Collection<ContainerId> taskFailed(TaskInstance task, ContainerStatus containerStatus) {
        numberOfRunningTasks--;

        System.out.println(
                "Task " + task + " on container " + containerStatus.getContainerId().getContainerId() + " failed");
        if (task.retry(maxRetries)) {
            System.out.println("Retrying task " + task + ".");
            addTask(task);
        } else {
            System.out.println(
                    "Task " + task + " has exceeded maximum number of allowed retries. Aborting workflow.");
            throw new RuntimeException();
        }

        return new ArrayList<>();
    }

    protected void updateRuntimeEstimate(InvocStat stat) {
        RuntimeEstimate re = runtimeEstimatesPerNode.get(stat.getHostName()).get(stat.getTaskId());
        re.finishedTasks += 1;
        re.timeSpent += stat.getRealTime();
        re.weight = re.averageRuntime = re.timeSpent / re.finishedTasks;
    }

    public void updateRuntimeEstimates(String runId) {
        System.out.println("Updating Runtime Estimates.");

        System.out.println("HiwayDB: Querying Host Names from database.");
        Collection<String> newHostIds = dbInterface.getHostNames();
        System.out.println("HiwayDB: Retrieved Host Names " + newHostIds.toString() + " from database.");
        newHostIds.removeAll(getNodeIds());
        for (String newHostId : newHostIds) {
            newHost(newHostId);
        }
        System.out.println("HiwayDB: Querying Task Ids for workflow " + workflowName + " from database.");
        Collection<Long> newTaskIds = dbInterface.getTaskIdsForWorkflow(workflowName);
        System.out.println("HiwayDB: Retrieved Task Ids " + newTaskIds.toString() + " from database.");

        newTaskIds.removeAll(getTaskIds());
        for (long newTaskId : newTaskIds) {
            newTask(newTaskId);
        }

        for (String hostName : getNodeIds()) {
            long oldMaxTimestamp = maxTimestampPerHost.get(hostName);
            long newMaxTimestamp = oldMaxTimestamp;
            for (long taskId : getTaskIds()) {
                System.out.println("HiwayDB: Querying InvocStats for task id " + taskId + " on host " + hostName
                        + " since timestamp " + oldMaxTimestamp + " from database.");
                Collection<InvocStat> invocStats = dbInterface.getLogEntriesForTaskOnHostSince(taskId, hostName,
                        oldMaxTimestamp);
                System.out.println("HiwayDB: Retrieved InvocStats " + invocStats.toString() + " from database.");
                for (InvocStat stat : invocStats) {
                    newMaxTimestamp = Math.max(newMaxTimestamp, stat.getTimestamp());
                    updateRuntimeEstimate(stat);
                    if (!runId.equals(stat.getRunId())) {
                        numberOfPreviousRunTasks++;
                        numberOfFinishedTasks++;
                    }
                }
            }
            maxTimestampPerHost.put(hostName, newMaxTimestamp);
        }
    }
}