Java tutorial
/*
 Copyright (c) 2015 by ScaleOut Software, Inc.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/
package com.scaleoutsoftware.soss.hserver;

import com.scaleoutsoftware.soss.client.*;
import com.scaleoutsoftware.soss.client.da.DataAccessor;
import com.scaleoutsoftware.soss.client.da.StateServerException;
import com.scaleoutsoftware.soss.client.util.SerializationMode;
import com.scaleoutsoftware.soss.client.pmi.MessagingHelper;
import com.scaleoutsoftware.soss.client.util.BitConverter;
import com.scaleoutsoftware.soss.hserver.hadoop.HadoopInvocationParameters;
import com.scaleoutsoftware.soss.hserver.hadoop.HadoopVersionSpecificCode;
import com.scaleoutsoftware.soss.hserver.interop.HServerConstants;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapred.FileOutputCommitter;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.VersionInfo;

import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.*;

import static com.scaleoutsoftware.soss.hserver.HServerParameters.MAP_SPLITS_PER_CORE;

/**
 * A singleton scheduler which is used to run {@link com.scaleoutsoftware.soss.hserver.HServerJob} as a series of PMIs.
 */
public class JobScheduler {
    private static final Log LOG = LogFactory.getLog(JobScheduler.class);

    private static JobScheduler _instance = new JobScheduler();

    /**
     * Returns the single instance.
     *
     * @return scheduler instance
     */
    public static JobScheduler getInstance() {
        return _instance;
    }

    /**
     * Gets the locations for the split, handles both "mapred" and "mapreduce" splits.
     *
     * @param split split to get locations
     * @return array of split home locations
     */
    private String[] getSplitLocations(Object split) throws IOException, InterruptedException {
        if (split instanceof org.apache.hadoop.mapred.InputSplit) {
            return ((org.apache.hadoop.mapred.InputSplit) split).getLocations();
        } else if (split instanceof org.apache.hadoop.mapreduce.InputSplit) {
            return ((org.apache.hadoop.mapreduce.InputSplit) split).getLocations();
        } else {
            throw new IOException("Invalid split type:" + split);
        }
    }

    /**
     * Returns the SOSS host IPs which correspond to the locations for the given split.
     *
     * @param split                    split to locate
     * @param sossHostAdresses         list of available SOSS hosts
     * @param additionalSplitLocations additional locations for that split, can be null
     * @return list of split location, or empty list if none found
     */
    private List<InetAddress> getSossLocations(Object split, List<InetAddress> sossHostAdresses, String[] additionalSplitLocations) {
        List<InetAddress> splitLocations = new ArrayList<InetAddress>();
        try {
            //If GridSplit, just find and return its home location
            if (split instanceof GridSplit) {
                InetAddress location = ((GridSplit) split).getLocation();
                if (location != null && sossHostAdresses.contains(location)) {
                    splitLocations.add(location);
                    return splitLocations;
                }
            }

            //Parse locations in the split object
            String[] locations = getSplitLocations(split);
            if (locations != null) {
                for (String location : locations) {
                    try {
                        splitLocations.addAll(Arrays.asList(InetAddress.getAllByName(location)));
                    } catch (UnknownHostException e) {
                        //Do nothing, must be a bad location
                    }
                }
            }

            //Add additional locations, passed separate from the split
            if (additionalSplitLocations != null) {
                for (String location : additionalSplitLocations) {
                    try {
                        splitLocations.addAll(Arrays.asList(InetAddress.getAllByName(location)));
                    } catch (UnknownHostException e) {
                        //Do nothing, must be a bad location
                    }
                }
            }

            //Remove locations which are not SOSS locations
            Iterator<InetAddress> iterator = splitLocations.iterator();
            while (iterator.hasNext()) {
                if (!sossHostAdresses.contains(iterator.next())) {
                    iterator.remove();
                }
            }
        } catch (InterruptedException e) {
            //Do nothing, split will be assigned to the random location
        } catch (IOException e) {
            //Do nothing, split will be assigned to the random location
        }
        return splitLocations;
    }
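    /*
     * Illustrative example of the assignment strategy implemented below (numbers are
     * made up): with hosts A and B and four splits whose only SOSS candidate location
     * is A, maxSplitsPerHost is 4 / 2 = 2. The first two splits stay on A; the next
     * two exceed the per-host cap, so they fall back to the least-loaded host overall
     * and end up on B, giving each host two splits despite the one-sided locality.
     */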
    /**
     * Walks through the split list and assigns split to hosts.
     * The result of this method is the map in which host addresses are keys
     * and values are lists of split indexes.
     *
     * @param splitList      list of all splits in the job
     * @param hostAddresses  list of host addresses in the cluster
     * @param splitLocations additional home locations for splits, can be null
     * @return map of lists of splits for each host
     */
    private <INPUT_SPLIT_TYPE> Map<InetAddress, List<Integer>> assignSplitsToHost(List<INPUT_SPLIT_TYPE> splitList, List<InetAddress> hostAddresses, Map<Object, String[]> splitLocations) {
        Map<InetAddress, List<Integer>> splitToHostAddress = new HashMap<InetAddress, List<Integer>>();
        int maxSplitsPerHost = splitList.size() / hostAddresses.size();

        for (int splitIndex = 0; splitIndex < splitList.size(); splitIndex++) {
            INPUT_SPLIT_TYPE split = splitList.get(splitIndex);
            InetAddress hostToSendSplitTo = null;
            int minimumNumberOfSplitsAtLocation = Integer.MAX_VALUE;
            List<InetAddress> candidateLocations = getSossLocations(split, hostAddresses, splitLocations == null ? null : splitLocations.get(split));
            LOG.debug("Split " + split + ":" + candidateLocations + "," + hostAddresses);

            //Go through all candidate locations, finding one with smaller number of splits
            for (InetAddress location : candidateLocations) {
                if (!hostAddresses.contains(location)) {
                    //The SOSS host address for the split is not contained in the list of the host addresses
                    //that we received from the IG. Means no IG worker on that host.
                    LOG.warn("A split location " + location + " does not have a local worker available. Split " + split);
                } else {
                    int numberOfsplitsAtLocation = splitToHostAddress.containsKey(location) ?
                            splitToHostAddress.get(location).size() : 0;
                    //1. The number of splits at that host is less than the max allowed number of splits
                    //2. Host contains fewer splits than we have previously seen
                    if (numberOfsplitsAtLocation < maxSplitsPerHost && minimumNumberOfSplitsAtLocation > numberOfsplitsAtLocation) {
                        hostToSendSplitTo = location;
                        minimumNumberOfSplitsAtLocation = numberOfsplitsAtLocation; //We will use it at the next iteration
                    }
                    if (numberOfsplitsAtLocation == 0) break; //We found a location with no splits, so use it right away
                }
            }

            //We cannot send the split to one of its home locations, so find the host with the lowest number of splits assigned
            if (hostToSendSplitTo == null) {
                LOG.warn("Cannot assign split " + split + " to its home location. Candidate locations = " + candidateLocations);
                minimumNumberOfSplitsAtLocation = Integer.MAX_VALUE;
                for (InetAddress inetAddress : hostAddresses) {
                    int newNumberOfSplits = splitToHostAddress.containsKey(inetAddress) ? splitToHostAddress.get(inetAddress).size() : 0;
                    if (minimumNumberOfSplitsAtLocation > newNumberOfSplits) {
                        hostToSendSplitTo = inetAddress;
                        minimumNumberOfSplitsAtLocation = newNumberOfSplits;
                    }
                }
            }

            //Add split to the map
            if (!splitToHostAddress.containsKey(hostToSendSplitTo)) {
                splitToHostAddress.put(hostToSendSplitTo, new ArrayList<Integer>());
            }
            splitToHostAddress.get(hostToSendSplitTo).add(splitIndex);
        }
        return splitToHostAddress;
    }

    /**
     * Runs a single-result optimisation of the job (one PMI) and returns the result.
     *
     * @param job  job to run
     * @param grid invocation grid to run job on
     * @return result object
     */
    @SuppressWarnings("unchecked")
    Object runOptimisation(HServerJob job, InvocationGrid grid) throws IOException, InterruptedException, ClassNotFoundException {
        long time = System.currentTimeMillis();
        CreateUserCredentials.run(grid);
        try {
            //Calculating the region layout
            com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping hostNameToPartition =
                    com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping.getCurrent();
            List<InetAddress> hostAddresses = new ArrayList<InetAddress>(hostNameToPartition.getHosts());

            int numberOfSlotsPerNode = Math.max(grid != null ? grid.getMaxNumberOfCores() : Runtime.getRuntime().availableProcessors(), 1);

            //Set the number of splits to the number of cores
            if (GridInputFormat.class.isAssignableFrom(job.getInputFormatClass())) {
                int numberOfSplits = HServerParameters.getSetting(MAP_SPLITS_PER_CORE, job.getConfiguration()) * hostAddresses.size() * numberOfSlotsPerNode;
                GridInputFormat.setSuggestedNumberOfSplits(job, Math.min(numberOfSplits, HServerConstants.MAX_MAP_REDUCE_TASKS));
            }

            //Generating split to hostname map
            InputFormat inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
            List<InputSplit> splitList = inputFormat.getSplits(job);
            Map<InetAddress, List<Integer>> splitToHostAddress = assignSplitsToHost(splitList, hostAddresses, null);

            //Generating invocation parameters
            String hadoopVersion = VersionInfo.getVersion();

            Class<? extends InputSplit> splitType = splitList.size() > 0 ? splitList.get(0).getClass() : null;

            HadoopInvocationParameters hadoopParameters = new HadoopInvocationParameters(job.getConfiguration(), job.getJobID(), false);
            HServerInvocationParameters parameters = new HServerInvocationParameters(
                    hadoopParameters,
                    job.getAppId(),
                    new int[0],
                    hostNameToPartition,
                    numberOfSlotsPerNode,
                    splitType,
                    splitList,
                    splitToHostAddress,
                    true,
                    false,
                    hadoopVersion,
                    job.getJobParameter(),
                    SerializationMode.DEFAULT);

            StringBuilder stringBuilder = new StringBuilder();
            stringBuilder.append("Splits created:\n");
            for (InetAddress address : splitToHostAddress.keySet()) {
                stringBuilder.append("Host ");
                stringBuilder.append(address);
                stringBuilder.append(" has ");
                stringBuilder.append(splitToHostAddress.get(address).size());
                stringBuilder.append(" splits.\n");
            }
            System.out.println(stringBuilder.toString());

            System.out.println("Job initialization completed in " + (System.currentTimeMillis() - time) + " ms.");

            InvokeResult<MapperResult> invokeResult = MessagingHelper.invoke(grid, RunMapper.MapperInvokable.class, parameters, TimeSpan.INFINITE_TIMEOUT.getSeconds());

            if (invokeResult.getErrors() != null && invokeResult.getErrors().size() > 0) {
                throw new IOException("Map invocation failed.", invokeResult.getErrors().get(0));
            }

            MapperResult result = invokeResult.getResult();

            if (result == null || invokeResult.getNumFailed() != 0) {
                throw new IOException("Mapper invocation failed");
            }

            if (result.getNumberOfSplitsProcessed() != splitList.size()) {
                throw new IOException("Number of splits does not match the number of invocations. Nsplits = " + splitList.size() + ", Ninvokes =" + result.getNumberOfSplitsProcessed());
            }

            Map<String, Long> processingTimes = result.getProcessingTimes();
            for (String host : processingTimes.keySet()) {
                System.out.println("Host " + host + ": invoke done in " + processingTimes.get(host) + " ms.");
            }

            return result.getResult();
        } catch (StateServerException e) {
            throw new IOException("ScaleOut hServer access error.", e);
        }
    }
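    /*
     * Note on the two entry points: runOptimisation() above schedules a single map
     * invocation with an empty partition mapping (new int[0]), so no reduce step is
     * run, and it returns the merged MapperResult payload directly to the caller.
     * runJob() below performs the full cycle: the map invocation, an optional reduce
     * invocation when the partition mapping is non-empty, and the output commit.
     */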
    /**
     * Runs the map-reduce job on ScaleOut hServer.
     *
     * @param job  the job to run
     * @param grid invocation grid to run the job
     */
    @SuppressWarnings("unchecked")
    void runJob(HServerJob job, InvocationGrid grid) throws IOException, InterruptedException, ClassNotFoundException {
        //Initialize user credential in advance
        long time = System.currentTimeMillis();
        CreateUserCredentials.run(grid);
        String hadoopVersion = VersionInfo.getVersion();
        try {
            //Check output specs before running the job
            OutputFormat outputFormat = ReflectionUtils.newInstance(job.getOutputFormatClass(), job.getConfiguration());
            outputFormat.checkOutputSpecs(job);

            org.apache.hadoop.mapreduce.OutputCommitter outputCommitter = createOutputCommitter(true, job.getJobID(), job.getConfiguration());

            //clear all temporary objects
            DataAccessor.clearObjects(job.getAppId());

            //Calculating the partition layout
            com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping hostNameToPartition =
                    com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping.getCurrent();
            List<InetAddress> hostAddresses = new ArrayList<InetAddress>(hostNameToPartition.getHosts());

            //Generating mapping of Hadoop partitions to SOSS Regions, so they are equally distributed across hosts
            int numHosts = hostAddresses.size();
            int numberOfSlotsPerNode = Math.max(grid != null ? grid.getMaxNumberOfCores() : Runtime.getRuntime().availableProcessors(), 1);

            //Set the number of splits to the number of cores
            if (GridInputFormat.class.isAssignableFrom(job.getInputFormatClass())) {
                int numberOfSplits = HServerParameters.getSetting(MAP_SPLITS_PER_CORE, job.getConfiguration()) * numHosts * numberOfSlotsPerNode;
                GridInputFormat.setSuggestedNumberOfSplits(job, Math.min(numberOfSplits, HServerConstants.MAX_MAP_REDUCE_TASKS));
            }

            //Generating split to hostname map
            InputFormat inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
            List<InputSplit> splitList = inputFormat.getSplits(job);
            Map<InetAddress, List<Integer>> splitToHostAddress = assignSplitsToHost(splitList, hostAddresses, null);

            //Choose the optimal number of reducers for GridOutputFormat
            if (GridOutputFormat.class.isAssignableFrom(job.getOutputFormatClass())) {
                job.setNumReduceTasks(numHosts * numberOfSlotsPerNode);
                job.setSortEnabled(false);
            }

            int[] partitionMapping = hostNameToPartition.generateEvenItemDistribution(job.getNumReduceTasks());

            //Generating invocation parameters
            Class<? extends InputSplit> splitType = splitList.size() > 0 ? splitList.get(0).getClass() : null;

            HadoopInvocationParameters hadoopParameters = new HadoopInvocationParameters(job.getConfiguration(), job.getJobID(), false);
            HServerInvocationParameters parameters = new HServerInvocationParameters(
                    hadoopParameters,
                    job.getAppId(),
                    partitionMapping,
                    hostNameToPartition,
                    numberOfSlotsPerNode,
                    splitType,
                    splitList,
                    splitToHostAddress,
                    false,
                    job.getSortEnabled(),
                    hadoopVersion,
                    job.getJobParameter(),
                    SerializationMode.DEFAULT);

            StringBuilder stringBuilder = new StringBuilder();
            stringBuilder.append("Splits created:\n");
            for (InetAddress address : splitToHostAddress.keySet()) {
                stringBuilder.append("Host ");
                stringBuilder.append(address);
                stringBuilder.append(" has ");
                stringBuilder.append(splitToHostAddress.get(address).size());
                stringBuilder.append(" splits.\n");
            }
            System.out.println(stringBuilder.toString());

            System.out.println("Job initialization completed in " + (System.currentTimeMillis() - time) + " ms.");
            time = System.currentTimeMillis();

            InvokeResult<MapperResult> mapInvokeResult = MessagingHelper.invoke(grid, RunMapper.MapperInvokable.class, parameters, TimeSpan.INFINITE_TIMEOUT.getSeconds());

            if (mapInvokeResult.getErrors() != null && mapInvokeResult.getErrors().size() > 0) {
                throw new IOException("Map invocation failed.", mapInvokeResult.getErrors().get(0));
            }

            System.out.println("Map invocation done in " + (System.currentTimeMillis() - time) + " ms.");
            time = System.currentTimeMillis();

            MapperResult resultObject = mapInvokeResult.getResult();

            if (resultObject == null || mapInvokeResult.getNumFailed() != 0) {
                throw new IOException("Mapper invocation failed. Num failed = " + mapInvokeResult.getNumFailed());
            }

            if (resultObject.getNumberOfSplitsProcessed() != splitList.size()) {
                throw new IOException("Number of splits does not match the number of invocations. Nsplits = " + splitList.size() + ", Ninvokes =" + resultObject.getNumberOfSplitsProcessed());
            }

            if (partitionMapping.length > 0) {
                //Running the reduce step
                InvokeResult<Integer> reduceInvokeResult = MessagingHelper.invoke(grid, ReduceInvokable.class, job.getAppId(), TimeSpan.INFINITE_TIMEOUT.getSeconds());

                System.out.println("Reduce invocation done in " + (System.currentTimeMillis() - time) + " ms.");

                DataAccessor.clearObjects(job.getAppId()); //clear all temporary objects

                if (reduceInvokeResult.getErrors() != null && reduceInvokeResult.getErrors().size() > 0) {
                    throw new IOException("Reduce invocation failed.", reduceInvokeResult.getErrors().get(0));
                }
                if (reduceInvokeResult.getNumFailed() != 0) {
                    throw new IOException("Reduce invocation failed.");
                }
                if (reduceInvokeResult.getResult() != partitionMapping.length) {
                    throw new IOException("Not all partitions were reduced. Expected = " + partitionMapping.length + " Actual = " + reduceInvokeResult.getResult());
                }
            }
            outputCommitter.commitJob(job);
        } catch (StateServerException e) {
            throw new IOException("ScaleOut hServer access error.", e);
        }
    }
    /**
     * Runs the map-reduce job on ScaleOut hServer.
     *
     * @param jobID          the id of the job
     * @param jobConf        the job to run
     * @param isNewApi       if the job uses the new MapReduce APIs
     * @param splitType      the type of the split
     * @param inputSplits    the list of input splits
     * @param splitLocations the locations of the splits
     * @param grid           the invocation grid to run the job
     * @throws IOException            if errors occurred during the job
     * @throws InterruptedException   if the processing thread is interrupted
     * @throws ClassNotFoundException if the invocation grid does not contain the dependency class
     */
    @SuppressWarnings("unchecked")
    public void runPredefinedJob(JobID jobID, JobConf jobConf, boolean isNewApi, Class splitType, List<?> inputSplits, Map<Object, String[]> splitLocations, InvocationGrid grid) throws IOException, InterruptedException, ClassNotFoundException {
        //Initialize user credential in advance
        long time = System.currentTimeMillis();
        CreateUserCredentials.run(grid);
        String hadoopVersion = VersionInfo.getVersion();

        int appID = 0xFFFFFFF & BitConverter.hashStringOneInt(jobID.toString());

        try {
            org.apache.hadoop.mapreduce.OutputCommitter outputCommitter = createOutputCommitter(isNewApi, jobID, jobConf);

            HadoopVersionSpecificCode hadoopVersionSpecificCode = HadoopVersionSpecificCode.getInstance(hadoopVersion, jobConf);
            org.apache.hadoop.mapred.JobContext jobContext = hadoopVersionSpecificCode.createJobContext(jobConf, jobID);
            outputCommitter.setupJob(jobContext);

            //clear all temporary objects
            DataAccessor.clearObjects(appID);

            //Calculating the partition layout
            com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping hostNameToPartition =
                    com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping.getCurrent();
            List<InetAddress> hostAddresses = new ArrayList<InetAddress>(hostNameToPartition.getHosts());

            //Generating mapping of Hadoop partitions to SOSS partitions, so they are equally distributed across hosts
            int numHosts = hostAddresses.size();
            int numberOfSlotsPerNode = Math.max(grid != null ? grid.getMaxNumberOfCores() : Runtime.getRuntime().availableProcessors(), 1);

            //Generating split to hostname map
            Map<InetAddress, List<Integer>> splitToHostAddress = assignSplitsToHost(inputSplits, hostAddresses, splitLocations);

            int[] partitionMapping = hostNameToPartition.generateEvenItemDistribution(jobConf.getNumReduceTasks());

            HadoopInvocationParameters hadoopParameters = new HadoopInvocationParameters(jobConf, jobID, !isNewApi);
            HServerInvocationParameters parameters = new HServerInvocationParameters(
                    hadoopParameters,
                    appID,
                    partitionMapping,
                    hostNameToPartition,
                    numberOfSlotsPerNode,
                    splitType,
                    inputSplits,
                    splitToHostAddress,
                    false,
                    HServerParameters.getBooleanSetting(HServerParameters.SORT_KEYS, jobConf),
                    hadoopVersion,
                    null,
                    SerializationMode.DEFAULT);

            StringBuilder stringBuilder = new StringBuilder();
            stringBuilder.append("Splits created:\n");
            for (InetAddress address : splitToHostAddress.keySet()) {
                stringBuilder.append("Host ");
                stringBuilder.append(address);
                stringBuilder.append(" has ");
                stringBuilder.append(splitToHostAddress.get(address).size());
                stringBuilder.append(" splits.\n");
            }
            System.out.println(stringBuilder.toString());

            System.out.println("Job initialization completed in " + (System.currentTimeMillis() - time) + " ms.");
            time = System.currentTimeMillis();

            InvokeResult<MapperResult> mapInvokeResult = MessagingHelper.invoke(grid, RunMapper.MapperInvokable.class, parameters, TimeSpan.INFINITE_TIMEOUT.getSeconds());

            if (mapInvokeResult.getErrors() != null && mapInvokeResult.getErrors().size() > 0) {
                throw new IOException("Map invocation failed.", mapInvokeResult.getErrors().get(0));
            }

            System.out.println("Map invocation done in " + (System.currentTimeMillis() - time) + " ms.");
            time = System.currentTimeMillis();

            MapperResult resultObject = mapInvokeResult.getResult();

            if (resultObject == null || mapInvokeResult.getNumFailed() != 0) {
                throw new IOException("Mapper invocation failed. Num failed = " + mapInvokeResult.getNumFailed());
            }

            if (resultObject.getNumberOfSplitsProcessed() != inputSplits.size()) {
                throw new IOException("Number of splits does not match the number of invocations. Nsplits = " + inputSplits.size() + ", Ninvokes =" + resultObject.getNumberOfSplitsProcessed());
            }

            if (partitionMapping.length > 0) {
                //Running the reduce step
                InvokeResult<Integer> reduceInvokeResult = MessagingHelper.invoke(grid, ReduceInvokable.class, appID, TimeSpan.INFINITE_TIMEOUT.getSeconds());

                System.out.println("Reduce invocation done in " + (System.currentTimeMillis() - time) + " ms.");

                DataAccessor.clearObjects(appID); //clear all temporary objects

                if (reduceInvokeResult.getErrors() != null && reduceInvokeResult.getErrors().size() > 0) {
                    throw new IOException("Reduce invocation failed.", reduceInvokeResult.getErrors().get(0));
                }
                if (reduceInvokeResult.getNumFailed() != 0) {
                    throw new IOException("Reduce invocation failed.");
                }
                if (reduceInvokeResult.getResult() != partitionMapping.length) {
                    throw new IOException("Not all partitions were reduced. Expected = " + partitionMapping.length + " Actual = " + reduceInvokeResult.getResult());
                }
            }
            outputCommitter.commitJob(jobContext);
        } catch (StateServerException e) {
            throw new IOException("ScaleOut hServer access error.", e);
        }
    }

    //Taken from LocalJobRunner.java in Apache Hadoop 2.2.0
    private org.apache.hadoop.mapreduce.OutputCommitter createOutputCommitter(boolean newApiCommitter, JobID jobId, Configuration conf) throws IOException, InterruptedException, ClassNotFoundException {
        org.apache.hadoop.mapreduce.OutputCommitter committer = null;

        LOG.info("OutputCommitter set in config " + conf.get("mapred.output.committer.class"));

        if (newApiCommitter) {
            HadoopVersionSpecificCode hadoopVersionSpecificCode = HadoopVersionSpecificCode.getInstance(VersionInfo.getVersion(), conf);
            org.apache.hadoop.mapreduce.TaskAttemptID taskAttemptID = hadoopVersionSpecificCode.createTaskAttemptId(jobId, true, 0);
            org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = hadoopVersionSpecificCode.createTaskAttemptContext(conf, taskAttemptID);
            OutputFormat outputFormat = ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), conf);
            committer = outputFormat.getOutputCommitter(taskContext);
        } else {
            committer = ReflectionUtils.newInstance(conf.getClass("mapred.output.committer.class", FileOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class), conf);
        }
        LOG.info("OutputCommitter is " + committer.getClass().getName());
        return committer;
    }
    /**
     * Runs the map-reduce job on ScaleOut hServer.
     *
     * @param job          the job to run
     * @param jobId        the id of the job
     * @param sortEnabled  if key sorting is enabled
     * @param jobParameter user defined parameter object for the job
     * @param grid         the invocation grid to run the job
     * @throws IOException            if errors occurred during the job
     * @throws InterruptedException   if the processing thread is interrupted
     * @throws ClassNotFoundException if the invocation grid does not contain the dependency class
     */
    @SuppressWarnings("unchecked")
    public void runOldApiJob(JobConf job, org.apache.hadoop.mapred.JobID jobId, boolean sortEnabled, Object jobParameter, InvocationGrid grid) throws IOException, InterruptedException, ClassNotFoundException {
        //Initialize user credential in advance
        int jobAppId = 0xFFFFFFF & BitConverter.hashStringOneInt(jobId.toString());
        String hadoopVersion = VersionInfo.getVersion();
        long time = System.currentTimeMillis();
        CreateUserCredentials.run(grid);

        try {
            //Check output specs before running the job
            job.getOutputFormat().checkOutputSpecs(FileSystem.get(job), job);

            JobContext jContext = HadoopVersionSpecificCode.getInstance(hadoopVersion, job).createJobContext(job, jobId);

            org.apache.hadoop.mapred.OutputCommitter outputCommitter = job.getOutputCommitter();
            outputCommitter.setupJob(jContext);

            //clear all temporary objects
            DataAccessor.clearObjects(jobAppId);

            //Calculating the partition layout
            com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping hostNameToPartition =
                    com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping.getCurrent();
            List<InetAddress> hostAddresses = new ArrayList<InetAddress>(hostNameToPartition.getHosts());

            //Generating mapping of Hadoop partitions to SOSS partitions, so they are equally distributed across hosts
            int numHosts = hostAddresses.size();
            int numberOfSlotsPerNode = Math.max(grid != null ? grid.getMaxNumberOfCores() : Runtime.getRuntime().availableProcessors(), 1);

            //Set the number of splits to the number of cores
            if (NamedMapInputFormatMapred.class.isAssignableFrom(job.getInputFormat().getClass())) {
                int numberOfSplits = HServerParameters.getSetting(MAP_SPLITS_PER_CORE, job) * numHosts * numberOfSlotsPerNode;
                job.setNumMapTasks(Math.min(numberOfSplits, HServerConstants.MAX_MAP_REDUCE_TASKS));
            }

            //Generating split to hostname map
            org.apache.hadoop.mapred.InputFormat inputFormat = job.getInputFormat();
            List<org.apache.hadoop.mapred.InputSplit> splitList = Arrays.asList(inputFormat.getSplits(job, job.getNumMapTasks()));
            Map<InetAddress, List<Integer>> splitToHostAddress = assignSplitsToHost(splitList, hostAddresses, null);

            //Choose the optimal number of reducers for GridOutputFormat
            if (job.getOutputFormat() instanceof NamedMapOutputFormatMapred) {
                job.setNumReduceTasks(numHosts * numberOfSlotsPerNode);
                sortEnabled = false;
            }

            int[] partitionMapping = hostNameToPartition.generateEvenItemDistribution(job.getNumReduceTasks());

            //Generating invocation parameters
            Class<? extends org.apache.hadoop.mapred.InputSplit> splitType = splitList.size() > 0 ? splitList.get(0).getClass() : null;

            HadoopInvocationParameters hadoopParameters = new HadoopInvocationParameters(job, jobId, true);
            HServerInvocationParameters<org.apache.hadoop.mapred.InputSplit> parameters = new HServerInvocationParameters<org.apache.hadoop.mapred.InputSplit>(
                    hadoopParameters,
                    jobAppId,
                    partitionMapping,
                    hostNameToPartition,
                    numberOfSlotsPerNode,
                    splitType,
                    splitList,
                    splitToHostAddress,
                    false,
                    sortEnabled,
                    hadoopVersion,
                    jobParameter,
                    SerializationMode.DEFAULT);

            StringBuilder stringBuilder = new StringBuilder();
            stringBuilder.append("Splits created:\n");
            for (InetAddress address : splitToHostAddress.keySet()) {
                stringBuilder.append("Host ");
                stringBuilder.append(address);
                stringBuilder.append(" has ");
                stringBuilder.append(splitToHostAddress.get(address).size());
                stringBuilder.append(" splits.\n");
            }
            System.out.println(stringBuilder.toString());

            System.out.println("Job initialization completed in " + (System.currentTimeMillis() - time) + " ms.");
            time = System.currentTimeMillis();

            InvokeResult<MapperResult> mapInvokeResult = MessagingHelper.invoke(grid, RunMapper.MapperInvokable.class, parameters, TimeSpan.INFINITE_TIMEOUT.getSeconds());

            if (mapInvokeResult.getErrors() != null && mapInvokeResult.getErrors().size() > 0) {
                throw new IOException("Map invocation failed.", mapInvokeResult.getErrors().get(0));
            }

            System.out.println("Map invocation done in " + (System.currentTimeMillis() - time) + " ms.");
            time = System.currentTimeMillis();

            MapperResult resultObject = mapInvokeResult.getResult();

            if (resultObject == null || mapInvokeResult.getNumFailed() != 0) {
                throw new IOException("Mapper invocation failed. Num failed = " + mapInvokeResult.getNumFailed());
            }

            if (resultObject.getNumberOfSplitsProcessed() != splitList.size()) {
                throw new IOException("Number of splits does not match the number of invocations. Nsplits = " + splitList.size() + ", Ninvokes =" + resultObject.getNumberOfSplitsProcessed());
            }

            if (partitionMapping.length > 0) {
                //Running the reduce step
                InvokeResult<Integer> reduceInvokeResult = MessagingHelper.invoke(grid, ReduceInvokable.class, jobAppId, TimeSpan.INFINITE_TIMEOUT.getSeconds());

                System.out.println("Reduce invocation done in " + (System.currentTimeMillis() - time) + " ms.");

                DataAccessor.clearObjects(jobAppId); //clear all temporary objects

                if (reduceInvokeResult.getErrors() != null && reduceInvokeResult.getErrors().size() > 0) {
                    throw new IOException("Reduce invocation failed.", reduceInvokeResult.getErrors().get(0));
                }
                if (reduceInvokeResult.getNumFailed() != 0) {
                    throw new IOException("Reduce invocation failed.");
                }
                if (reduceInvokeResult.getResult() != partitionMapping.length) {
                    throw new IOException("Not all partitions were reduced. Expected = " + partitionMapping.length + " Actual = " + reduceInvokeResult.getResult());
                }
            }
            outputCommitter.commitJob(jContext);
        } catch (StateServerException e) {
            throw new IOException("ScaleOut hServer access error.", e);
        }
    }

    /**
     * Creates user credentials, saving time during the invocation.
     */
    public static class CreateUserCredentials implements Invokable<Integer, String, Integer> {
        public static void run(InvocationGrid grid) {
            try {
                MessagingHelper.invoke(grid, CreateUserCredentials.class, "", 0).getNumSuccessful();
            } catch (Exception e) {
                //Do nothing, this is an optimization
            }
        }

        @Override
        public Integer eval(Integer integer, String user, EvalArgs<Integer> integerEvalArgs) throws InvokeException, InterruptedException {
            try {
                UserGroupInformation.getCurrentUser();
            } catch (Throwable t) {
                //Do nothing, this is an optimization
            }
            return 0;
        }

        @Override
        public Integer merge(Integer integer, Integer integer2) throws InvokeException, InterruptedException {
            return 0;
        }
    }
}