org.apache.hama.bsp.BSPJobClient.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hama.bsp.BSPJobClient.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hama.bsp;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Arrays;
import java.util.Map;
import java.util.Random;
import java.util.StringTokenizer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hama.Constants;
import org.apache.hama.HamaConfiguration;
import org.apache.hama.bsp.message.MessageManager;
import org.apache.hama.bsp.message.OutgoingMessageManager;
import org.apache.hama.bsp.message.OutgoingPOJOMessageBundle;
import org.apache.hama.bsp.message.queue.MemoryQueue;
import org.apache.hama.bsp.message.queue.MessageQueue;
import org.apache.hama.ipc.HamaRPCProtocolVersion;
import org.apache.hama.ipc.JobSubmissionProtocol;
import org.apache.hama.ipc.RPC;

/**
 * BSPJobClient is the primary interface for the user-job to interact with the
 * BSPMaster.
 * 
 * BSPJobClient provides facilities to submit jobs, track their progress, access
 * component-tasks' reports/logs, get the BSP cluster status information etc.
 */
public class BSPJobClient extends Configured implements Tool {

    private static final Log LOG = LogFactory.getLog(BSPJobClient.class);

    /** Filter categories for selecting which task statuses a client wants reported. */
    public static enum TaskStatusFilter {
        NONE, KILLED, FAILED, SUCCEEDED, ALL
    }

    // Maximum age (ms) of a cached job status before ensureFreshStatus() refreshes it.
    private static final long MAX_JOBPROFILE_AGE = 1000 * 2;
    // job files are world readable and owner writable
    final private static FsPermission JOB_FILE_PERMISSION = FsPermission.createImmutable((short) 0644); // rw-r--r--
    // job submission directory is world readable/writable/executable
    final static FsPermission JOB_DIR_PERMISSION = FsPermission.createImmutable((short) 0777); // rwxrwxrwx

    // RPC proxy to the BSPMaster, or a LocalBSPRunner in local mode; set by init().
    private JobSubmissionProtocol jobSubmitClient = null;
    // Lazily-cached master system directory; see getSystemDir().
    private Path sysDir = null;
    // Lazily-cached filesystem handle; see getFs().
    private FileSystem fs = null;

    /**
     * Client-side handle to a job submitted to the BSPMaster. Holds the job's
     * {@link JobProfile} (static metadata, fetched once) and a {@link JobStatus}
     * snapshot that is refreshed over RPC, either lazily (ensureFreshStatus) or
     * eagerly (updateStatus).
     */
    class NetworkedJob implements RunningJob {
        // Static job metadata, fetched once in the constructor.
        JobProfile profile;
        // Most recent status snapshot received from the master.
        JobStatus status;
        // Wall-clock millis at which 'status' was last refreshed.
        long statustime;

        public NetworkedJob() {
        }

        public NetworkedJob(JobStatus job) throws IOException {
            this.status = job;
            this.profile = jobSubmitClient.getJobProfile(job.getJobID());
            this.statustime = System.currentTimeMillis();
        }

        /**
         * Some methods rely on having a recent job profile object. Refresh it, if
         * necessary
         */
        synchronized void ensureFreshStatus() throws IOException {
            if (System.currentTimeMillis() - statustime > MAX_JOBPROFILE_AGE) {
                updateStatus();
            }
        }

        /**
         * Some methods need to update status immediately. So, refresh immediately
         * 
         * @throws IOException
         */
        synchronized void updateStatus() throws IOException {
            this.status = jobSubmitClient.getJobStatus(profile.getJobID());
            this.statustime = System.currentTimeMillis();
        }

        /** @return the job's id, read from the cached profile. */
        @Override
        public BSPJobID getID() {
            return profile.getJobID();
        }

        /** @return the job's name, read from the cached profile. */
        @Override
        public String getJobName() {
            return profile.getJobName();
        }

        /** @return the path of the submitted job file, read from the cached profile. */
        @Override
        public String getJobFile() {
            return profile.getJobFile();
        }

        /** @return the job's progress from a status snapshot at most MAX_JOBPROFILE_AGE old. */
        @Override
        public long progress() throws IOException {
            ensureFreshStatus();
            return status.progress();
        }

        /**
         * Returns immediately whether the whole job is done yet or not.
         */
        @Override
        public synchronized boolean isComplete() throws IOException {
            updateStatus();
            return (status.getRunState() == JobStatus.SUCCEEDED || status.getRunState() == JobStatus.FAILED
                    || status.getRunState() == JobStatus.KILLED);
        }

        /**
         * True if job completed successfully.
         */
        @Override
        public synchronized boolean isSuccessful() throws IOException {
            updateStatus();
            return status.getRunState() == JobStatus.SUCCEEDED;
        }

        /** @return the superstep count from a reasonably fresh status snapshot. */
        @Override
        public synchronized long getSuperstepCount() throws IOException {
            ensureFreshStatus();
            return status.getSuperstepCount();
        }

        /**
         * Blocks until the job is finished, polling the master every 5 seconds.
         */
        @Override
        public void waitForCompletion() throws IOException {
            while (!isComplete()) {
                try {
                    Thread.sleep(5000);
                } catch (InterruptedException ie) {
                    // NOTE(review): interrupt is swallowed and the flag is not
                    // restored; the loop deliberately keeps waiting for the job.
                }
            }
        }

        /**
         * Tells the service to get the state of the current job.
         */
        @Override
        public synchronized int getJobState() throws IOException {
            updateStatus();
            return status.getRunState();
        }

        /** @return the last fetched status snapshot (may be stale; not refreshed here). */
        @Override
        public JobStatus getStatus() {
            return status;
        }

        /**
         * Tells the service to terminate the current job.
         */
        @Override
        public synchronized void killJob() throws IOException {
            jobSubmitClient.killJob(getID());
        }

        @Override
        public void killTask(TaskAttemptID taskId, boolean shouldFail) throws IOException {
            // shouldFail selects between marking the task failed vs. killed on the master.
            jobSubmitClient.killTask(taskId, shouldFail);
        }

        /** Fetches at most 10 task-completion events starting at index startFrom. */
        @Override
        public TaskCompletionEvent[] getTaskCompletionEvents(int startFrom) {
            return jobSubmitClient.getTaskCompletionEvents(getID(), startFrom, 10);
        }
    }

    /**
     * Builds a client wired to the BSPMaster named by "bsp.master.address" in
     * {@code conf}, or to an in-process local runner when unset or "local".
     *
     * @param conf cluster configuration
     * @throws IOException if the RPC proxy to the master cannot be created
     */
    public BSPJobClient(Configuration conf) throws IOException {
        setConf(conf);
        init(conf);
    }

    /** No-arg constructor; {@link #init(Configuration)} must be called before use (run() does so). */
    public BSPJobClient() {
    }

    /**
     * Connects this client to the job-submission endpoint: an in-process
     * {@link LocalBSPRunner} when "bsp.master.address" is unset or "local",
     * otherwise an RPC proxy to the remote BSPMaster.
     *
     * @param conf configuration holding the master address
     * @throws IOException if the RPC proxy cannot be created
     */
    public void init(Configuration conf) throws IOException {
        String masterAddress = conf.get("bsp.master.address");
        boolean isLocal = (masterAddress == null) || "local".equals(masterAddress);
        if (isLocal) {
            LOG.debug("Using local BSP runner.");
            this.jobSubmitClient = new LocalBSPRunner(conf);
        } else {
            this.jobSubmitClient = (JobSubmissionProtocol) RPC.getProxy(JobSubmissionProtocol.class,
                    HamaRPCProtocolVersion.versionID, BSPMaster.getAddress(conf), conf,
                    NetUtils.getSocketFactory(conf, JobSubmissionProtocol.class));
        }
    }

    /**
     * Close the <code>JobClient</code>. Releases the RPC proxy to the master;
     * a no-op when running against the local runner.
     */
    public synchronized void close() throws IOException {
        String masterAddress = this.getConf().get("bsp.master.address");
        boolean remote = (masterAddress != null) && !"local".equals(masterAddress);
        if (remote) {
            RPC.stopProxy(jobSubmitClient);
        }
    }

    /**
     * Get a filesystem handle. We need this to prepare jobs for submission to the
     * BSP system. Resolved once from the master's system directory, then cached.
     * 
     * @return the filesystem handle.
     */
    public synchronized FileSystem getFs() throws IOException {
        if (fs == null) {
            fs = getSystemDir().getFileSystem(getConf());
        }
        return fs;
    }

    /**
     * Gets the jobs that are submitted.
     * 
     * @return array of {@link JobStatus} for the submitted jobs.
     * @throws IOException on RPC failure
     */
    public JobStatus[] getAllJobs() throws IOException {
        // Straight delegation to the master; no client-side caching.
        return jobSubmitClient.getAllJobs();
    }

    /**
     * Gets the jobs that are not completed and not failed.
     * 
     * @return array of {@link JobStatus} for the running/to-be-run jobs.
     * @throws IOException on RPC failure
     */
    public JobStatus[] jobsToComplete() throws IOException {
        // Straight delegation to the master; no client-side caching.
        return jobSubmitClient.jobsToComplete();
    }

    /**
     * Submit a job to the BSP system. This returns a handle to the
     * {@link RunningJob} which can be used to track the running-job.
     * 
     * @param job the job configuration.
     * @return a handle to the {@link RunningJob} which can be used to track the
     *         running-job.
     * @throws FileNotFoundException
     * @throws IOException
     */
    public RunningJob submitJob(BSPJob job) throws FileNotFoundException, IOException {
        // Ask the master for a fresh job id, then perform the real submission.
        return submitJobInternal(job, jobSubmitClient.getNewJobId());
    }

    static Random r = new Random();

    /**
     * Does the real work of submission: sizes the job against the cluster,
     * optionally pre-partitions the input, writes the split file, job jar and
     * job.xml into a per-submission scratch directory under the master's system
     * dir, and finally hands the job to the BSPMaster.
     *
     * @param pJob the job to submit
     * @param jobId the id the job will run under
     * @return a handle to the running job
     * @throws IOException on any filesystem or RPC failure
     */
    public RunningJob submitJobInternal(BSPJob pJob, BSPJobID jobId) throws IOException {
        BSPJob job = pJob;
        job.setJobID(jobId);

        int maxTasks;
        int configured = job.getConfiguration().getInt(Constants.MAX_TASKS_PER_JOB, job.getNumBspTask());

        ClusterStatus clusterStatus = getClusterStatus(true);
        // Re-adjust the maxTasks based on cluster status.
        if (clusterStatus != null) {
            maxTasks = clusterStatus.getMaxTasks() - clusterStatus.getTasks();

            if (configured > maxTasks) {
                LOG.warn("The configured number of tasks has exceeded the maximum allowed. Job will run with "
                        + (maxTasks) + " tasks.");
                job.setNumBspTask(maxTasks);
            }
        } else {
            // No cluster status available: trust the configured value.
            maxTasks = configured;
        }

        // Per-submission scratch dir with a random base-36 suffix.
        Path submitJobDir = new Path(getSystemDir(), "submit_" + Integer.toString(Math.abs(r.nextInt()), 36));
        Path submitSplitFile = new Path(submitJobDir, "job.split");
        Path submitJarFile = new Path(submitJobDir, "job.jar");
        Path submitJobFile = new Path(submitJobDir, "job.xml");
        LOG.debug("BSPJobClient.submitJobDir: " + submitJobDir);

        FileSystem fs = getFs();
        // Create a number of filenames in the BSPMaster's fs namespace
        fs.delete(submitJobDir, true);
        submitJobDir = fs.makeQualified(submitJobDir);
        submitJobDir = new Path(submitJobDir.toUri().getPath());
        FsPermission bspSysPerms = new FsPermission(JOB_DIR_PERMISSION);
        // Creates the directory with the required permissions in one call; a
        // plain fs.mkdirs() afterwards would be redundant.
        FileSystem.mkdirs(fs, submitJobDir, bspSysPerms);
        short replication = (short) job.getInt("bsp.submit.replication", 10);

        // only create the splits if we have an input
        if ((job.get(Constants.JOB_INPUT_DIR) != null) || (job.get("bsp.join.expr") != null)) {
            // Create the splits for the job
            LOG.debug("Creating splits at " + fs.makeQualified(submitSplitFile));

            InputSplit[] splits = job.getInputFormat().getSplits(job,
                    (maxTasks > configured) ? configured : maxTasks);

            if (job.getConfiguration().getBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, false)) {
                LOG.info("Run pre-partitioning job");
                job = partition(job, splits, maxTasks);
                maxTasks = job.getInt("hama.partition.count", maxTasks);
            }

            // Re-compute the splits if the input was re-partitioned above.
            if (job.getBoolean("input.has.partitioned", false)) {
                splits = job.getInputFormat().getSplits(job, maxTasks);
            }

            if (maxTasks < splits.length) {
                throw new IOException(
                        "Job failed! The number of splits has exceeded the number of max tasks. The number of splits: "
                                + splits.length + ", The number of max tasks: " + maxTasks);
            }

            int numOfSplits = writeSplits(job, splits, submitSplitFile, maxTasks);
            if (numOfSplits > configured
                    || !job.getConfiguration().getBoolean(Constants.FORCE_SET_BSP_TASKS, false)) {
                job.setNumBspTask(numOfSplits);
            }

            job.set("bsp.job.split.file", submitSplitFile.toString());
        }

        String originalJarPath = job.getJar();

        if (originalJarPath != null) { // copy jar to BSPMaster's fs
            // use jar name if job is not named.
            if ("".equals(job.getJobName())) {
                job.setJobName(new Path(originalJarPath).getName());
            }
            job.setJar(submitJarFile.toString());
            fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);

            fs.setReplication(submitJarFile, replication);
            fs.setPermission(submitJarFile, new FsPermission(JOB_FILE_PERMISSION));
        } else {
            LOG.warn("No job jar file set.  User classes may not be found. "
                    + "See BSPJob#setJar(String) or check Your jar file.");
        }

        // Set the user's name and working directory
        job.setUser(getUnixUserName());
        job.set("group.name", getUnixUserGroupName(job.getUser()));
        if (job.getWorkingDirectory() == null) {
            job.setWorkingDirectory(fs.getWorkingDirectory());
        }

        // Write job file to BSPMaster's fs
        FSDataOutputStream out = FileSystem.create(fs, submitJobFile, new FsPermission(JOB_FILE_PERMISSION));

        try {
            job.writeXml(out);
        } finally {
            out.close();
        }

        return launchJob(jobId, job, submitJobFile, fs);
    }

    /**
     * Optionally runs a separate "runtime partitioning" BSP job over the input so
     * the main job reads pre-partitioned sequence files.
     *
     * NOTE(review): this uses the 'fs' field directly; it relies on getFs()
     * having been called earlier (submitJobInternal does so) — confirm before
     * calling from elsewhere.
     *
     * @param job the job being submitted; mutated to point at the partitioned
     *          input when partitioning succeeds
     * @param splits the splits computed for the original input
     * @param maxTasks not referenced in this method body
     * @return the (possibly modified) job
     * @throws IOException if the partitioning job ran but did not succeed
     */
    protected BSPJob partition(BSPJob job, InputSplit[] splits, int maxTasks) throws IOException {
        String inputPath = job.getConfiguration().get(Constants.JOB_INPUT_DIR);
        // Default scratch location for partitioned output; may be overridden by
        // RUNTIME_PARTITIONING_DIR below.
        Path partitionDir = new Path("/tmp/hama-parts/" + job.getJobID() + "/");
        if (fs.exists(partitionDir)) {
            fs.delete(partitionDir, true);
        }

        if (job.get("bsp.partitioning.runner.job") != null) {
            return job;
        } // Early exit for the partitioner job.

        if (inputPath != null) {
            int numSplits = splits.length;
            int numTasks = job.getConfiguration().getInt("bsp.peers.num", 0);
            if (LOG.isDebugEnabled()) {
                LOG.debug(" numTasks = " + numTasks + " numSplits = " + numSplits + " enable = "
                        + (job.getConfiguration().getBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, false)
                                + " class = " + job.getConfiguration().get(Constants.RUNTIME_PARTITIONING_CLASS)));
            }

            // Fall back to one task per split when no peer count is configured.
            if (numTasks == 0) {
                numTasks = numSplits;
            }

            // Only partition when both the feature flag and a partitioner class are set.
            if (job.getConfiguration().getBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, false)
                    && job.getConfiguration().get(Constants.RUNTIME_PARTITIONING_CLASS) != null) {

                HamaConfiguration conf = new HamaConfiguration(job.getConfiguration());

                if (job.getConfiguration().get(Constants.RUNTIME_PARTITIONING_DIR) != null) {
                    partitionDir = new Path(job.getConfiguration().get(Constants.RUNTIME_PARTITIONING_DIR));
                }

                conf.set(Constants.RUNTIME_PARTITIONING_CLASS, job.get(Constants.RUNTIME_PARTITIONING_CLASS));
                BSPJob partitioningJob = new BSPJob(conf);
                partitioningJob.setJobName("Runtime partitioning job for " + partitioningJob.getJobName());
                LOG.debug("partitioningJob input: " + partitioningJob.get(Constants.JOB_INPUT_DIR));

                // Use plain in-memory messaging for the helper job.
                partitioningJob.getConfiguration().setClass(MessageManager.OUTGOING_MESSAGE_MANAGER_CLASS,
                        OutgoingPOJOMessageBundle.class, OutgoingMessageManager.class);
                partitioningJob.getConfiguration().setClass(MessageManager.RECEIVE_QUEUE_TYPE_CLASS,
                        MemoryQueue.class, MessageQueue.class);

                // Mirror the parent job's input types; output is a sequence file
                // keyed/valued the same way.
                partitioningJob.setBoolean(Constants.FORCE_SET_BSP_TASKS, true);
                partitioningJob.setInputFormat(job.getInputFormat().getClass());
                partitioningJob.setInputKeyClass(job.getInputKeyClass());
                partitioningJob.setInputValueClass(job.getInputValueClass());

                partitioningJob.setOutputFormat(SequenceFileOutputFormat.class);
                partitioningJob.setOutputKeyClass(job.getInputKeyClass());
                partitioningJob.setOutputValueClass(job.getInputValueClass());

                partitioningJob.setBspClass(PartitioningRunner.class);
                partitioningJob.setMessageClass(MapWritable.class);
                // Marker so the helper job takes the early-exit branch above.
                partitioningJob.set("bsp.partitioning.runner.job", "true");
                partitioningJob.getConfiguration().setBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, false);
                partitioningJob.setOutputPath(partitionDir);

                boolean isPartitioned = false;
                try {
                    isPartitioned = partitioningJob.waitForCompletion(true);
                } catch (InterruptedException e) {
                    LOG.error("Interrupted partitioning run-time.", e);
                } catch (ClassNotFoundException e) {
                    LOG.error("Class not found error partitioning run-time.", e);
                }

                if (isPartitioned) {
                    // Repoint the parent job at the partitioned output.
                    if (job.getConfiguration().get(Constants.RUNTIME_PARTITIONING_DIR) != null) {
                        job.setInputPath(new Path(conf.get(Constants.RUNTIME_PARTITIONING_DIR)));
                    } else {
                        job.setInputPath(partitionDir);
                    }
                    job.setBoolean("input.has.partitioned", true);
                    job.setInputFormat(NonSplitSequenceFileInputFormat.class);
                } else {
                    LOG.error("Error partitioning the input path.");
                    throw new IOException("Runtime partition failed for the job.");
                }
            }
        }
        return job;
    }

    /**
     * Hands the prepared job over to the BSPMaster and wraps the returned status
     * in a {@link NetworkedJob} handle.
     *
     * @param jobId id the job runs under
     * @param job the job being launched
     * @param submitJobFile path of the serialized job.xml on the master's fs
     * @param fs filesystem used to qualify the job-file path
     * @return a tracking handle for the launched job
     * @throws IOException if the master rejects the submission
     */
    protected RunningJob launchJob(BSPJobID jobId, BSPJob job, Path submitJobFile, FileSystem fs)
            throws IOException {
        // Now, actually submit the job (using the submit name).
        String jobFilePath = submitJobFile.makeQualified(fs).toString();
        JobStatus status = jobSubmitClient.submitJob(jobId, jobFilePath);
        if (status == null) {
            throw new IOException("Could not launch job");
        }
        return new NetworkedJob(status);
    }

    /**
     * Get the {@link CompressionType} for the output {@link SequenceFile}.
     * 
     * @param job the {@link BSPJob}
     * @return the {@link CompressionType} for the output {@link SequenceFile},
     *         defaulting to {@link CompressionType#NONE} when
     *         "bsp.partitioning.compression.type" is unset
     */
    static CompressionType getOutputCompressionType(BSPJob job) {
        String val = job.get("bsp.partitioning.compression.type");
        if (val != null) {
            return CompressionType.valueOf(val);
        } else {
            return CompressionType.NONE;
        }
    }

    /**
     * Get the {@link CompressionCodec} for compressing the job outputs.
     * 
     * @param job the {@link BSPJob} to look in
     * @param defaultValue the {@link CompressionCodec} to return if not set
     * @return the {@link CompressionCodec} to be used to compress the job outputs
     * @throws IllegalArgumentException if the class was specified, but not found
     */
    static Class<? extends CompressionCodec> getOutputCompressorClass(BSPJob job,
            Class<? extends CompressionCodec> defaultValue) {
        Configuration conf = job.getConfiguration();
        String name = conf.get("bsp.partitioning.compression.codec");
        if (name == null) {
            return defaultValue;
        }
        try {
            return conf.getClassByName(name).asSubclass(CompressionCodec.class);
        } catch (ClassNotFoundException e) {
            throw new IllegalArgumentException("Compression codec " + name + " was not found.", e);
        }
    }

    /**
     * Serializes the given splits into the job's split file as {@link RawSplit}
     * records, preceded by the header written by writeSplitsFileHeader.
     *
     * @param job job whose configuration controls partition-id extraction
     * @param splits the splits to persist
     * @param submitSplitFile destination path on the master's filesystem
     * @param maxTasks not referenced in this method body; sizing happens upstream
     * @return the number of splits written
     * @throws IOException if the split file cannot be written
     */
    private static int writeSplits(BSPJob job, InputSplit[] splits, Path submitSplitFile, int maxTasks)
            throws IOException {
        final DataOutputStream out = writeSplitsFileHeader(job.getConfiguration(), submitSplitFile, splits.length);
        try {
            DataOutputBuffer buffer = new DataOutputBuffer();
            RawSplit rawSplit = new RawSplit();
            for (InputSplit split : splits) {

                // set partitionID to rawSplit
                // NOTE(review): assumes partitioned file names carry the numeric
                // partition id as the second '-'-separated token — confirm the
                // partitioner's output naming scheme.
                if (split.getClass().getName().equals(FileSplit.class.getName())
                        && job.getBoolean("input.has.partitioned", false)) {
                    String[] extractPartitionID = ((FileSplit) split).getPath().getName().split("[-]");
                    if (extractPartitionID.length > 1)
                        rawSplit.setPartitionID(Integer.parseInt(extractPartitionID[1]));
                }

                rawSplit.setClassName(split.getClass().getName());
                buffer.reset();
                split.write(buffer);
                rawSplit.setDataLength(split.getLength());
                rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
                rawSplit.setLocations(split.getLocations());
                rawSplit.write(out);
            }
        } finally {
            out.close();
        }
        return splits.length;
    }

    // Split-file format version understood by this client; checked in readSplitFile().
    private static final int CURRENT_SPLIT_FILE_VERSION = 0;
    // Magic bytes at the start of every split file. NOTE(review): getBytes() uses
    // the platform default charset; "SPL" is ASCII so this is stable on common
    // platforms, but an explicit charset would be safer.
    private static final byte[] SPLIT_FILE_HEADER = "SPL".getBytes();

    /**
     * Creates the split file and writes its header (magic bytes, format version,
     * split count). The returned stream is intentionally left open: the caller
     * appends the split records and is responsible for closing it.
     *
     * @param conf configuration used to resolve the filesystem
     * @param filename split file to create
     * @param length number of splits that will follow the header
     * @return an open stream positioned just after the header
     * @throws IOException if the file cannot be created
     */
    private static DataOutputStream writeSplitsFileHeader(Configuration conf, Path filename, int length)
            throws IOException {
        // write the splits to a file for the bsp master
        FileSystem fs = filename.getFileSystem(conf);
        FSDataOutputStream out = FileSystem.create(fs, filename, new FsPermission(JOB_FILE_PERMISSION));
        out.write(SPLIT_FILE_HEADER);
        WritableUtils.writeVInt(out, CURRENT_SPLIT_FILE_VERSION);
        WritableUtils.writeVInt(out, length);
        return out;
    }

    /**
     * Read a splits file into a list of raw splits
     * 
     * @param in the stream to read from
     * @return the complete list of splits
     * @throws IOException if the magic header or format version does not match
     */
    static RawSplit[] readSplitFile(DataInput in) throws IOException {
        byte[] header = new byte[SPLIT_FILE_HEADER.length];
        in.readFully(header);
        if (!Arrays.equals(SPLIT_FILE_HEADER, header)) {
            throw new IOException("Invalid header on split file");
        }
        int vers = WritableUtils.readVInt(in);
        if (vers != CURRENT_SPLIT_FILE_VERSION) {
            throw new IOException("Unsupported split version " + vers);
        }
        int len = WritableUtils.readVInt(in);
        RawSplit[] result = new RawSplit[len];
        for (int i = 0; i < len; ++i) {
            RawSplit split = new RawSplit();
            split.readFields(in);
            // Splits carrying an explicit partition id (anything other than the
            // Integer.MIN_VALUE sentinel) are placed at that index; the rest keep
            // file order. NOTE(review): assumes partition ids are unique and
            // < len, otherwise entries collide or stay null — confirm.
            if (split.getPartitionID() != Integer.MIN_VALUE)
                result[split.getPartitionID()] = split;
            else
                result[i] = split;
        }
        return result;
    }

    /**
     * Monitor a job and print status in real-time as progress is made and tasks
     * fail.
     * 
     * @param job the submitted job, polled for completion and progress
     * @param info handle used to fetch completion events and final counters
     * @return true, if job is successful
     * @throws IOException on RPC failure while polling
     * @throws InterruptedException if the 3s polling sleep is interrupted
     */
    public boolean monitorAndPrintJob(BSPJob job, RunningJob info) throws IOException, InterruptedException {
        String lastReport = null;
        LOG.info("Running job: " + info.getID());
        int eventCounter = 0;

        while (!job.isComplete()) {
            Thread.sleep(3000);
            long step = job.progress();

            // Only log when the superstep count actually changed.
            String report = "Current supersteps number: " + step;
            if (!report.equals(lastReport)) {
                LOG.info(report);
                lastReport = report;
            }

            // Fetch only the completion events we have not seen yet.
            TaskCompletionEvent[] events = info.getTaskCompletionEvents(eventCounter);
            eventCounter += events.length;

            for (TaskCompletionEvent event : events) {
                if (event.getTaskStatus() == TaskCompletionEvent.Status.FAILED) {
                    // Displaying the task logs
                    displayTaskLogs(event.getTaskAttemptId(), event.getGroomServerInfo());
                }
            }
        }

        // Evaluate success exactly once so the logged outcome and the returned
        // value cannot disagree between two separate status queries.
        boolean success = job.isSuccessful();
        if (success) {
            LOG.info("The total number of supersteps: " + info.getSuperstepCount());
            info.getStatus().getCounter().incrCounter(JobInProgress.JobCounter.SUPERSTEPS,
                    info.getSuperstepCount());
            info.getStatus().getCounter().log(LOG);
        } else {
            LOG.info("Job failed.");
        }

        return success;
    }

    /** Builds the plaintext tasklog URL for the given task on a groom's HTTP server. */
    static String getTaskLogURL(TaskAttemptID taskId, String baseUrl) {
        StringBuilder url = new StringBuilder(baseUrl);
        url.append("/tasklog?plaintext=true&taskid=").append(taskId);
        return url.toString();
    }

    /**
     * Copies a failed task's stdout log to the client's stdout.
     *
     * @param taskId the failed task attempt
     * @param baseUrl HTTP base of the groom that ran it; may be null
     * @throws MalformedURLException if the constructed log URL is invalid
     */
    private static void displayTaskLogs(TaskAttemptID taskId, String baseUrl) throws MalformedURLException {
        if (baseUrl == null) {
            // The tasktracker for a 'failed/killed' job might not be around.
            return;
        }
        // Construct the url for the tasklogs and copy the task's stdout.
        URL stdoutUrl = new URL(getTaskLogURL(taskId, baseUrl) + "&filter=stdout");
        getTaskLogs(taskId, stdoutUrl, System.out);
    }

    /**
     * Streams the log at {@code taskLogUrl} to {@code out}, prefixing every
     * non-empty line with the task id. I/O errors are logged and swallowed
     * because the groom serving the log may already be gone.
     *
     * @param taskId id used as the per-line prefix
     * @param taskLogUrl location of the plaintext task log
     * @param out destination stream; flushed per line but intentionally not closed
     */
    private static void getTaskLogs(TaskAttemptID taskId, URL taskLogUrl, OutputStream out) {
        try {
            URLConnection connection = taskLogUrl.openConnection();
            BufferedReader input = new BufferedReader(new InputStreamReader(connection.getInputStream()));
            BufferedWriter output = new BufferedWriter(new OutputStreamWriter(out));
            try {
                String logData = null;
                while ((logData = input.readLine()) != null) {
                    if (logData.length() > 0) {
                        output.write(taskId + ": " + logData + "\n");
                        output.flush();
                    }
                }
            } finally {
                input.close();
            }
        } catch (IOException ioe) {
            // Pass the throwable so the stack trace is preserved instead of
            // concatenating a possibly-null message onto the text.
            LOG.warn("Error reading task output", ioe);
        }
    }

    /**
     * Grab the bspmaster system directory path where job-specific files are to be
     * placed.
     * 
     * NOTE(review): the lazy initialization below is not synchronized; concurrent
     * first calls may each issue the RPC. Harmless but redundant — confirm
     * whether this client is ever shared across threads.
     * 
     * @return the system directory where job-specific files are to be placed.
     */
    public Path getSystemDir() {
        if (sysDir == null) {
            sysDir = new Path(jobSubmitClient.getSystemDir());
        }
        return sysDir;
    }

    /**
     * Submits the job and blocks, polling roughly once a second, until it
     * finishes; logs the outcome and the final counters.
     *
     * @param job the fully configured job to run
     * @throws FileNotFoundException if job resources cannot be found at submit time
     * @throws IOException on submission or status-polling failure
     */
    public static void runJob(BSPJob job) throws FileNotFoundException, IOException {
        BSPJobClient jc = new BSPJobClient(job.getConfiguration());

        // Query the cluster capacity once instead of issuing two identical RPCs.
        int maxClusterTasks = jc.getClusterStatus(false).getMaxTasks();
        if (job.getNumBspTask() == 0 || job.getNumBspTask() > maxClusterTasks) {
            job.setNumBspTask(maxClusterTasks);
        }

        RunningJob running = jc.submitJob(job);
        BSPJobID jobId = running.getID();
        LOG.info("Running job: " + jobId.toString());

        while (true) {
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // Deliberately ignored: keep polling until the job completes.
            }

            if (running.isComplete()) {
                break;
            }

            // Refresh the handle each round. NOTE(review): getJob() can return
            // null if the master no longer knows the job, which would NPE on the
            // next iteration — confirm master retention semantics.
            running = jc.getJob(jobId);
        }

        if (running.isSuccessful()) {
            LOG.info("Job complete: " + jobId);
            LOG.info("The total number of supersteps: " + running.getSuperstepCount());
            running.getStatus().getCounter().log(LOG);
        } else {
            LOG.info("Job failed.");
        }

        // TODO if error found, kill job
        // running.killJob();
        jc.close();
    }

    /**
     * Get an RunningJob object to track an ongoing job. Returns null if the id
     * does not correspond to any known job.
     * 
     * @throws IOException on RPC failure
     */
    private RunningJob getJob(BSPJobID jobId) throws IOException {
        JobStatus status = jobSubmitClient.getJobStatus(jobId);
        return (status == null) ? null : new NetworkedJob(status);
    }

    /**
     * Get status information about the BSP cluster
     * 
     * @param detailed if true then get a detailed status including the
     *          groomserver names
     * 
     * @return the status information about the BSP cluster as an object of
     *         {@link ClusterStatus}, or null when this client has not been
     *         initialized (no jobSubmitClient yet)
     * 
     * @throws IOException
     */
    public ClusterStatus getClusterStatus(boolean detailed) throws IOException {
        return (jobSubmitClient != null) ? jobSubmitClient.getClusterStatus(detailed) : null;
    }

    // for the testcase: exposes the master protocol handle (package-private on purpose)
    JobSubmissionProtocol getJobSubmissionProtocol() {
        return jobSubmitClient;
    }

    /**
     * Runs the job-client command line. Supported commands: {@code -list [all]},
     * {@code -list-active-grooms}, {@code -submit <job-file>},
     * {@code -kill <job-id>}, {@code -status <job-id>}.
     *
     * @param args command followed by its arguments
     * @return 0 on success; -1 on usage errors, unknown commands, or
     *         not-yet-implemented commands
     * @throws Exception if the underlying RPC or filesystem operations fail
     */
    @Override
    public int run(String[] args) throws Exception {
        int exitCode = -1;
        if (args.length < 1) {
            displayUsage("");
            return exitCode;
        }

        // process arguments
        String cmd = args[0];
        boolean listJobs = false;
        boolean listAllJobs = false;
        boolean listActiveGrooms = false;
        boolean killJob = false;
        boolean submitJob = false;
        boolean getStatus = false;
        String submitJobFile = null;
        String jobid = null;

        HamaConfiguration conf = new HamaConfiguration(getConf());
        init(conf);

        if ("-list".equals(cmd)) {
            if (args.length != 1 && !(args.length == 2 && "all".equals(args[1]))) {
                displayUsage(cmd);
                return exitCode;
            }
            if (args.length == 2 && "all".equals(args[1])) {
                listAllJobs = true;
            } else {
                listJobs = true;
            }
        } else if ("-list-active-grooms".equals(cmd)) {
            if (args.length != 1) {
                displayUsage(cmd);
                return exitCode;
            }
            listActiveGrooms = true;
        } else if ("-submit".equals(cmd)) {
            if (args.length == 1) {
                displayUsage(cmd);
                return exitCode;
            }

            submitJob = true;
            submitJobFile = args[1];
        } else if ("-kill".equals(cmd)) {
            if (args.length == 1) {
                displayUsage(cmd);
                return exitCode;
            }
            killJob = true;
            jobid = args[1];

        } else if ("-status".equals(cmd)) {
            if (args.length != 2) {
                displayUsage(cmd);
                return exitCode;
            }
            jobid = args[1];
            getStatus = true;

            // TODO Later, below functions should be implemented
            // with the Fault Tolerant mechanism.
        } else if ("-list-attempt-ids".equals(cmd)) {
            System.out.println("This function is not implemented yet.");
            return exitCode;
        } else if ("-kill-task".equals(cmd)) {
            System.out.println("This function is not implemented yet.");
            return exitCode;
        } else if ("-fail-task".equals(cmd)) {
            System.out.println("This function is not implemented yet.");
            return exitCode;
        } else {
            // BUG FIX: an unknown command previously fell through this chain
            // and the method returned success without doing anything.
            displayUsage(cmd);
            return exitCode;
        }

        // BUG FIX: reuse the configuration built above (which carries generic
        // options supplied via ToolRunner) instead of discarding it for a
        // fresh HamaConfiguration.
        BSPJobClient jc = new BSPJobClient(conf);
        if (listJobs) {
            listJobs();
            exitCode = 0;
        } else if (listAllJobs) {
            listAllJobs();
            exitCode = 0;
        } else if (listActiveGrooms) {
            listActiveGrooms();
            exitCode = 0;
        } else if (submitJob) {
            HamaConfiguration tConf = new HamaConfiguration(new Path(submitJobFile));
            RunningJob job = jc.submitJob(new BSPJob(tConf));
            System.out.println("Created job " + job.getID().toString());
            // BUG FIX: a successful submission previously left exitCode at -1.
            exitCode = 0;
        } else if (killJob) {
            RunningJob job = jc.getJob(BSPJobID.forName(jobid));
            if (job == null) {
                System.out.println("Could not find job " + jobid);
            } else {
                job.killJob();
                System.out.println("Killed job " + jobid);
            }
            exitCode = 0;
        } else if (getStatus) {
            RunningJob job = jc.getJob(BSPJobID.forName(jobid));
            if (job == null) {
                System.out.println("Could not find job " + jobid);
            } else {
                JobStatus jobStatus = jobSubmitClient.getJobStatus(job.getID());
                System.out.println("Job name: " + job.getJobName());
                System.out.printf("States are:\n\tRunning : 1\tSucceded : 2" + "\tFailed : 3\tPrep : 4\n");
                System.out.printf("%s\t%d\t%d\t%s\n", jobStatus.getJobID(), jobStatus.getRunState(),
                        jobStatus.getStartTime(), jobStatus.getUsername());

                exitCode = 0;
            }
        }

        // BUG FIX: was a hard-coded 'return 0', which masked every failure
        // path above (including the -1 initial value).
        return exitCode;
    }

    /**
     * Display usage of the command-line tool and terminate execution
     */
    /**
     * Prints usage for the given sub-command to stderr, or the full command
     * list when the sub-command is not recognized.
     */
    private static void displayUsage(String cmd) {
        String usagePrefix = "Usage: hama job ";
        String validTaskStates = "running, completed";
        if ("-list-active-grooms".equals(cmd)) {
            System.err.println(usagePrefix + "[" + cmd + "]");
        } else if ("-list".equals(cmd)) {
            System.err.println(usagePrefix + "[" + cmd + " [all]]");
        } else if ("-submit".equals(cmd)) {
            System.err.println(usagePrefix + "[" + cmd + " <job-file>]");
        } else if ("-status".equals(cmd) || "-kill".equals(cmd)) {
            System.err.println(usagePrefix + "[" + cmd + " <job-id>]");
        } else if ("-kill-task".equals(cmd) || "-fail-task".equals(cmd)) {
            System.err.println(usagePrefix + "[" + cmd + " <task-id>]");
        } else if ("-list-attempt-ids".equals(cmd)) {
            System.err.println(usagePrefix + "[" + cmd + " <job-id> <task-state>]. "
                    + "Valid values for <task-state> are " + validTaskStates);
        } else {
            // Unknown command: dump the entire command reference.
            System.err.printf(usagePrefix + "<command> <args>\n");
            System.err.printf("\t[-submit <job-file>]\n");
            System.err.printf("\t[-status <job-id>]\n");
            System.err.printf("\t[-kill <job-id>]\n");
            System.err.printf("\t[-list [all]]\n");
            System.err.printf("\t[-list-active-grooms]\n");
            System.err.println("\t[-list-attempt-ids <job-id> " + "<task-state>]\n");
            System.err.printf("\t[-kill-task <task-id>]\n");
            System.err.printf("\t[-fail-task <task-id>]\n\n");
        }
    }

    /**
     * Dump a list of currently running jobs
     * 
     * @throws IOException
     */
    /**
     * Dumps the jobs that are currently running.
     *
     * @throws IOException if the master cannot be queried
     */
    private void listJobs() throws IOException {
        JobStatus[] running = jobsToComplete();
        // Treat a null answer from the master as "no running jobs".
        JobStatus[] toShow = (running == null) ? new JobStatus[0] : running;
        System.out.printf("%d jobs currently running\n", toShow.length);
        displayJobList(toShow);
    }

    /**
     * Dump a list of all jobs submitted.
     * 
     * @throws IOException
     */
    /**
     * Dumps every job ever submitted, with a legend for the state codes.
     *
     * @throws IOException if the master cannot be queried
     */
    private void listAllJobs() throws IOException {
        JobStatus[] all = getAllJobs();
        // Treat a null answer from the master as "no jobs".
        JobStatus[] toShow = (all == null) ? new JobStatus[0] : all;
        System.out.printf("%d jobs submitted\n", toShow.length);
        System.out.printf("States are:\n\tRunning : 1\tSucceded : 2" + "\tFailed : 3\tPrep : 4\n");
        displayJobList(toShow);
    }

    /** Prints one tab-separated row per job status, preceded by a header. */
    void displayJobList(JobStatus[] jobs) {
        System.out.printf("JobId\tState\tStartTime\tUserName\n");
        for (int i = 0; i < jobs.length; i++) {
            JobStatus status = jobs[i];
            System.out.printf("%s\t%d\t%d\t%s\n", status.getJobID(), status.getRunState(),
                    status.getStartTime(), status.getUsername());
        }
    }

    /**
     * Display the list of active groom servers
     */
    /**
     * Prints the name of every active groom server, one per line.
     */
    private void listActiveGrooms() throws IOException {
        ClusterStatus cluster = jobSubmitClient.getClusterStatus(true);
        for (Map.Entry<String, String> groom : cluster.getActiveGroomNames().entrySet()) {
            System.out.println(groom.getKey());
        }
    }

    /*
     * Helper methods for unix operations
     */

    protected static String getUnixUserName() throws IOException {
        String[] result = executeShellCommand(new String[] { Shell.USER_NAME_COMMAND });
        if (result.length != 1) {
            throw new IOException(
                    "Expect one token as the result of " + Shell.USER_NAME_COMMAND + ": " + toString(result));
        }
        String fixResult = fixCygwinName(result[0]);
        return fixResult;
    }

    private static String fixCygwinName(String in) {
        String string = in;
        if (string.contains("\\")) {
            // this is for cygwin systems
            string = string.substring(string.indexOf("\\"));
        }
        return string;
    }

    /**
     * Resolves the primary group of the given user via {@code id -Gn}.
     *
     * NOTE(review): 'user' is interpolated directly into a shell command, so
     * callers must only pass trusted input.
     *
     * @throws IOException if the command yields no tokens
     */
    static String getUnixUserGroupName(String user) throws IOException {
        String[] groups = executeShellCommand(new String[] { "bash", "-c", "id -Gn " + user });
        if (groups.length < 1) {
            throw new IOException(
                    "Expect one token as the result of " + "bash -c id -Gn " + user + ": " + toString(groups));
        }
        return groups[0];
    }

    /**
     * Joins the given tokens with single spaces.
     *
     * @param strArray the tokens; may be null
     * @return the space-separated concatenation, or "" for null/empty input
     */
    protected static String toString(String[] strArray) {
        if (strArray == null || strArray.length == 0) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        String separator = "";
        for (String token : strArray) {
            joined.append(separator).append(token);
            separator = " ";
        }
        return joined.toString();
    }

    /**
     * Runs the given command through the Hadoop shell helper and splits its
     * stdout on whitespace.
     *
     * @param command the command and its arguments
     * @return the whitespace-delimited output tokens (possibly empty)
     * @throws IOException if the command fails to execute
     */
    protected static String[] executeShellCommand(String[] command) throws IOException {
        String output = Shell.execCommand(command);
        StringTokenizer splitter = new StringTokenizer(output);
        String[] tokens = new String[splitter.countTokens()];
        int idx = 0;
        while (splitter.hasMoreTokens()) {
            tokens[idx++] = splitter.nextToken();
        }
        return tokens;
    }

    /**
     * A {@link Writable} carrier for a serialized input split: the concrete
     * split's class name, its serialized payload, its preferred host
     * locations, a partition id, and the data length. The field order in
     * {@link #write(DataOutput)} and {@link #readFields(DataInput)} defines
     * the wire format and must be kept in sync.
     */
    public static class RawSplit implements Writable {
        // Fully-qualified class name of the concrete split implementation.
        private String splitClass;
        // Serialized payload of the underlying split.
        private BytesWritable bytes = new BytesWritable();
        // Host names where the split's data is located.
        private String[] locations;
        // Partition this split is assigned to; MIN_VALUE marks "unassigned".
        private int partitionID = Integer.MIN_VALUE;
        long dataLength;

        /** Copies the given byte range into the payload buffer. */
        public void setBytes(byte[] data, int offset, int length) {
            bytes.set(data, offset, length);
        }

        public void setPartitionID(int id) {
            this.partitionID = id;
        }

        public int getPartitionID() {
            return partitionID;
        }

        public void setClassName(String className) {
            splitClass = className;
        }

        public String getClassName() {
            return splitClass;
        }

        public BytesWritable getBytes() {
            return bytes;
        }

        /**
         * Drops the payload buffer to release its memory.
         * NOTE(review): this nulls {@code bytes}, so a subsequent
         * {@link #readFields(DataInput)} on the same instance would throw an
         * NPE — confirm callers never deserialize into a cleared instance.
         */
        public void clearBytes() {
            bytes = null;
        }

        public void setLocations(String[] locations) {
            this.locations = locations;
        }

        public String[] getLocations() {
            return locations;
        }

        // Wire order: splitClass, dataLength, bytes, partitionID,
        // location count (vint), locations — mirrors write() below.
        @Override
        public void readFields(DataInput in) throws IOException {
            splitClass = Text.readString(in);
            dataLength = in.readLong();
            bytes.readFields(in);
            partitionID = in.readInt();
            int len = WritableUtils.readVInt(in);
            locations = new String[len];
            for (int i = 0; i < len; ++i) {
                locations[i] = Text.readString(in);
            }
        }

        // Serializes fields in the exact order readFields() consumes them.
        @Override
        public void write(DataOutput out) throws IOException {
            Text.writeString(out, splitClass);
            out.writeLong(dataLength);
            bytes.write(out);
            out.writeInt(partitionID);
            WritableUtils.writeVInt(out, locations.length);
            for (String location : locations) {
                Text.writeString(out, location);
            }
        }

        public long getDataLength() {
            return dataLength;
        }

        public void setDataLength(long l) {
            dataLength = l;
        }

    }

    /**
     * Command-line entry point: runs {@link #run(String[])} through Hadoop's
     * ToolRunner and exits the JVM with its return code.
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new BSPJobClient(), args);
        System.exit(res);
    }

}