it.crs4.pydoop.mapreduce.pipes.Application.java Source code

Introduction

Here is the source code for it.crs4.pydoop.mapreduce.pipes.Application.java, the class responsible for launching and communicating with the pipes child process.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package it.crs4.pydoop.mapreduce.pipes;

import java.io.File;
import java.io.IOException;
import java.net.ServerSocket;
import java.net.Socket;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import javax.crypto.SecretKey;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.conf.Configuration;
/*
  FIXME: org.apache.hadoop.mapred.TaskLog is clearly not meant to be used
  with org.apache.hadoop.mapreduce.*

  For the time being, we use the following as a stand-in:

  it.crs4.pydoop.mapreduce.pipes.TaskLog
*/

import org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.security.SecureShuffleUtils;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier;
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;

/**
 * This class is responsible for launching and communicating with the child 
 * process.
 */
class Application<K1 extends Writable, V1 extends Writable,
                  K2 extends WritableComparable, V2 extends Writable> {
    private static final Log LOG = LogFactory.getLog(Application.class.getName());
    private ServerSocket serverSocket;
    private Process process;
    private Socket clientSocket;
    private OutputHandler<K2, V2> handler;
    private DownwardProtocol<K1, V1> downlink;
    static final boolean WINDOWS = System.getProperty("os.name").startsWith("Windows");

    /**
     * Start the child process to handle the task for us.
     * @throws IOException
     * @throws InterruptedException
     */
    Application(TaskInputOutputContext<K1, V1, K2, V2> context, DummyRecordReader input)
            throws IOException, InterruptedException {

        Configuration conf = context.getConfiguration();
        serverSocket = new ServerSocket(0);
        Map<String, String> env = new HashMap<String, String>();
        // add TMPDIR environment variable with the value of java.io.tmpdir
        env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
        env.put(Submitter.PORT, Integer.toString(serverSocket.getLocalPort()));

        //Add token to the environment if security is enabled
        Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(context.getCredentials());
        // This password is used as the shared secret key between this
        // application and the child pipes process.
        byte[] password = jobToken.getPassword();
        String localPasswordFile = new File(".") + Path.SEPARATOR + "jobTokenPassword";
        writePasswordToLocalFile(localPasswordFile, password, conf);
        // FIXME: why is this not Submitter.SECRET_LOCATION?
        env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

        List<String> cmd = new ArrayList<String>();
        String interpretor = conf.get(Submitter.INTERPRETOR);
        if (interpretor != null) {
            cmd.add(interpretor);
        }
        String executable = context.getLocalCacheFiles()[0].toString();
        if (!(new File(executable).canExecute())) {
            // LinuxTaskController sets +x permissions on all distcache files already.
            // In case of DefaultTaskController, set permissions here.
            FileUtil.chmod(executable, "u+x");
        }
        cmd.add(executable);
        // Wrap the command in a stdout/stderr capture.
        // We are starting a map/reduce task of the pipes job; this is not a
        // cleanup attempt.
        TaskAttemptID taskid = context.getTaskAttemptID();

        File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
        File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
        long logLength = TaskLog.getTaskLogLength(conf);
        cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength, false);
        process = runClient(cmd, env);
        clientSocket = serverSocket.accept();
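        // Challenge/response authentication: the parent sends
        // digest(password, challenge) to the child and expects
        // digest(password, digestToSend) in reply; digestExpected is
        // precomputed here and verified by the OutputHandler.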

        String challenge = getSecurityChallenge();
        String digestToSend = createDigest(password, challenge);
        String digestExpected = createDigest(password, digestToSend);

        handler = new OutputHandler<K2, V2>(context, input, digestExpected);
        K2 outputKey = (K2) ReflectionUtils.newInstance(context.getOutputKeyClass(), conf);
        V2 outputValue = (V2) ReflectionUtils.newInstance(context.getOutputValueClass(), conf);
        downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);

        downlink.authenticate(digestToSend, challenge);
        waitForAuthentication();
        LOG.debug("Authentication succeeded");
        downlink.start();
        downlink.setJobConf(conf);
    }
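
    /*
     * Typical lifecycle, as driven by the pipes task runners (a sketch based
     * on this class's own API; the surrounding driver code is an assumption):
     *
     *   Application<K1, V1, K2, V2> app = new Application<>(context, input);
     *   try {
     *       boolean ok = app.waitForFinish();
     *   } catch (Throwable t) {
     *       app.abort(t);   // wraps and rethrows t as an IOException
     *   } finally {
     *       app.cleanup();  // closes the server socket and the downlink
     *   }
     */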

    private String getSecurityChallenge() {
        Random rand = new Random(System.currentTimeMillis());
        // Use four random non-negative ints (4 bytes each, roughly 16 bytes
        // of randomness), concatenated as decimal strings.
        StringBuilder strBuilder = new StringBuilder();
        strBuilder.append(rand.nextInt(0x7fffffff));
        strBuilder.append(rand.nextInt(0x7fffffff));
        strBuilder.append(rand.nextInt(0x7fffffff));
        strBuilder.append(rand.nextInt(0x7fffffff));
        return strBuilder.toString();
    }

    private void writePasswordToLocalFile(String localPasswordFile, byte[] password, Configuration conf)
            throws IOException {
        FileSystem localFs = FileSystem.getLocal(conf);
        Path localPath = new Path(localPasswordFile);
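        // "400" = owner read-only, so only the task's user can read the
        // shared secret.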
        FSDataOutputStream out = FileSystem.create(localFs, localPath, new FsPermission("400"));
        out.write(password);
        out.close();
    }

    /**
     * Get the downward protocol object that can send commands down to the
     * application.
     * @return the downlink proxy
     */
    DownwardProtocol<K1, V1> getDownlink() {
        return downlink;
    }

    /**
     * Wait for authentication response.
     * @throws IOException
     * @throws InterruptedException
     */
    void waitForAuthentication() throws IOException, InterruptedException {
        downlink.flush();
        LOG.debug("Waiting for authentication response");
        handler.waitForAuthentication();
    }

    /**
     * Wait for the application to finish
     * @return did the application finish correctly?
     * @throws Throwable
     */
    boolean waitForFinish() throws Throwable {
        downlink.flush();
        return handler.waitForFinish();
    }

    /**
     * Abort the application and wait for it to finish.
     * @param t the exception that signalled the problem
     * @throws IOException A wrapper around the exception that was passed in
     */
    void abort(Throwable t) throws IOException {
        LOG.info("Aborting because of " + StringUtils.stringifyException(t));
        try {
            downlink.abort();
            downlink.flush();
        } catch (IOException e) {
            // IGNORE cleanup problems
        }
        try {
            handler.waitForFinish();
        } catch (Throwable ignored) {
            process.destroy();
        }
        IOException wrapper = new IOException("pipe child exception");
        wrapper.initCause(t);
        throw wrapper;
    }

    /**
     * Clean up the child process and socket.
     * @throws IOException
     */
    void cleanup() throws IOException {
        serverSocket.close();
        try {
            downlink.close();
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Run a given command in a subprocess with the given environment.
     * Capturing the child's stdout and stderr is handled separately: the
     * constructor wraps the command with TaskLog.captureOutAndError before
     * calling this method.
     * @param command the command and its arguments
     * @param env the environment to run the process in
     * @return a handle on the process
     * @throws IOException
     */
    static Process runClient(List<String> command, Map<String, String> env) throws IOException {
        ProcessBuilder builder = new ProcessBuilder(command);
        if (env != null) {
            builder.environment().putAll(env);
        }
        return builder.start();
    }
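
    /*
     * Example usage (a sketch with hypothetical values; callers must live in
     * this package, since the class and method are package-private):
     *
     *   List<String> cmd = new ArrayList<String>();
     *   cmd.add("/bin/echo");
     *   cmd.add("hello");
     *   Map<String, String> env = new HashMap<String, String>();
     *   env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
     *   Process p = Application.runClient(cmd, env);
     *   p.waitFor();
     */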

    public static String createDigest(byte[] password, String data) throws IOException {
        SecretKey key = JobTokenSecretManager.createSecretKey(password);
        return SecureShuffleUtils.hashFromString(data, key);
    }

}
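
Example

A minimal sketch of the challenge/response handshake performed in the constructor above: the parent sends digest(password, challenge) down the link and expects the child to reply with digest(password, digestToSend). The values below are hypothetical, and the sketch must live in the it.crs4.pydoop.mapreduce.pipes package because Application is package-private.

package it.crs4.pydoop.mapreduce.pipes;

public class HandshakeSketch {
    public static void main(String[] args) throws Exception {
        byte[] password = "job-token-password".getBytes("UTF-8"); // hypothetical secret
        String challenge = "1234567890123456";                    // hypothetical challenge
        // What the parent sends to the child.
        String digestToSend = Application.createDigest(password, challenge);
        // What the parent expects the child to send back.
        String digestExpected = Application.createDigest(password, digestToSend);
        System.out.println("send:   " + digestToSend);
        System.out.println("expect: " + digestExpected);
    }
}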