com.lightboxtechnologies.spectrum.MRCoffeeJob.java Source code

Java tutorial

Introduction

Here is the source code for com.lightboxtechnologies.spectrum.MRCoffeeJob.java

Source

/*
   Copyright 2011, Lightbox Technologies, Inc
    
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
    
   http://www.apache.org/licenses/LICENSE-2.0
    
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/

package com.lightboxtechnologies.spectrum;

import java.io.File;
import java.io.InputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.KeyValueSortReducer;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import org.apache.commons.codec.DecoderException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.lightboxtechnologies.io.IOUtils;

public class MRCoffeeJob {

    /**
     * Mapper that streams the contents of selected file-system entries to an
     * external MRCoffee server process and emits what the server returns.
     *
     * <p>Each mapper instance launches its own server in
     * {@code setup}, exchanges data with it through an {@link MRCoffeeClient}
     * in {@code map}, and tears everything down again in {@code cleanup}.
     * For every processed entry one record is written: the input key paired
     * with a JSON object holding the result's "stdout" and "stderr".
     */
    protected static class MRCoffeeMapper
            extends Mapper<ImmutableHexWritable, FsEntry, ImmutableHexWritable, JsonWritable> {

        private static final Log LOG = LogFactory.getLog(MRCoffeeMapper.class.getName());

        /** Milliseconds to sleep between checks for the server's socket file. */
        private static final long SOCKET_POLL_MILLIS = 100;

        /** Value of the "timestamp" job property; identical for all mappers of one job. */
        protected long timestamp;

        /**
         * Decides which entries are processed: only entries whose "name_type"
         * attribute is a number equal to 5 are accepted. (An earlier variant
         * additionally required the "type" attribute to be equal to 1.)
         */
        protected FsEntryFilter filter = new FsEntryFilter() {
            public boolean accept(byte[] id, FsEntry entry) {
                final Object name_type = entry.get("name_type");
                LOG.info("name_type == " + name_type);
                return name_type instanceof Number
                    && ((Number) name_type).longValue() == 5;
            }
        };

        /** The command line, encoded as consecutive NUL-terminated strings. */
        protected byte[] command;

        /** Connection to the MRCoffee server started by this mapper. */
        protected MRCoffeeClient client;

        /** Scratch buffer used while copying entry data to the server. */
        protected final byte[] buf = new byte[4096];

        /** Reused output value. */
        protected final JsonWritable json = new JsonWritable();

        /** Directory for the socket file; extended to the full socket path in setup. */
        protected String pipe_path = "/tmp";

        /** Location of the MRCoffee server executable. */
        protected final String mrcoffee_path = "/tmp/mrcoffee";

        /** The running MRCoffee server process, or null if none was started. */
        protected Process mrcoffee;

        /**
         * Reads the job settings, starts the MRCoffee server and connects to it.
         *
         * <p>If anything fails after the server process has been started, the
         * process is destroyed and the socket file removed before the failure
         * is propagated, so a failed setup does not leave a stray server behind.
         *
         * @param context the task context supplying configuration and job id
         * @throws IOException if the server cannot be started, exits before
         *         creating its socket, or cannot be connected to
         * @throws InterruptedException if interrupted while waiting for the socket
         * @throws IllegalStateException if no "command" is configured
         * @throws RuntimeException if the "timestamp" property is not a valid long
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            LOG.info("Setup called");

            final Configuration conf = context.getConfiguration();

            // ensure that all mappers have the same timestamp
            try {
                timestamp = Long.parseLong(conf.get("timestamp"));
            } catch (NumberFormatException e) {
                throw new RuntimeException(e);
            }

            // NOTE(review): Configuration string arrays are comma-delimited, so
            // an argument that itself contains a comma would presumably arrive
            // here split in two -- confirm against how the job stores "command".
            final String[] args = conf.getStrings("command");
            if (args == null) {
                throw new IllegalStateException(
                    "No \"command\" set in the job configuration");
            }

            // construct the command byte array as a series of C-style strings
            final StringBuilder sb = new StringBuilder();
            for (String arg : args) {
                sb.append(arg).append('\0');
            }
            // NOTE(review): encodes with the platform default charset.
            command = sb.toString().getBytes();

            // name the socket file after this job, made unique per mapper
            pipe_path += '/' + context.getJobID().toString() + '_' + UUID.randomUUID();

            // delete old socket file, if it exists
            final File pipe_file = new File(pipe_path);
            pipe_file.delete();

            // start MRCoffee server
            // NOTE(review): the child's stdout/stderr are never read here.
            mrcoffee = new ProcessBuilder(mrcoffee_path, pipe_path).start();

            boolean connected = false;
            try {
                // give MRCoffee time to create a socket
                awaitSocket(pipe_file);

                // create a MRCoffee client
                client = new MRCoffeeClient();
                try {
                    client.open(pipe_file);
                } catch (IOException e) {
                    IOUtils.closeQuietly(client);
                    throw (IOException) new IOException(
                        "Could not connect to MRCoffee at " + pipe_path).initCause(e);
                }

                connected = true;
            } finally {
                if (!connected) {
                    // do not leave the server or its socket file behind
                    mrcoffee.destroy();
                    pipe_file.delete();
                }
            }
        }

        /**
         * Blocks until the socket file exists, polling at a fixed interval.
         * Gives up as soon as the server process is found to have terminated,
         * since the file would then never appear.
         *
         * @param pipe_file the socket file the server is expected to create
         * @throws IOException if the server exits before the file exists
         * @throws InterruptedException if interrupted while sleeping
         */
        private void awaitSocket(File pipe_file) throws IOException, InterruptedException {
            while (!pipe_file.exists()) {
                if (hasExited(mrcoffee)) {
                    throw new IOException(
                        "MRCoffee server " + mrcoffee_path + " exited with status "
                        + mrcoffee.exitValue() + " before creating " + pipe_path);
                }
                Thread.sleep(SOCKET_POLL_MILLIS);
            }
        }

        /** Returns whether the given process has already terminated. */
        private static boolean hasExited(Process process) {
            try {
                process.exitValue();
                return true;
            } catch (IllegalThreadStateException stillRunning) {
                // exitValue() throws while the process is still alive
                return false;
            }
        }

        /**
         * Sends one entry's data to the MRCoffee server and writes the result.
         *
         * <p>Entries rejected by the filter, entries whose "size" attribute is
         * missing, non-numeric or negative, and entries without an input
         * stream are logged and skipped without producing output.
         *
         * @param key the entry's key, reused unchanged as the output key
         * @param entry the file-system entry to process
         * @param context the task context the result is written to
         * @throws IOException if reading the entry or talking to the server fails
         * @throws InterruptedException if writing the output is interrupted
         */
        @Override
        protected void map(ImmutableHexWritable key, FsEntry entry, Context context)
                throws IOException, InterruptedException {
            final String path = entry.fullPath();

            // check whether the filter accepts this entry
            if (!filter.accept(key.get(), entry)) {
                LOG.info("Skipping " + path);
                return;
            }

            LOG.info("Processing " + path);

            // try to get the size of this entry
            final Object o = entry.get("size");
            if (!(o instanceof Number)) {
                LOG.info("Stream length for " + path + " was " + o + ", not a number");
                return;
            }

            final long size = ((Number) o).longValue();
            if (size < 0) {
                LOG.info("Stream length for " + path + " was " + size);
                return;
            }

            // feed the data to MRCoffee
            InputStream in = null;
            try {
                // TODO: support operations on other streams?
                in = entry.getInputStream();
                if (in == null) {
                    LOG.info("Stream for " + path + " was null");
                    return;
                }

                // send the command
                client.writeCommand(command);

                // send the data, preceded by its length
                LOG.info("Streaming " + path + ", " + size + " bytes");
                client.writeLength(size);

                final OutputStream out = client.getOutputStream();
                IOUtils.copy(in, out, buf, size);
                out.flush();

                // close explicitly so that a failure to close is reported;
                // the finally block only covers the error paths
                in.close();
            } finally {
                IOUtils.closeQuietly(in);
            }

            // get the result
            final MRCoffeeClient.Result result = client.readResult();

            // convert the result to JSON
            final Map<String, Object> map = new HashMap<String, Object>();
            map.put("stdout", result.stdout);
            map.put("stderr", result.stderr);
            json.set(map);

            // write the result
            context.write(key, json);

            LOG.info("Finished " + path);
        }

        /**
         * Closes the client connection, stops the MRCoffee server and removes
         * the socket file. The server is destroyed and the file deleted even
         * when closing the client fails, and members that were never
         * initialised are skipped.
         *
         * @param context the task context (unused)
         * @throws IOException if closing the client fails
         */
        @Override
        protected void cleanup(Context context) throws IOException {
            LOG.info("Cleanup called");

            try {
                if (client != null) {
                    client.close();
                }
            } finally {
                // TODO: do this more gracefully
                // shut down MRCoffee
                if (mrcoffee != null) {
                    mrcoffee.destroy();
                }

                // remove socket file
                new File(pipe_path).delete();
            }
        }
    }

    /**
     * Configures and runs the MRCoffee job, blocking until it has finished.
     *
     * <p>The command and the current time are stored in the configuration
     * under "command" and "timestamp" before the job is created. The job is
     * map-only and writes text output; the HFile output path with a sorting
     * reducer is not enabled.
     *
     * @param imageID identifier handed to the input format's job setup
     * @param outpath directory the job output is written to
     * @param command the command and its arguments
     * @param conf the configuration the job is built from; it is modified
     * @return 0 if the job completed successfully, 1 otherwise
     */
    public static int run(String imageID, String outpath, String[] command, Configuration conf)
            throws ClassNotFoundException, DecoderException, IOException, InterruptedException {
        // settings must be in place before the Job is constructed from conf
        conf.setStrings("command", command);
        conf.setLong("timestamp", System.currentTimeMillis());

        final Job mrJob = new Job(conf, "MRCoffeeJob");

        // map side only: no reducers
        mrJob.setJarByClass(MRCoffeeJob.class);
        mrJob.setMapperClass(MRCoffeeMapper.class);
        mrJob.setNumReduceTasks(0);

        // input
        FsEntryHBaseInputFormat.setupJob(mrJob, imageID);
        mrJob.setInputFormatClass(FsEntryHBaseInputFormat.class);

        // output
        mrJob.setOutputKeyClass(ImmutableHexWritable.class);
        mrJob.setOutputValueClass(JsonWritable.class);
        mrJob.setOutputFormatClass(TextOutputFormat.class);

        final Path outputDir = new Path(outpath);
        TextOutputFormat.setOutputPath(mrJob, outputDir);

        final boolean succeeded = mrJob.waitForCompletion(true);
        if (succeeded) {
            return 0;
        }
        return 1;
    }

    /**
     * Command-line entry point.
     *
     * <p>Generic Hadoop options are consumed by {@link GenericOptionsParser};
     * the image id, output path and command are taken from the arguments that
     * remain afterwards, so they are picked up correctly even when generic
     * options precede them. The process exits with the status returned by
     * {@code run}, or with status 2 after printing a usage message when fewer
     * than three arguments remain.
     *
     * @param args generic options followed by
     *        {@code <image_id> <outpath> <command>...}
     */
    public static void main(String[] args)
            throws ClassNotFoundException, DecoderException, IOException, InterruptedException {
        final Configuration conf = HBaseConfiguration.create();
        final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        if (otherArgs.length < 3) {
            System.err.println("Usage: MRCoffeeJob <image_id> <outpath> <command>...");
            System.exit(2);
        }

        // get command and arguments; everything must come from otherArgs,
        // because args may still contain the generic options
        final String[] command = new String[otherArgs.length - 2];
        System.arraycopy(otherArgs, 2, command, 0, command.length);

        System.exit(run(otherArgs[0], otherArgs[1], command, conf));
    }
}