Egg.java Source code

Java tutorial

Introduction

Here is the source code for Egg.java

Source

/*
This file is part of Eggshell.
Copyright 2013 George Magiros
    
Eggshell is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
    
Eggshell is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.
    
You should have received a copy of the GNU General Public License
along with Eggshell.  If not, see <http://www.gnu.org/licenses/>.
*/

import org.mozilla.javascript.*;
import org.mozilla.javascript.annotations.JSConstructor;
import org.mozilla.javascript.annotations.JSFunction;
import org.mozilla.javascript.annotations.JSGetter;
import org.mozilla.javascript.annotations.JSSetter;

import java.io.IOException;

// hadoop classes
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Reducer;

// hadoop input formats
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;

// hadoop output formats
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

/** An instance of this Javascript class representates a Hadoop
 *  Job. This object includes properties that reference the standard
 *  output and standard error print streams.  A constructor call to
 *  this class with no arguments creates a new Hadoop Job.  A
 *  construct call with a Hadoop Job instance creates an object which
 *  encapsulates that job.  When the function 'eggshell' is invoked by
 *  the run method of the Hadoop tool runner, an instance of this
 *  class is used as the function's 'this' object.
 */
public class Egg extends ScriptableObject {
    private static final long serialVersionUID = 2069490938230603150L;

    /* public static fields */

    /** Holds the javascript interpreter */
    public static Script script;
    /** Holds the initial Hadoop configuration object */
    public static Configuration conf;
    /** Holds the name of the script */
    public static String name;

    /* private fields */

    /** Holds a Hadoop job object for the instance */
    private Job job;

    /* constructors */

    /** Called when first defined as a Javascript class 
     *  @return This class
     */
    public Egg() {
    }

    /** The name of this Javascript class as a string 
     *  @return  The string name of this class
     */
    public String getClassName() {
        return "Egg";
    }

    /** Creates a Hadoop job with a default configuration of
     *  TextInputFormat and TextOutputFormat.  If invoked with no
     *  parameters, uses the initially created job as the parent to
     *  spawn a new job.  The name of the parent job is used as the name
     *  of the child job.  The object is used a the 'this' object of the
     *  eggshell function
     *  @param o    The Hadoop Job
     */
    @JSConstructor
    public Egg(Object o) throws IOException {
        Configuration cf = conf; // new Configuration(conf);
        job = new Job(cf, name);
        job.setJarByClass(this.getClass()); // set jar file
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Tuple.class); // K2
        job.setOutputValueClass(Tuple.class); // V2
        job.setMapperClass(Payload.TextMap.class);
        job.setReducerClass(Reducer.class);
        job.setCombinerClass(Reducer.class);
    }

    /* chainable public methods */

    /** Defines how to read data from a file into the Mapper instances.
     *  This method sets the input format to the 'TextInputFormat'
     *  implementation.
     *  @return The 'this' object
     */
    @JSFunction
    public Egg textInputFormat() {
        job.setInputFormatClass(TextInputFormat.class);
        return this;
    }

    /** Defines how to read data from a file into the Mapper instances.
     *  This method sets the input format to the
     *  'KeyValueTextInputFormat' implementation.
     *  @return The 'this' object
     */
    @JSFunction
    public Egg keyValueTextInputFormat() {
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        return this;
    }

    /** Defines how to read data from a file into the Mapper instances.
     *  This method sets the input format to the
     *  'SequenceFileInputFormat' implementation.
     *  @return The 'this' object
     */
    @JSFunction
    public Egg sequenceFileInputFormat() {
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(Payload.TupleMap.class);
        return this;
    }

    /** Defines how to read data from a file into the Mapper instances.
     *  This method sets the input format to the
     *  'NLineInputFormat' implementation.
     *  @return The 'this' object
     */
    @JSFunction
    public Egg nLineInputFormat() {
        job.setInputFormatClass(NLineInputFormat.class);
        return this;
    }

    /** Defines how to write the results of a job back into a file.
     *  This method sets the output format to the 'TextOutputFormat'
     *  implementation.
     *  @return The 'this' object
     */
    @JSFunction
    public Egg textOutputFormat() {
        job.setOutputFormatClass(TextOutputFormat.class);
        return this;
    }

    /** Defines how to write the results of a job back into a file.
     *  This method sets the output format to the 'NullOutputFormat'
     *  implementation.
     *  @return The 'this' object
     */
    @JSFunction
    public Egg nullOutputFormat() {
        job.setOutputFormatClass(NullOutputFormat.class);
        return this;
    }

    /** Defines how to write the results of a job back into a file.
     *  This method sets the output format to the
     *  'SequenceFileOutputFormat' implementation.
     *  @return The 'this' object
     */
    @JSFunction
    public Egg sequenceFileOutputFormat() {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        return this;
    }

    /** Set the user-specified job name.
     *  @param name The job name
     *  @return The 'this' object
     */
    @JSFunction
    public Egg name(String name) {
        job.setJobName(name);
        return this;
    }

    /** Adds a path to the list of inputs for the map-reduce job
     *  @param pathString  The name of the path
     *  @return            The 'this' object
     */
    @JSFunction
    public Egg addInput(String pathString) throws IOException {
        Path path = new Path(pathString);
        FileInputFormat.addInputPath(job, path);
        return this;
    }

    /** Sets the list of inputs for the map-reduce job to the path
     *  @param pathString  The name of the path
     *  @return            The 'this' object
     */
    @JSFunction
    public Egg input(String pathString) throws IOException {
        Path path = new Path(pathString);
        FileInputFormat.setInputPaths(job, path);
        return this;
    }

    /** Sets the output for the map-reduce job to the path
     *  @param pathString  The name of the path
     *  @return            The 'this' object
     */
    @JSFunction
    public Egg output(String pathString) {
        Path path = new Path(pathString);
        FileOutputFormat.setOutputPath(job, path);
        return this;
    }

    /** Sets the number of reduce tasks for the map-reduce job
     *  @param tasks       The number of reduce tasks
     *  @return            The 'this' object
     */
    @JSFunction
    public Egg numReduceTasks(int tasks) {
        job.setNumReduceTasks(tasks);
        return this;
    }

    /** Turns speculative execution on or off for the map tasks
     *  @param enable      On or off
     *  @return            The 'this' object
     */
    @JSFunction
    public Egg mapSpeculativeExecution(boolean enable) {
        job.setMapSpeculativeExecution(enable);
        return this;
    }

    /** Turns speculative execution on or off for the reduce tasks
     *  @param enable      On or off
     *  @return            The 'this' object
     */
    @JSFunction
    public Egg reduceSpeculativeExecution(boolean enable) {
        job.setReduceSpeculativeExecution(enable);
        return this;
    }

    /** Turns speculative execution on or off for the map-reduce job
     *  @param enable      On or off
     *  @return            The 'this' object
     */
    @JSFunction
    public Egg speculativeExecution(boolean enable) {
        job.setSpeculativeExecution(enable);
        return this;
    }

    /** Sets the mapper function for the job
     *  @param o  The javascript map function
     *  @return   The 'this' object
     */
    @JSFunction
    public Egg map(Object o) throws IOException {
        script.serialize(job.getConfiguration(), o, Eggshell.MAP_FILE);
        return this;
    }

    /** Sets the reducer function for the job
     *  @param o  The javascript reduce function
     *  @return   The 'this' object
     */
    @JSFunction
    public Egg reduce(Object o) throws IOException {
        script.serialize(job.getConfiguration(), o, Eggshell.REDUCE_FILE);
        job.setReducerClass(Payload.Reduce.class);
        return this;
    }

    /** Sets the combiner function for the job
     *  @param o  The javascript combine function
     *  @return   The 'this' object
     */
    @JSFunction
    public Egg combine(Object o) throws IOException {
        script.serialize(job.getConfiguration(), o, Eggshell.COMBINE_FILE);
        job.setCombinerClass(Payload.Combine.class);
        return this;
    }

    /* non-chainable public methods */

    /** Kill the running job
     */
    @JSFunction
    public void kill() throws IOException {
        job.killJob();
    }

    /** Submit the job to the cluster and return immediately
     */
    @JSFunction
    public void submit() throws IOException, InterruptedException, ClassNotFoundException {
        job.submit();
    }

    /** Submit the job to the cluster and wait for it to finish
     *  @param verbose  Verbose output or not
     *  @return         Job completion sucess
     */
    @JSFunction
    public boolean waitForCompletion(boolean verbose)
            throws IOException, InterruptedException, ClassNotFoundException {
        return job.waitForCompletion(verbose);
    }

    /** Get the user specified job name
     *  @return The job name
     */
    @JSFunction
    public String getName() {
        return job.getJobName();
    }

    /* getter methods */

    /** Get the encapsulated job object
     *  @return The job object
     */
    @JSGetter
    public Job getJob() {
        return job;
    }

    /** Get the pathname of the job's jar
     *  @return The pathname
     */
    @JSGetter
    public String getJar() {
        return job.getJar();
    }

    /** Get the URL for tracking the job's progress
     *  @return The URL
     */
    @JSGetter
    public String getTrackingURL() {
        return job.getTrackingURL();
    }

    /** Check if the job finished or not
     *  @return Job completion status
     */
    @JSGetter
    public Boolean getIsComplete() throws IOException {
        return job.isComplete();
    }

    /** Check if the job finished successfully or not
     *  @return Job success status
     */
    @JSGetter
    public Boolean getIsSuccessful() throws IOException {
        return job.isSuccessful();
    }

    /** Get the progress of the job's map tasks
     *  @return  A progress value between 0.0 and 1.0.
     */
    @JSGetter
    public Double getMapProgress() throws IOException {
        return (double) job.mapProgress();
    }

    /** Get the progress of the job's reduce tasks
     *  @return  A progress value between 0.0 and 1.0.
     */
    @JSGetter
    public Double getReduceProgress() throws IOException {
        return (double) job.reduceProgress();
    }

    /** Get the progress of the job's setup
     *  @return  A progress value between 0.0 and 1.0.
     */
    @JSGetter
    public Double getSetupProgress() throws IOException {
        return (double) job.setupProgress();
    }
}