com.ibm.jaql.lang.expr.hadoop.NativeMapReduceExpr.java Source code

Java tutorial

Introduction

Below is the complete source code for com.ibm.jaql.lang.expr.hadoop.NativeMapReduceExpr.java.

Source

/*
 * Copyright (C) IBM Corp. 2008.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.ibm.jaql.lang.expr.hadoop;

import java.io.File;
import java.io.IOException;
import java.util.EnumMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.log4j.Logger;

import com.ibm.jaql.json.schema.Schema;
import com.ibm.jaql.json.schema.SchemaFactory;
import com.ibm.jaql.json.type.BufferedJsonRecord;
import com.ibm.jaql.json.type.JsonBool;
import com.ibm.jaql.json.type.JsonRecord;
import com.ibm.jaql.json.type.JsonString;
import com.ibm.jaql.json.type.JsonType;
import com.ibm.jaql.json.type.JsonValue;
import com.ibm.jaql.lang.core.Context;
import com.ibm.jaql.lang.expr.core.Expr;
import com.ibm.jaql.lang.expr.core.ExprProperty;
import com.ibm.jaql.lang.expr.function.DefaultBuiltInFunctionDescriptor;
import com.ibm.jaql.util.ClassLoaderMgr;

// TODO: add an async option and return a handle. add an additional expression to manage an MR handle.
//       it will be able to do things like kill the job, report on its status, etc.
/**
 * @jaqlDescription launch a natively specified MapReduce job
 * Usage: 
 * { status: boolean } nativeMR( { job conf } conf , { apiVersion: "0.0" | "1.0", useSessionJar: boolean } options );
 * 
 * Launch a stand-alone map-reduce job that is exclusively described by job conf settings.
 * The conf can be obtained using loadJobConf or it can be specified using a record literal
 * that lists the needed name/value pairs for the job. If apiVersion is set to "0.0", then
 * the old Hadoop MapReduce API is used. Otherwise, the new API is used.
 * 
 * The useSessionJar is convenient for those native MapReduce jobs that use jaql libraries.
 * Since the jaql client already packages up jars when submitting jobs to Hadoop's MapReduce,
 * the useSessionJar is used to specify that the job's jar should use the client's currently
 * packaged jar.
 * 
 * @jaqlExample nativeMR( loadJobConf( "myJob.conf" ) );
 * { status: true } 
 */
public class NativeMapReduceExpr extends Expr {
    protected static final Logger LOG = Logger.getLogger(NativeMapReduceExpr.class.getName());

    /** Registers this expression as the built-in "nativeMR" function (1 required arg, 1 optional). */
    public static class Descriptor extends DefaultBuiltInFunctionDescriptor.Par12 {
        public Descriptor() {
            super("nativeMR", NativeMapReduceExpr.class);
        }
    }

    /** Field name of the boolean result record: { status: boolean }. */
    public static final JsonString STATUS = new JsonString("status");
    private static final Map<ExprProperty, Boolean> properties = new EnumMap<ExprProperty, Boolean>(
            ExprProperty.class);
    static {
        properties.put(ExprProperty.ALLOW_COMPILE_TIME_COMPUTATION, false);
        properties.put(ExprProperty.HAS_CAPTURES, true);
        properties.put(ExprProperty.HAS_SIDE_EFFECTS, true);
        properties.put(ExprProperty.IS_NONDETERMINISTIC, true);
        properties.put(ExprProperty.READS_EXTERNAL_DATA, true);
    }

    // Option values and names. Note: option state is intentionally kept in locals of
    // eval() rather than instance fields, so one evaluation cannot leak its options
    // (e.g. apiVersion "1.0") into a later evaluation of the same expression tree.
    private static final String VERSION_0_0 = "0.0";
    private static final String VERSION_1_0 = "1.0";
    private static final JsonString VERSION_NAME = new JsonString("apiVersion");
    private static final JsonString USE_SESSION_JAR = new JsonString("useSessionJar");

    /**
     * @param exprs exprs[0] = job conf record, exprs[1] = options record (may eval to null)
     */
    public NativeMapReduceExpr(Expr[] exprs) {
        super(exprs);
    }

    /* (non-Javadoc)
     * @see com.ibm.jaql.lang.expr.core.Expr#getProperties()
     */
    @Override
    public Map<ExprProperty, Boolean> getProperties() {
        return properties;
    }

    /* (non-Javadoc)
     * @see com.ibm.jaql.lang.expr.core.Expr#getSchema()
     */
    @Override
    public Schema getSchema() {
        return SchemaFactory.recordSchema();
    }

    /**
     * Evaluates the expression: builds a Hadoop Configuration from the conf record,
     * reads the options record, and submits the job via the selected MapReduce API.
     *
     * @return a record of the form { status: boolean }
     * @throws Exception if a conf value is not a JsonString, or submission setup fails
     */
    @Override
    public JsonValue eval(Context context) throws Exception {
        // get the conf values from the parameter
        JsonRecord confRec = (JsonRecord) exprs[0].eval(context);

        // per-evaluation option state (see note on the option constants above)
        String apiVersion = VERSION_0_0;
        boolean useSessionJar = false;

        // get the options (if they exist) and process them
        JsonRecord optsRec = (JsonRecord) exprs[1].eval(context);
        if (optsRec != null) {
            // option that specifies hadoop API version; anything other than "1.0" keeps "0.0"
            JsonValue v = optsRec.get(VERSION_NAME);
            if (v != null && v.getType() == JsonType.STRING) {
                String vs = ((JsonString) v).toString();
                if (vs.equals(VERSION_1_0)) {
                    apiVersion = vs;
                }
            }
            // option that specifies whether jaql's session jar is to be used
            v = optsRec.get(USE_SESSION_JAR);
            if (v != null && v.getType() == JsonType.BOOLEAN) {
                useSessionJar = ((JsonBool) v).get();
            }
        }

        // set up the conf; every value must be a JsonString
        Configuration conf = new Configuration();
        for (Map.Entry<JsonString, JsonValue> e : confRec) {
            String k = e.getKey().toString();
            JsonValue val = e.getValue();
            if (!(val instanceof JsonString)) {
                // null-safe message: a null value previously NPE'd on val.getType()
                throw new Exception("conf value must be of type String: "
                        + (val == null ? "null" : val.getType() + "," + val));
            }
            conf.set(k, ((JsonString) val).toString());
        }

        if (VERSION_0_0.equals(apiVersion)) {
            return eval_0_0(conf, useSessionJar);
        }
        return eval_1_0(conf, useSessionJar);
    }

    /**
     * Submits the job using the old (org.apache.hadoop.mapred) API.
     *
     * @param conf          the fully-specified job configuration
     * @param useSessionJar if true, use jaql's currently packaged session jar
     * @return { status: boolean }
     */
    private JsonRecord eval_0_0(Configuration conf, boolean useSessionJar) throws Exception {
        JobConf job = new JobConf(conf);
        // set the jar if needed; fall back to this class's jar when no session jar exists
        if (useSessionJar) {
            File jFile = ClassLoaderMgr.getExtensionJar();
            if (jFile != null) {
                job.setJar(jFile.getAbsolutePath());
            } else {
                job.setJarByClass(NativeMapReduceExpr.class);
            }
        }

        // submit the job; a submission failure is reported as status=false, not rethrown
        boolean status = true;
        try {
            //JobClient.runJob(job);
            Util.submitJob(new JsonString(NativeMapReduceExpr.class.getName()), job);
        } catch (IOException e) {
            status = false;
            LOG.warn("native map-reduce job failed", e);
        }
        return statusRecord(status);
    }

    /**
     * Submits the job using the new (org.apache.hadoop.mapreduce) API and waits for it.
     *
     * @param conf          the fully-specified job configuration
     * @param useSessionJar if true, use jaql's currently packaged session jar
     * @return { status: boolean }
     */
    private JsonRecord eval_1_0(Configuration conf, boolean useSessionJar) throws Exception {
        Job job;
        // set the jar if needed; guard against a missing session jar (previously an NPE)
        File jFile = useSessionJar ? ClassLoaderMgr.getExtensionJar() : null;
        if (jFile != null) {
            conf.set("mapred.jar", jFile.getAbsolutePath());
            job = new Job(conf);
        } else {
            job = new Job(conf);
            job.setJarByClass(NativeMapReduceExpr.class);
        }

        boolean status;
        try {
            // use the job's own success flag: a job that completes unsuccessfully
            // returns false here without throwing, which must yield status=false
            status = job.waitForCompletion(true);
        } catch (Exception e) {
            status = false;
            LOG.warn("native map-reduce job failed", e);
        }
        return statusRecord(status);
    }

    /** Builds the { status: boolean } result record shared by both eval paths. */
    private static JsonRecord statusRecord(boolean status) {
        BufferedJsonRecord ret = new BufferedJsonRecord();
        ret.add(STATUS, status ? JsonBool.TRUE : JsonBool.FALSE);
        return ret;
    }
}