com.ibm.jaql.MiniCluster.java Source code


Here is the source code for com.ibm.jaql.MiniCluster.java

Source

/*
 * Copyright (C) IBM Corp. 2010.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.ibm.jaql;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.MiniMRCluster;

/**
 * Starts up a Hadoop minicluster, which runs all of the servers inside a
 * single JVM.
 * 
 * To access the Hadoop webapps, the hadoop webapps directory must be on the
 * classpath. It _should_ work with the webapps inside the hadoop .jar file,
 * but Jetty doesn't unpack the directory properly without a trailing '/'
 * on the name: Hadoop registers webapps/hdfs instead of webapps/hdfs/, so
 * Jetty unpacks it into a file $TEMP/.../webapps/hdfs instead of a directory
 * $TEMP/.../webapps/hdfs/. Jetty then cannot find the WEB-INF/web.xml file
 * off the root, so it cannot run the Hadoop JSPs.
 */
public class MiniCluster {
    public static String tempDir = "/temp";
    public static final Log LOG = LogFactory.getLog(MiniCluster.class.getName());

    protected Configuration conf;
    protected MiniDFSCluster dfsCluster;
    protected MiniMRCluster mrCluster;

    /**
     * @param args
     */
    public static void main(String[] args) throws IOException {
        String clusterHome = System.getProperty("hadoop.minicluster.dir");
        if (clusterHome == null) {
            clusterHome = "./minicluster";
            System.setProperty("hadoop.minicluster.dir", clusterHome);
        }
        LOG.info("hadoop.minicluster.dir=" + clusterHome);
        File clusterFile = new File(clusterHome);
        if (!clusterFile.exists()) {
            clusterFile.mkdirs();
        }
        if (!clusterFile.isDirectory()) {
            throw new IOException("minicluster home directory must be a directory: " + clusterHome);
        }
        if (!clusterFile.canRead() || !clusterFile.canWrite()) {
            throw new IOException("minicluster home directory must be readable and writable: " + clusterHome);
        }

        String logDir = System.getProperty("hadoop.log.dir");
        if (logDir == null) {
            logDir = clusterHome + File.separator + "logs";
            System.setProperty("hadoop.log.dir", logDir);
        }
        File logFile = new File(logDir);
        if (!logFile.exists()) {
            logFile.mkdirs();
        }

        String confDir = System.getProperty("hadoop.conf.override");
        if (confDir == null) {
            confDir = clusterHome + File.separator + "conf";
            System.setProperty("hadoop.conf.override", confDir);
        }
        File confFile = new File(confDir);
        if (!confFile.exists()) {
            confFile.mkdirs();
        }

        System.out.println("starting minicluster in " + clusterHome);
        MiniCluster mc = new MiniCluster(args);
        // To find the ports in the logs:
        //   hdfs: search for: Web-server up at: localhost:####
        //   mapred: search for: mapred.JobTracker: JobTracker webserver: ####
        Configuration conf = mc.getConf();
        System.out.println("fs.default.name: " + conf.get("fs.default.name"));
        System.out.println("dfs.http.address: " + conf.get("dfs.http.address"));
        System.out.println("mapred.job.tracker.http.address: " + conf.get("mapred.job.tracker.http.address"));

        boolean waitForInterrupt;
        try {
            System.out.println("press enter to end minicluster (or eof to run forever)...");
            waitForInterrupt = System.in.read() < 0; // wait for any input or eof
        } catch (Exception e) {
            // something odd happened.  Just shutdown. 
            LOG.error("error reading from stdin", e);
            waitForInterrupt = false;
        }

        // eof means that we will wait for a kill signal
        while (waitForInterrupt) {
            System.out.println("minicluster is running until interrupted...");
            try {
                Thread.sleep(60 * 60 * 1000);
            } catch (InterruptedException e) {
                waitForInterrupt = false;
            }
        }

        System.out.println("shutting down minicluster...");
        try {
            mc.tearDown();
        } catch (Exception e) {
            LOG.error("error while shutting down minicluster", e);
        }
    }

    private Configuration getConf() {
        return conf;
    }

    public MiniCluster(String[] args) throws IOException {
        setUp();
        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                // make sure the clusters are stopped even on an abnormal exit
                try {
                    tearDown();
                } catch (Exception e) {
                    LOG.error("failure during minicluster teardown", e);
                }
            }
        });
    }

    /*
     * (non-Javadoc)
     *
     * @see com.ibm.jaql.lang.JaqlBaseTestCase#setUp()
     */
    protected void setUp() throws IOException {
        final int numNodes = 1;

        conf = new Configuration();

        if (System.getProperty("os.name").startsWith("Windows")) {
            // There is a bug in hadoop 0.20.1 on windows
            // ... INFO mapred.JobClient: Task Id : attempt_..., Status : FAILED
            // java.io.FileNotFoundException: 
            //    File C:/tmp/hadoop-xxx/mapred/local/1_0/taskTracker/jobcache/job_xxx/attempt_xxx/0_2/work/tmp 
            //    does not exist.
            // at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:361)
            // at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:245)
            // at org.apache.hadoop.mapred.TaskRunner.setupWorkDir(TaskRunner.java:519)
            // at org.apache.hadoop.mapred.Child.main(Child.java:155)
            //
            // The following is reported to work around the problem
            String tmp = conf.get("hadoop.tmp.dir", "c:/temp");
            conf.set("mapred.child.tmp", tmp + "/mapred/child.tmp");
        }

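        // MiniDFSCluster(conf, numDataNodes, format, racks) formats and starts a
        // fresh single-node HDFS; MiniMRCluster(numTaskTrackers, namenode, numDirs)
        // then starts a JobTracker and one TaskTracker on top of it.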
        dfsCluster = new MiniDFSCluster(conf, numNodes, true, null);
        mrCluster = new MiniMRCluster(numNodes, dfsCluster.getFileSystem().getUri().getAuthority(), 1);
        setupOverride(mrCluster.createJobConf(), conf);

        // this.conf = conf = new Configuration();
        //    FileSystem fs = FileSystem.get(conf);

        //    // make the home directory if it does not exist
        //    Path hd = fs.getWorkingDirectory();
        //    if (!fs.exists(hd)) fs.mkdirs(hd);
        //
        //    // make a tmp directory if it does not exist
        //    Path t = new Path(tempDir);
        //    if (!fs.exists(t))
        //    {
        //      fs.mkdirs(t);
        //    }
    }

    /*
     * (non-Javadoc)
     *
     * @see com.ibm.jaql.lang.JaqlBaseTestCase#tearDown()
     */
    protected void tearDown() throws IOException {
        if (mrCluster != null)
            mrCluster.shutdown();
        if (dfsCluster != null) {
            dfsCluster.getFileSystem().close();
            dfsCluster.shutdown();
        }
    }

    /**
     * @param jconf
     * @param conf
     * @throws IOException
     */
    protected void setupOverride(Configuration jconf, Configuration conf) throws IOException {
        // require that override dir exists
        File overrideDir = new File(System.getProperty("hadoop.conf.override"));
        if (!overrideDir.exists()) {
            throw new IOException("hadoop-override dir must exist");
        }

        // dump the JobConf from MiniMR to stdout for debugging
        jconf.writeXml(System.out);
        System.out.println();
        System.out.flush();

        // copy the job tracker address and its web UI address from the
        // MiniMR JobConf into the conf that gets written to the override dir
        conf.set("mapred.job.tracker", jconf.get("mapred.job.tracker", null));
        String name = "mapred.job.tracker.info.port";
        String addr = jconf.get(name, null);
        if (addr == null) {
            name = "mapred.job.tracker.http.address";
            addr = jconf.get(name, null);
        }
        conf.set(name, addr);

        //    this.dfsCluster.getNameNode().getHttpAddress()

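        // Write the merged conf to all three override files. They are named
        // *-default.xml, presumably so that Hadoop's Configuration picks them
        // up from the classpath and external clients see the minicluster's
        // addresses.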
        String path = overrideDir.getCanonicalPath() + File.separator;
        System.out.println("writing conf to: " + path);
        OutputStream outCore = new FileOutputStream(path + "core-default.xml");
        OutputStream outMapred = new FileOutputStream(path + "mapred-default.xml");
        OutputStream outHdfs = new FileOutputStream(path + "hdfs-default.xml");
        conf.writeXml(outCore);
        conf.writeXml(outMapred);
        conf.writeXml(outHdfs);
        outCore.close();
        outMapred.close();
        outHdfs.close();
        System.out.println("setup complete");
        System.out.flush();
    }
}
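
Example

A minimal sketch of how a separate JVM might connect to a running minicluster by loading the configuration that setupOverride() writes out. The MiniClusterClient class name and the ./minicluster/conf path are illustrative assumptions (the path matches the defaults above); this assumes the same Hadoop 0.20-era jars on the classpath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical client: load the conf written by setupOverride() and
// create a directory in the minicluster's HDFS.
public class MiniClusterClient {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // core-default.xml holds fs.default.name for the minicluster
        conf.addResource(new Path("./minicluster/conf/core-default.xml"));
        FileSystem fs = FileSystem.get(conf);
        fs.mkdirs(new Path("/tmp/demo"));
        System.out.println("connected to " + conf.get("fs.default.name"));
    }
}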