org.kiji.maven.plugins.hbase.StartMojo.java Source code

Java tutorial

Introduction

Here is the source code for org.kiji.maven.plugins.hbase.StartMojo.java

Source

/**
 * Licensed to WibiData, Inc. under one or more contributor license
 * agreements.  See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.  WibiData, Inc.
 * licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */

package org.kiji.maven.plugins.hbase;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.maven.artifact.Artifact;
import org.apache.maven.artifact.DependencyResolutionRequiredException;
import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.project.MavenProject;

/**
 * A maven goal that starts a mini HBase cluster in a new daemon thread.
 *
 * <p>A new daemon thread is created that starts a mini HBase cluster.  The main thread
 * blocks until the HBase cluster has full started.  The daemon thread with the
 * in-process HBase cluster will continue to run in the background until stopped by the
 * 'stop' goal of the plugin.</p>
 *
 * <p>The configuration of the started mini HBase cluster will be written to a
 * hbase-site.xml file in the test classpath ('${basedir}/target/test-classes' by
 * default).  The path to the generated configuration file may be customized with the
 * 'hbaseSiteFile' property</p>
 *
 * <p>A configuration index can be written by this goal.  The configuration index is a file that
 * contains one line for each configuration file written by this goal, where the line contains the
 * path to the configuration file.  By default, the goal does not write a configuration index.
 * Setting the property 'writeConfIndex' to true will cause a configuration index to be written.
 * By default, the configuration index will be written to
 * '${basedir}/target/test-classes/conf-index.conf'. The path to the generated configuration index
 * can be customized by setting the 'hbaseConfIndex' property.</p>
 *
 * @goal start
 * @phase pre-integration-test
 * @requiresDependencyResolution test
 */
public class StartMojo extends BaseClusterMojo {

    /**
     * The file that will store the configuration required to connect to the started mini HBase
     * cluster.  This file will be generated by the goal.
     *
     * @parameter property="hbaseSiteFile" expression="${hbase.site.file}" default-value="${project.build.testOutputDirectory}/hbase-site.xml"
     * @required
     */
    private File mHBaseSiteFile;

    /**
     * Whether two separate XML configuration files should be generated
     * (one for HDFS+MapReduce, and one for HBase).
     *
     * @parameter property="separateConfigurationFiles" expression="${separate.configuration.files}" default-value="true"
     */
    private boolean mSeparateConfigurationFiles;

    /**
     * The file that will store the configuration required to connect to the started mini HDFS and
     * MapReduce clusters. This file will be generated by the goal.
     *
     * @parameter property="coreSiteFile" expression="${core.site.file}" default-value="${project.build.testOutputDirectory}/core-site.xml"
     * @required
     */
    private File mCoreSiteFile;

    /**
     * If true, this goal should write an index file that provides the paths to the HBase
     * configuration files written by this goal.
     *
     * @parameter property="writeConfIndex" expression="${hbase.conf.index}" default-value="false"
     */
    private boolean mWriteConfIndex;

    /**
     * The file that will store paths to the configuration files generated by the goal.  This file
     * will be generated by the goal and will contain one line for each configuration file giving the
     * path to that configuration file.
     *
     * @parameter property="hbaseConfIndex" expression="${hbase.conf.index.file}" default-value="${project.build.testOutputDirectory}/conf-index.conf"
     */
    private File mHBaseConfIndex;

    /**
     * If true, also start a mini MapReduce cluster.
     *
     * @parameter property="mapReduceEnabled" expression="${mapreduce.enabled}" default-value="false"
     */
    private boolean mIsMapReduceEnabled;

    /**
     * Extra Hadoop configuration properties to use.
     *
     * @parameter property="hadoopConfiguration"
     */
    private Properties mHadoopConfiguration;

    /**
     * A list of this plugin's dependency artifacts.
     *
     * @parameter default-value="${plugin.artifacts}"
     * @required
     * @readonly
     */
    private List<Artifact> mPluginDependencyArtifacts;

    /**
     * The maven project this plugin is running within.
     *
     * @parameter default-value="${project}"
     * @required
     * @readonly
     */
    private MavenProject mMavenProject;

    /**
     * Sets the file that we should write the HBase cluster configuration to.
     *
     * <p>Note: The property "hbaseSiteFile" defined in this mojo means this method must be
     * named setHbaseSiteFile instead of setHBaseSiteFile.</p>
     *
     * @param hbaseSiteFile The file we should write to.
     */
    public void setHbaseSiteFile(File hbaseSiteFile) {
        mHBaseSiteFile = hbaseSiteFile;
    }

    /**
     * Sets whether separate core-site.xml and hbase-site.xml files
     * should be generated (otherwise they are combined into a single
     * hbase-site.xml file).
     *
     * @param separateConfigurationFiles Whether they should be separated.
     */
    public void setSeparateConfigurationFiles(boolean separateConfigurationFiles) {
        mSeparateConfigurationFiles = separateConfigurationFiles;
    }

    /**
     * Sets the file that we should write the MapReduce/HDFS cluster configuration to.
     *
     * @param coreSiteFile The file we should write to.
     */
    public void setCoreSiteFile(File coreSiteFile) {
        mCoreSiteFile = coreSiteFile;
    }

    /**
     * Sets whether this goal should write a configuration index file.
     *
     * @param writeConfIndex True if an index file should be written, false otherwise.
     */
    public void setWriteConfIndex(boolean writeConfIndex) {
        mWriteConfIndex = writeConfIndex;
    }

    /**
     * Sets the file that the HBase configuration index should be written to.
     *
     * <p>Note: The property "hbaseConfIndex" defined in this mojo means this method should be named
     * setHbaseConfIndex.</p>
     *
     * @param hbaseConfIndex The file we should write to.
     */
    public void setHbaseConfIndex(File hbaseConfIndex) {
        mHBaseConfIndex = hbaseConfIndex;
    }

    /**
     * Sets whether we should start a mini MapReduce cluster in addition to the HBase cluster.
     *
     * @param enabled Whether to start a mini MapReduce cluster.
     */
    public void setMapReduceEnabled(boolean enabled) {
        mIsMapReduceEnabled = enabled;
    }

    /**
     * Sets Hadoop configuration properties.
     *
     * @param properties Hadoop configuration properties to use in the mini cluster.
     */
    public void setHadoopConfiguration(Properties properties) {
        mHadoopConfiguration = properties;
    }

    /**
     * Starts a mini HBase cluster in a new thread.
     *
     * <p>This method is called by the maven plugin framework to run the goal.</p>
     *
     * @throws MojoExecutionException If there is a fatal error during this goal's execution.
     */
    @Override
    public void execute() throws MojoExecutionException {
        if (mSkip) {
            getLog().info("Not starting an HBase cluster because skip=true.");
            return;
        }

        // Expose the full dependency classpath to child JVMs (see getClassPath()).
        System.setProperty("java.class.path", getClassPath());
        getLog().info("Set java.class.path to: " + System.getProperty("java.class.path"));

        // Set any extra hadoop options.
        Configuration conf = new Configuration();
        // ensure that the base HBase properties also get added
        conf.addResource("hbase-default.xml");
        // ignore the version of the users of the running cluster.
        conf.setBoolean("hbase.defaults.for.version.skip", true);
        if (null != mHadoopConfiguration) {
            for (Map.Entry<Object, Object> property : mHadoopConfiguration.entrySet()) {
                String confKey = property.getKey().toString();
                String confValue = property.getValue().toString();
                getLog().info("Setting hadoop conf property '" + confKey + "' to '" + confValue + "'");
                conf.set(confKey, confValue);
            }
        }

        removeHadoopTmpDir(conf);

        // Start the cluster.
        try {
            MiniHBaseClusterSingleton.INSTANCE.startAndWaitUntilReady(getLog(), mIsMapReduceEnabled, conf);
        } catch (IOException e) {
            throw new MojoExecutionException("Unable to start HBase cluster.", e);
        }

        if (mSeparateConfigurationFiles) {
            // Write separate core-site and hbase-site files.
            writeHBaseSiteFile(conf);
            writeCoreSiteFile(conf);
        } else {
            // Combine the configs into a single hbase-site file.
            writeSiteFile(conf, mHBaseSiteFile);
        }

        // Write the configuration index.
        if (mWriteConfIndex) {
            writeConfigurationIndex();
        }
    }

    /**
     * Gets the runtime classpath required to run the mini clusters.
     *
     * <p>The maven classloading scheme is nonstandard.  They only put the "classworlds" jar
     * on the classpath, and it takes care of ClassLoading the rest of the jars.  This a
     * problem if we are going to start a mini MapReduce cluster.  The TaskTracker will
     * start a child JVM with the same classpath as this process, and it won't have
     * configured the classworlds class loader.  To work around this, we will put all of
     * our dependencies into the java.class.path system property, which will be read by
     * the TaskRunner's child JVM launcher to build the child JVM classpath.</p>
     *
     * <p>Note that when we say "all of our dependencies" we mean both the dependencies of
     * this plugin as well as the test classes and dependencies of the project that is
     * running the plugin.  We need to include the latter on the classpath because tests are
     * still just .class files at integration-test-time.  There will be no jars available
     * yet to put on the distributed cache via job.setJarByClass().  Hence, all of the
     * test-classes in the project running this plugin need to already be on the classpath
     * of the MapReduce cluster.</p>
     *
     * @return The classpath string, entries joined with the platform path separator.
     * @throws MojoExecutionException If the project's test classpath cannot be resolved.
     */
    private String getClassPath() throws MojoExecutionException {
        // Maintain a set of classpath components added so we can de-dupe.
        Set<String> alreadyAddedComponents = new HashSet<String>();

        // Use this to build up the classpath string.
        StringBuilder classpath = new StringBuilder();

        // Add the existing classpath.  Use the platform path separator (":" on Unix,
        // ";" on Windows) rather than a hard-coded ":" so the child JVM classpath is
        // valid on all platforms.
        String existingClasspath = System.getProperty("java.class.path");
        classpath.append(existingClasspath);
        alreadyAddedComponents.addAll(Arrays.asList(existingClasspath.split(File.pathSeparator)));

        // Add the test classes and dependencies of the maven project running this plugin.
        //
        // Note: It is important that we add these classes and dependencies before we add this
        // plugin's dependencies in case the maven project needs to override a jar version.
        List<?> testClasspathComponents;
        try {
            testClasspathComponents = mMavenProject.getTestClasspathElements();
        } catch (DependencyResolutionRequiredException e) {
            throw new MojoExecutionException("Unable to retrieve project test classpath", e);
        }
        for (Object testClasspathComponent : testClasspathComponents) {
            String dependency = testClasspathComponent.toString();
            if (alreadyAddedComponents.contains(dependency)) {
                continue;
            }
            classpath.append(File.pathSeparator);
            classpath.append(dependency);
            alreadyAddedComponents.add(dependency);
        }

        // Add this plugin's dependencies.
        for (Artifact artifact : mPluginDependencyArtifacts) {
            String dependency = artifact.getFile().getPath();
            if (alreadyAddedComponents.contains(dependency)) {
                continue;
            }
            classpath.append(File.pathSeparator);
            classpath.append(dependency);
            alreadyAddedComponents.add(dependency);
        }

        return classpath.toString();
    }

    /**
     * Writes the HBase-specific contents of the specified configuration to the HBase site file.
     *
     * @param conf The configuration to write.
     * @throws MojoExecutionException If there is an error writing the file.
     */
    private void writeHBaseSiteFile(Configuration conf) throws MojoExecutionException {
        writeSiteFile(getHBaseOnlyConfiguration(conf), mHBaseSiteFile);
    }

    /**
     * Writes the MapReduce/HDFS-specific contents of the specified configuration to the core
     * site file.
     *
     * @param conf The configuration to write.
     * @throws MojoExecutionException If there is an error writing the file.
     */
    private void writeCoreSiteFile(Configuration conf) throws MojoExecutionException {
        writeSiteFile(getMapReduceOnlyConfiguration(conf), mCoreSiteFile);
    }

    /**
     * Writes the specified configuration to the specified file.
     *
     * @param conf The configuration to write.
     * @param siteFile The file to write the configuration to.
     * @throws MojoExecutionException If there is an error writing the file.
     */
    private void writeSiteFile(Configuration conf, File siteFile) throws MojoExecutionException {
        // Create the parent directory for the site file if it does not already exist.
        createFileParentDir(siteFile);

        // Write the file.
        FileOutputStream fileOutputStream = null;
        try {
            fileOutputStream = new FileOutputStream(siteFile);
            conf.writeXml(fileOutputStream);
        } catch (IOException e) {
            throw new MojoExecutionException("Unable to write to site file: " + siteFile.getPath(), e);
        } finally {
            closeFileOutputStream(fileOutputStream);
        }
        getLog().info("Wrote " + siteFile.getPath() + ".");
    }

    /**
     * Gets a new configuration created from the specified configuration, including only HBase
     * configuration variables.
     *
     * @param conf The configuration to filter.
     * @return A new configuration containing copies of the appropriate configuration variables.
     */
    private Configuration getHBaseOnlyConfiguration(Configuration conf) {
        return getFilteredConfiguration(conf, true);
    }

    /**
     * Gets a new configuration created from the specified configuration, including only
     * MapReduce/HDFS configuration variables.
     *
     * @param conf The configuration to filter.
     * @return A new configuration containing copies of the appropriate configuration variables.
     */
    private Configuration getMapReduceOnlyConfiguration(Configuration conf) {
        return getFilteredConfiguration(conf, false);
    }

    /**
     * Gets a new configuration created from the specified configuration,
     * including only MapReduce/HDFS configuration variables or HBase only configuration variables.
     *
     * <p>A variable is considered HBase-specific if and only if its key starts with
     * "hbase"; all other variables are treated as MapReduce/HDFS variables.</p>
     *
     * @param conf The configuration to filter.
     * @param hBaseOnly <code>true</code> if only HBase configuration variables should be included,
     *     <code>false</code> if only MapReduce/HDFS configuration variables should be included.
     * @return A new configuration with copies of the appropriate configuration variables.
     */
    private Configuration getFilteredConfiguration(Configuration conf, boolean hBaseOnly) {
        // Pass false so the new Configuration does not load the default resources;
        // it should contain only the entries copied below.
        Configuration filteredConf = new Configuration(false);
        for (Map.Entry<String, String> entry : conf) {
            boolean startsWithHBase = entry.getKey().startsWith("hbase");
            if (startsWithHBase == hBaseOnly) {
                filteredConf.set(entry.getKey(), entry.getValue());
            }
        }
        return filteredConf;
    }

    /**
     * Writes a configuration index.
     *
     * <p>The index contains one line per generated configuration file, giving the path to
     * that file.  Currently only the HBase site file path is listed.</p>
     *
     * @throws MojoExecutionException If there is an error writing the configuration file.
     */
    private void writeConfigurationIndex() throws MojoExecutionException {
        // Create the parent directory of the file we are writing.
        createFileParentDir(mHBaseConfIndex);

        // Write the file.
        FileOutputStream fileOutputStream = null;
        PrintWriter fileWriter = null;
        try {
            fileOutputStream = new FileOutputStream(mHBaseConfIndex);
            fileWriter = new PrintWriter(fileOutputStream);
            fileWriter.println(mHBaseSiteFile.getPath());
            // PrintWriter swallows IOExceptions internally, so surface any write
            // failure explicitly instead of silently producing a truncated index.
            if (fileWriter.checkError()) {
                throw new MojoExecutionException(
                        "Unable to write to configuration index file: " + mHBaseConfIndex.getPath());
            }
        } catch (IOException e) {
            throw new MojoExecutionException(
                    "Unable to write to configuration index file: " + mHBaseConfIndex.getPath(), e);
        } finally {
            if (null != fileWriter) {
                // Closing the writer also closes the underlying stream; the extra
                // close below is then a harmless no-op.
                fileWriter.close();
            }
            closeFileOutputStream(fileOutputStream);
        }
        getLog().info("Wrote " + mHBaseConfIndex.getPath() + ".");
    }

    /**
     * Gets the parent directory of the specified file.  Creates the directory if it does not already
     * exist.
     *
     * @param file The file whose parent directory should be created.
     * @return The parent directory, or null if the file has no parent path.
     * @throws MojoExecutionException If there is an error getting or creating the parent directory.
     */
    private static File createFileParentDir(File file) throws MojoExecutionException {
        File parentDir = file.getParentFile();
        if (null != parentDir && !parentDir.exists() && !parentDir.mkdirs()) {
            throw new MojoExecutionException("Unable to create or access parent directory of: " + file.getParent());
        }
        return parentDir;
    }

    /**
     * Closes the specified FileOutputStream.  The specified stream may be null, in which case this
     * operation is a no-op.
     *
     * @param stream The stream to close, possibly null.
     * @throws MojoExecutionException If there is an error closing the stream.
     */
    private static void closeFileOutputStream(FileOutputStream stream) throws MojoExecutionException {
        if (null != stream) {
            try {
                stream.close();
            } catch (IOException e) {
                throw new MojoExecutionException("Unable to close file stream.", e);
            }
        }
    }
}