tajo.TajoTestingUtility.java Source code

Java tutorial

Introduction

Here is the source code for tajo.TajoTestingUtility.java

Source

/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package tajo;

import com.google.common.base.Charsets;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import tajo.catalog.*;
import tajo.catalog.proto.CatalogProtos;
import tajo.client.TajoClient;
import tajo.conf.TajoConf;
import tajo.conf.TajoConf.ConfVars;
import tajo.zookeeper.MiniZooKeeperCluster;

import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.net.InetSocketAddress;
import java.sql.ResultSet;
import java.util.UUID;

public class TajoTestingUtility {
    private static Log LOG = LogFactory.getLog(TajoTestingUtility.class);
    private TajoConf conf;

    /**
     * Set if we were passed a zkCluster.  If so, we won't shutdown zk as
     * part of general shutdown.
     */
    private boolean passedZkCluster = false;

    private FileSystem defaultFS = null;
    private MiniDFSCluster dfsCluster;
    private MiniZooKeeperCluster zkCluster = null;
    private MiniTajoCluster tajoCluster;
    private MiniCatalogServer catalogCluster;

    // If non-null, then already a cluster running.
    private File clusterTestBuildDir = null;

    /**
     * System property key to get test directory value.
     * Name is as it is because mini dfs has hard-codings to put test data here.
     */
    public static final String TEST_DIRECTORY_KEY = "test.build.data";

    /**
     * Default parent directory for test output.
     */
    public static final String DEFAULT_TEST_DIRECTORY = "target/test-data";

    public TajoTestingUtility() {
        this.conf = new TajoConf();
    }

    public TajoConf getConfiguration() {
        return this.conf;
    }

    public void initTestDir() {
        if (System.getProperty(TEST_DIRECTORY_KEY) == null) {
            clusterTestBuildDir = setupClusterTestBuildDir();
            System.setProperty(TEST_DIRECTORY_KEY, clusterTestBuildDir.getAbsolutePath());
        }
    }

    /**
     * @return Where to write test data on local filesystem; usually
     * {@link #DEFAULT_TEST_DIRECTORY}
     * @see #setupClusterTestBuildDir()
     */
    public static File getTestDir() {
        return new File(System.getProperty(TEST_DIRECTORY_KEY, DEFAULT_TEST_DIRECTORY));
    }

    /**
     * @param subdirName
     * @return Path to a subdirectory named <code>subdirName</code> under
     * {@link #getTestDir()}.
     * @see #setupClusterTestBuildDir()
     */
    public static File getTestDir(final String subdirName) {
        return new File(getTestDir(), subdirName);
    }

    public File setupClusterTestBuildDir() {
        String randomStr = UUID.randomUUID().toString();
        String dirStr = getTestDir(randomStr).toString();
        File dir = new File(dirStr).getAbsoluteFile();
        // Have it cleaned up on exit
        dir.deleteOnExit();
        return dir;
    }

    ////////////////////////////////////////////////////////
    // HDFS Section
    ////////////////////////////////////////////////////////
    /**
     * Start a minidfscluster.
     * @param servers How many DNs to start.
     * @throws Exception
     * @see {@link #shutdownMiniDFSCluster()}
     * @return The mini dfs cluster created.
     */
    public MiniDFSCluster startMiniDFSCluster(int servers) throws Exception {
        return startMiniDFSCluster(servers, null, null);
    }

    /**
     * Start a minidfscluster.
     * Can only create one.
     * @param servers How many DNs to start.
     * @param dir Where to home your dfs cluster.
     * @param hosts hostnames DNs to run on.
     * @throws Exception
     * @see {@link #shutdownMiniDFSCluster()}
     * @return The mini dfs cluster created.
     * @throws IOException
     */
    public MiniDFSCluster startMiniDFSCluster(int servers, final File dir, final String hosts[])
            throws IOException {
        // This does the following to home the minidfscluster
        // base_dir = new File(System.getProperty("test.build.data", "build/test/data"), "dfs/");
        // Some tests also do this:
        //  System.getProperty("test.cache.data", "build/test/cache");
        if (dir == null) {
            this.clusterTestBuildDir = setupClusterTestBuildDir();
        } else {
            this.clusterTestBuildDir = dir;
        }

        System.setProperty(TEST_DIRECTORY_KEY, this.clusterTestBuildDir.toString());
        System.setProperty("test.cache.data", this.clusterTestBuildDir.toString());
        this.dfsCluster = new MiniDFSCluster(0, this.conf, servers, true, true, true, null, null, hosts, null);
        // Set this just-started cluser as our filesystem.
        this.defaultFS = this.dfsCluster.getFileSystem();
        this.conf.set("fs.defaultFS", defaultFS.getUri().toString());
        // Do old style too just to be safe.
        this.conf.set("fs.default.name", defaultFS.getUri().toString());

        return this.dfsCluster;
    }

    public void shutdownMiniDFSCluster() throws Exception {
        if (this.dfsCluster != null) {
            // The below throws an exception per dn, AsynchronousCloseException.
            this.dfsCluster.shutdown();
        }
    }

    public boolean isRunningDFSCluster() {
        return this.defaultFS != null;
    }

    public MiniDFSCluster getMiniDFSCluster() {
        return this.dfsCluster;
    }

    public FileSystem getDefaultFileSystem() {
        return this.defaultFS;
    }

    ////////////////////////////////////////////////////////
    // Zookeeper Section
    ////////////////////////////////////////////////////////
    public MiniZooKeeperCluster startMiniZKCluster() throws Exception {
        return startMiniZKCluster(1);
    }

    public MiniZooKeeperCluster startMiniZKCluster(int zookeeperServerNum) throws Exception {
        return startMiniZKCluster(setupClusterTestBuildDir(), zookeeperServerNum);
    }

    private MiniZooKeeperCluster startMiniZKCluster(final File dir) throws Exception {
        return startMiniZKCluster(dir, 1);
    }

    private MiniZooKeeperCluster startMiniZKCluster(final File dir, int zookeeperServerNum) throws Exception {
        this.passedZkCluster = false;
        if (this.zkCluster != null) {
            throw new IOException("Zoookeeper Cluster already running at " + dir);
        }
        this.zkCluster = new MiniZooKeeperCluster();
        int clientPort = this.zkCluster.startup(dir, zookeeperServerNum);
        this.conf.setVar(ConfVars.ZOOKEEPER_ADDRESS, "127.0.0.1:" + clientPort);

        return this.zkCluster;
    }

    public MiniZooKeeperCluster getZKCluster() {
        return this.zkCluster;
    }

    public void shutdownMiniZKCluster() throws IOException {
        if (this.zkCluster != null) {
            this.zkCluster.shutdown();
            this.zkCluster = null;
        }
    }

    ////////////////////////////////////////////////////////
    // Catalog Section
    ////////////////////////////////////////////////////////
    public MiniCatalogServer startCatalogCluster() throws Exception {
        TajoConf c = getConfiguration();
        c.setVar(ConfVars.CATALOG_ADDRESS, "localhost:0");

        if (clusterTestBuildDir == null) {
            clusterTestBuildDir = setupClusterTestBuildDir();
        }
        String testDir = clusterTestBuildDir.getAbsolutePath();

        conf.set(TConstants.JDBC_URI, "jdbc:derby:" + testDir + "/db");
        LOG.info("derby repository is set to " + conf.get(TConstants.JDBC_URI));

        this.catalogCluster = new MiniCatalogServer(conf);
        CatalogServer catServer = this.catalogCluster.getCatalogServer();
        InetSocketAddress sockAddr = catServer.getBindAddress();
        c.setVar(ConfVars.CATALOG_ADDRESS, sockAddr.getHostName() + ":" + sockAddr.getPort());

        return this.catalogCluster;
    }

    public void shutdownCatalogCluster() {
        this.catalogCluster.shutdown();
    }

    public MiniCatalogServer getMiniCatalogCluster() {
        return this.catalogCluster;
    }

    ////////////////////////////////////////////////////////
    // Tajo Cluster Section
    ////////////////////////////////////////////////////////
    private MiniTajoCluster startMiniTajoCluster(File testBuildDir, final int numSlaves, boolean local)
            throws Exception {
        TajoConf c = getConfiguration();
        c.setVar(ConfVars.MASTER_ADDRESS, "localhost:0");
        c.setVar(ConfVars.CATALOG_ADDRESS, "localhost:0");
        conf.set(TConstants.JDBC_URI, "jdbc:derby:" + clusterTestBuildDir.getAbsolutePath() + "/db");
        LOG.info("derby repository is set to " + conf.get(TConstants.JDBC_URI));
        if (!local) {
            c.setVar(ConfVars.ENGINE_BASE_DIR, getMiniDFSCluster().getFileSystem().getUri() + "/tajo");
        } else {
            c.setVar(ConfVars.ENGINE_BASE_DIR, clusterTestBuildDir.getAbsolutePath() + "/tajo");
        }
        c.setVar(ConfVars.WORKER_BASE_DIR, clusterTestBuildDir + "/worker");
        c.setVar(ConfVars.WORKER_TMP_DIR, clusterTestBuildDir + "/worker/tmp");
        this.tajoCluster = new MiniTajoCluster(c, numSlaves);

        this.conf.setVar(ConfVars.MASTER_ADDRESS, c.getVar(ConfVars.MASTER_ADDRESS));
        this.conf.setVar(ConfVars.CATALOG_ADDRESS, c.getVar(ConfVars.CATALOG_ADDRESS));

        LOG.info("Mini Tajo cluster is up");
        return this.tajoCluster;
    }

    public void restartTajoCluster(int numSlaves) throws Exception {
        this.tajoCluster.shutdown();
        this.tajoCluster = new MiniTajoCluster(new TajoConf(this.conf), numSlaves);

        LOG.info("Minicluster has been restarted");
    }

    public MiniTajoCluster getMiniTajoCluster() {
        return this.tajoCluster;
    }

    public void shutdownMiniTajoCluster() {
        if (this.tajoCluster != null) {
            this.tajoCluster.shutdown();
            this.tajoCluster.join();
        }
        this.tajoCluster = null;
    }

    ////////////////////////////////////////////////////////
    // Meta Cluster Section
    ////////////////////////////////////////////////////////
    /**
     * @throws IOException If a cluster -- dfs or engine -- already running.
     */
    void isRunningCluster(String passedBuildPath) throws IOException {
        if (this.clusterTestBuildDir == null || passedBuildPath != null)
            return;
        throw new IOException("Cluster already running at " + this.clusterTestBuildDir);
    }

    /**
     * This method starts up a tajo cluster with a given number of clusters in
     * distributed mode.
     *
     * @param numSlaves the number of tajo cluster to start up
     * @return a mini tajo cluster
     * @throws Exception
     */
    public MiniTajoCluster startMiniCluster(final int numSlaves) throws Exception {
        return startMiniCluster(numSlaves, null);
    }

    public MiniTajoCluster startMiniCluster(final int numSlaves, final String[] dataNodeHosts) throws Exception {
        // the conf is set to the distributed mode.
        this.conf.setBoolVar(ConfVars.CLUSTER_DISTRIBUTED, true);

        int numDataNodes = numSlaves;
        if (dataNodeHosts != null && dataNodeHosts.length != 0) {
            numDataNodes = dataNodeHosts.length;
        }

        LOG.info("Starting up minicluster with 1 master(s) and " + numSlaves + " worker(s) and " + numDataNodes
                + " datanode(s)");

        // If we already put up a cluster, fail.
        String testBuildPath = conf.get(TEST_DIRECTORY_KEY, null);
        isRunningCluster(testBuildPath);
        if (testBuildPath != null) {
            LOG.info("Using passed path: " + testBuildPath);
        }

        // Make a new random dir to home everything in.  Set it as system property.
        // minidfs reads home from system property.
        this.clusterTestBuildDir = testBuildPath == null ? setupClusterTestBuildDir() : new File(testBuildPath);

        System.setProperty(TEST_DIRECTORY_KEY, this.clusterTestBuildDir.getAbsolutePath());

        startMiniDFSCluster(numDataNodes, this.clusterTestBuildDir, dataNodeHosts);
        this.dfsCluster.waitClusterUp();

        // Start up a zk cluster.
        if (this.zkCluster == null) {
            startMiniZKCluster(this.clusterTestBuildDir);
        }
        // TODO: to be fixed
        /*
          if (this.catalogCluster == null) {
            startCatalogCluster();
          }
          */

        return startMiniTajoCluster(this.clusterTestBuildDir, numSlaves, false);
    }

    public MiniTajoCluster startMiniClusterInLocal(final int numSlaves) throws Exception {
        // the conf is set to the distributed mode.
        this.conf.setBoolVar(ConfVars.CLUSTER_DISTRIBUTED, true);

        // If we already put up a cluster, fail.
        String testBuildPath = conf.get(TEST_DIRECTORY_KEY, null);
        isRunningCluster(testBuildPath);
        if (testBuildPath != null) {
            LOG.info("Using passed path: " + testBuildPath);
        }

        // Make a new random dir to home everything in.  Set it as system property.
        // minidfs reads home from system property.
        this.clusterTestBuildDir = testBuildPath == null ? setupClusterTestBuildDir() : new File(testBuildPath);

        System.setProperty(TEST_DIRECTORY_KEY, this.clusterTestBuildDir.getAbsolutePath());

        // Start up a zk cluster.
        if (this.zkCluster == null) {
            startMiniZKCluster(this.clusterTestBuildDir);
        }

        return startMiniTajoCluster(this.clusterTestBuildDir, numSlaves, true);
    }

    public void shutdownMiniCluster() throws IOException {
        LOG.info("Shutting down minicluster");
        shutdownMiniTajoCluster();

        if (!this.passedZkCluster)
            shutdownMiniZKCluster();
        if (this.dfsCluster != null) {
            this.dfsCluster.shutdown();
        }

        if (this.clusterTestBuildDir != null && this.clusterTestBuildDir.exists()) {
            LocalFileSystem localFS = LocalFileSystem.getLocal(conf);
            localFS.delete(new Path(clusterTestBuildDir.toString()), true);
            this.clusterTestBuildDir = null;
        }
        if (this.catalogCluster != null) {
            shutdownCatalogCluster();
        }

        LOG.info("Minicluster is down");
    }

    public static ResultSet runInLocal(String[] tableNames, Schema[] schemas, Options option, String[][] tables,
            String query) throws Exception {
        TajoTestingUtility util = new TajoTestingUtility();
        util.startMiniClusterInLocal(1);
        TajoConf conf = util.getConfiguration();
        TajoClient client = new TajoClient(conf);

        File tmpDir = util.setupClusterTestBuildDir();
        for (int i = 0; i < tableNames.length; i++) {
            File tableDir = new File(tmpDir, tableNames[i]);
            tableDir.mkdirs();
            File dataDir = new File(tableDir, "data");
            dataDir.mkdirs();
            File tableFile = new File(dataDir, tableNames[i]);
            writeLines(tableFile, tables[i]);
            TableMeta meta = TCatUtil.newTableMeta(schemas[i], CatalogProtos.StoreType.CSV, option);
            client.createTable(tableNames[i], new Path(tableDir.getAbsolutePath()), meta);
        }
        Thread.sleep(1000);
        ResultSet res = client.executeQuery(query);
        util.shutdownMiniCluster();
        return res;
    }

    public static ResultSet run(String[] names, String[] tablepaths, Schema[] schemas, Options option, String query)
            throws Exception {
        TajoTestingUtility util = new TajoTestingUtility();
        util.startMiniCluster(1);
        TajoConf conf = util.getConfiguration();
        TajoClient client = new TajoClient(conf);

        FileSystem fs = util.getDefaultFileSystem();
        Path rootDir = util.getMiniTajoCluster().getMaster().getStorageManager().getDataRoot();
        fs.mkdirs(rootDir);
        for (int i = 0; i < tablepaths.length; i++) {
            Path localPath = new Path(tablepaths[i]);
            Path tablePath = new Path(rootDir, names[i]);
            fs.mkdirs(tablePath);
            Path dataPath = new Path(tablePath, "data");
            fs.mkdirs(dataPath);
            Path dfsPath = new Path(dataPath, localPath.getName());
            fs.copyFromLocalFile(localPath, dfsPath);
            TableMeta meta = TCatUtil.newTableMeta(schemas[i], CatalogProtos.StoreType.CSV, option);
            client.createTable(names[i], tablePath, meta);
        }
        Thread.sleep(1000);
        ResultSet res = client.executeQuery(query);
        util.shutdownMiniCluster();
        return res;
    }

    public static ResultSet run(String[] names, Schema[] schemas, Options option, String[][] tables, String query)
            throws Exception {
        TajoTestingUtility util = new TajoTestingUtility();
        util.startMiniCluster(1);
        TajoConf conf = util.getConfiguration();
        TajoClient client = new TajoClient(conf);

        FileSystem fs = util.getDefaultFileSystem();
        Path rootDir = util.getMiniTajoCluster().getMaster().getStorageManager().getDataRoot();
        fs.mkdirs(rootDir);
        for (int i = 0; i < names.length; i++) {
            Path tablePath = new Path(rootDir, names[i]);
            fs.mkdirs(tablePath);
            Path dataPath = new Path(tablePath, "data");
            fs.mkdirs(dataPath);
            Path dfsPath = new Path(dataPath, names[i] + ".tbl");
            FSDataOutputStream out = fs.create(dfsPath);
            for (int j = 0; j < tables[i].length; j++) {
                out.write((tables[i][j] + "\n").getBytes());
            }
            out.close();
            TableMeta meta = TCatUtil.newTableMeta(schemas[i], CatalogProtos.StoreType.CSV, option);
            client.createTable(names[i], tablePath, meta);
        }
        Thread.sleep(1000);
        ResultSet res = client.executeQuery(query);
        util.shutdownMiniCluster();
        return res;
    }

    /**
    * Write lines to a file.
    *
    * @param file File to write lines to
    * @param lines Strings written to the file
    * @throws IOException
    */
    private static void writeLines(File file, String... lines) throws IOException {
        Writer writer = Files.newWriter(file, Charsets.UTF_8);
        try {
            for (String line : lines) {
                writer.write(line);
                writer.write('\n');
            }
        } finally {
            Closeables.closeQuietly(writer);
        }
    }

    /**
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        TajoTestingUtility cluster = new TajoTestingUtility();
        File f = cluster.setupClusterTestBuildDir();
        System.out.println("first setupClusterTestBuildDir: " + f);
        f = cluster.setupClusterTestBuildDir();
        System.out.println("second setupClusterTestBuildDir: " + f);
        f = cluster.getTestDir();
        System.out.println("getTestDir() after second: " + f);
        f = cluster.getTestDir("abc");
        System.out.println("getTestDir(\"abc\") after second: " + f);

        cluster.initTestDir();
        f = cluster.getTestDir();
        System.out.println("getTestDir() after initTestDir: " + f);
        f = cluster.getTestDir("abc");
        System.out.println("getTestDir(\"abc\") after initTestDir: " + f);
        f = cluster.setupClusterTestBuildDir();
        System.out.println("setupClusterTestBuildDir() after initTestDir: " + f);

        TajoTestingUtility cluster2 = new TajoTestingUtility();
        File f2 = cluster2.setupClusterTestBuildDir();
        System.out.println("first setupClusterTestBuildDir of cluster2: " + f2);
        /*
        String [] names = {"table1"};
        String [][] tables = new String[1][];
        tables[0] = new String[] {"a,b,c", "b,c,d"};
            
        Schema [] schemas = new Schema[1];
        schemas[0] = new Schema()
              .addColumn("f1", CatalogProtos.DataType.STRING)
              .addColumn("f2", CatalogProtos.DataType.STRING)
              .addColumn("f3", CatalogProtos.DataType.STRING);
            
        ResultSet res = runInLocal(names, schemas, tables, "select f1 from table1");
        res.next();
        System.out.println(res.getString(0));
        res.next();
        System.out.println(res.getString(0));
        System.exit(0);
        */
    }
}