org.apache.phoenix.schema.stats.UpdateStatisticsTool.java Source code

Introduction

Here is the source code for org.apache.phoenix.schema.stats.UpdateStatisticsTool.java, a MapReduce-based tool that collects Phoenix table statistics from an HBase snapshot.
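
Because the class extends Configured and implements Tool, it can be launched through ToolRunner as well as from the command line. A minimal driver sketch (the class name RunUpdateStats and the table name MY_TABLE are hypothetical; the -t, -ms and -runfg flags come from the source below):

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.ToolRunner;
import org.apache.phoenix.schema.stats.UpdateStatisticsTool;

public class RunUpdateStats {
    public static void main(String[] args) throws Exception {
        // Collect stats for MY_TABLE, letting the tool manage the snapshot
        // lifecycle and waiting for the MR job to finish.
        int exitCode = ToolRunner.run(HBaseConfiguration.create(),
                new UpdateStatisticsTool(),
                new String[] { "-t", "MY_TABLE", "-ms", "-runfg" });
        System.exit(exitCode);
    }
}

Note that -ms (manage-snapshot) must be combined with -runfg: per the validation in parseOptions below, a tool-managed snapshot cannot be used with a background job, since the snapshot would otherwise be deleted while the asynchronous job might still be reading it.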

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.phoenix.schema.stats;

import org.antlr.runtime.CharStream;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.metrics.Gauge;
import org.apache.hadoop.hbase.metrics.impl.MetricRegistriesImpl;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat.NullDBWritable;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.htrace.SpanReceiver;
import org.apache.phoenix.jdbc.PhoenixConnection;
import org.apache.phoenix.mapreduce.util.ConnectionUtil;
import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.MRJobType;
import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil;
import org.apache.phoenix.util.SchemaUtil;
import org.apache.tephra.TransactionNotInProgressException;
import org.apache.tephra.TransactionSystemClient;
import org.apache.tephra.hbase.coprocessor.TransactionProcessor;
import org.apache.thrift.transport.TTransportException;
import org.apache.twill.common.Cancellable;
import org.apache.twill.discovery.DiscoveryServiceClient;
import org.apache.twill.discovery.ZKDiscoveryService;
import org.apache.twill.zookeeper.ZKClient;
import org.joda.time.Chronology;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.Connection;

import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
import static org.apache.phoenix.query.QueryServices.IS_NAMESPACE_MAPPING_ENABLED;
import static org.apache.phoenix.query.QueryServicesOptions.DEFAULT_IS_NAMESPACE_MAPPING_ENABLED;

/**
 * Tool to collect table-level statistics from an HBase snapshot.
 */
public class UpdateStatisticsTool extends Configured implements Tool {

    private static final Logger LOG = LoggerFactory.getLogger(UpdateStatisticsTool.class);

    private static final Option TABLE_NAME_OPTION = new Option("t", "table", true, "Phoenix Table Name");
    private static final Option SNAPSHOT_NAME_OPTION = new Option("s", "snapshot", true, "HBase Snapshot Name");
    private static final Option RESTORE_DIR_OPTION = new Option("d", "restore-dir", true,
            "Restore Directory for HBase snapshot");
    private static final Option RUN_FOREGROUND_OPTION = new Option("runfg", "run-foreground", false,
            "If specified, runs UpdateStatisticsTool in the foreground. Default - runs the job in the background");
    private static final Option MANAGE_SNAPSHOT_OPTION = new Option("ms", "manage-snapshot", false,
            "Creates a new snapshot, runs the tool, and deletes the snapshot when the job completes");

    private static final Option HELP_OPTION = new Option("h", "help", false, "Help");

    private String tableName;
    private String snapshotName;
    private Path restoreDir;
    private boolean manageSnapshot;
    private boolean isForeground;

    private Job job;

    @Override
    public int run(String[] args) throws Exception {
        parseArgs(args);
        preJobTask();
        configureJob();
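        // Obtain HBase authentication tokens so the map tasks can run on a secure (Kerberos) cluster.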
        TableMapReduceUtil.initCredentials(job);
        int ret = runJob();
        postJobTask();
        return ret;
    }

    /**
     * Runs any tasks needed before the MR job is launched.
     * Currently used for snapshot creation.
     */
    private void preJobTask() throws Exception {
        if (!manageSnapshot) {
            return;
        }

        try (final Connection conn = ConnectionUtil.getInputConnection(getConf())) {
            Admin admin = conn.unwrap(PhoenixConnection.class).getQueryServices().getAdmin();
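            // Resolve the physical HBase table name, honoring Phoenix namespace mapping
            // (e.g. "SCHEMA.TABLE" maps to the "SCHEMA:TABLE" namespace form when enabled).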
            boolean namespaceMapping = getConf().getBoolean(IS_NAMESPACE_MAPPING_ENABLED,
                    DEFAULT_IS_NAMESPACE_MAPPING_ENABLED);
            String physicalTableName = SchemaUtil.getPhysicalTableName(tableName.getBytes(), namespaceMapping)
                    .getNameAsString();
            admin.snapshot(snapshotName, TableName.valueOf(physicalTableName));
            LOG.info("Successfully created snapshot " + snapshotName + " for " + physicalTableName);
        }
    }

    /**
     * Runs any tasks needed after the MR job has completed successfully.
     * Currently used for snapshot deletion.
     */
    private void postJobTask() throws Exception {
        if (!manageSnapshot) {
            return;
        }

        try (final Connection conn = ConnectionUtil.getInputConnection(getConf())) {
            Admin admin = conn.unwrap(PhoenixConnection.class).getQueryServices().getAdmin();
            admin.deleteSnapshot(snapshotName);
            LOG.info("Successfully deleted snapshot " + snapshotName);
        }
    }

    void parseArgs(String[] args) {
        CommandLine cmdLine = null;
        try {
            cmdLine = parseOptions(args);
        } catch (IllegalStateException e) {
            printHelpAndExit(e.getMessage(), getOptions());
        }

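        // ToolRunner normally injects a Configuration; fall back to a fresh HBase config if it did not.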
        if (getConf() == null) {
            setConf(HBaseConfiguration.create());
        }

        tableName = cmdLine.getOptionValue(TABLE_NAME_OPTION.getOpt());
        snapshotName = cmdLine.getOptionValue(SNAPSHOT_NAME_OPTION.getOpt());
        if (snapshotName == null) {
            snapshotName = "UpdateStatisticsTool_" + tableName + "_" + System.currentTimeMillis();
        }

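        // Default the snapshot restore directory to /tmp on the cluster's default filesystem (fs.defaultFS).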
        String restoreDirOptionValue = cmdLine.getOptionValue(RESTORE_DIR_OPTION.getOpt());
        if (restoreDirOptionValue == null) {
            restoreDirOptionValue = getConf().get(FS_DEFAULT_NAME_KEY) + "/tmp";
        }

        restoreDir = new Path(restoreDirOptionValue);
        manageSnapshot = cmdLine.hasOption(MANAGE_SNAPSHOT_OPTION.getOpt());
        isForeground = cmdLine.hasOption(RUN_FOREGROUND_OPTION.getOpt());
    }

    private void configureJob() throws Exception {
        job = Job.getInstance(getConf(), "UpdateStatistics-" + tableName + "-" + snapshotName);
        PhoenixMapReduceUtil.setInput(job, NullDBWritable.class, snapshotName, tableName, restoreDir);

        PhoenixConfigurationUtil.setMRJobType(job.getConfiguration(), MRJobType.UPDATE_STATS);

        // DO NOT allow mapper splits using statistics since it may result in many smaller chunks
        PhoenixConfigurationUtil.setSplitByStats(job.getConfiguration(), false);

        job.setJarByClass(UpdateStatisticsTool.class);
        job.setMapperClass(TableSnapshotMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setNumReduceTasks(0);
        TableMapReduceUtil.addDependencyJars(job);
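        // Ship the jars containing these runtime dependencies (ANTLR, Joda-Time, Tephra, Twill,
        // Thrift, HTrace, HBase metrics) to the cluster via the distributed cache.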
        TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), PhoenixConnection.class,
                Chronology.class, CharStream.class, TransactionSystemClient.class,
                TransactionNotInProgressException.class, ZKClient.class, DiscoveryServiceClient.class,
                ZKDiscoveryService.class, Cancellable.class, TTransportException.class, SpanReceiver.class,
                TransactionProcessor.class, Gauge.class, MetricRegistriesImpl.class);
        LOG.info("UpdateStatisticsTool running for: " + tableName + " on snapshot: " + snapshotName
                + " with restore dir: " + restoreDir);
    }

    private int runJob() {
        try {
            if (isForeground) {
                LOG.info("Running UpdateStatisticsTool in Foreground. "
                        + "Runs full table scans. This may take a long time!");
                return (job.waitForCompletion(true)) ? 0 : 1;
            } else {
                LOG.info("Running UpdateStatisticsTool in Background - Submit async and exit");
                job.submit();
                return 0;
            }
        } catch (Exception e) {
            LOG.error("Caught exception " + e + " trying to update statistics.");
            return 1;
        }
    }

    private void printHelpAndExit(String errorMessage, Options options) {
        System.err.println(errorMessage);
        printHelpAndExit(options, 1);
    }

    private void printHelpAndExit(Options options, int exitCode) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("help", options);
        System.exit(exitCode);
    }

    /**
     * Parses the command-line arguments; throws IllegalStateException if mandatory
     * arguments are missing.
     * @param args supplied command line arguments
     * @return the parsed command line
     */
    CommandLine parseOptions(String[] args) {

        final Options options = getOptions();

        CommandLineParser parser = new PosixParser();
        CommandLine cmdLine = null;
        try {
            cmdLine = parser.parse(options, args);
        } catch (ParseException e) {
            printHelpAndExit("Error parsing command line options: " + e.getMessage(), options);
        }

        if (cmdLine.hasOption(HELP_OPTION.getOpt())) {
            printHelpAndExit(options, 0);
        }

        if (!cmdLine.hasOption(TABLE_NAME_OPTION.getOpt())) {
            throw new IllegalStateException(TABLE_NAME_OPTION.getLongOpt() + " is a mandatory parameter");
        }

        if (cmdLine.hasOption(MANAGE_SNAPSHOT_OPTION.getOpt())
                && !cmdLine.hasOption(RUN_FOREGROUND_OPTION.getOpt())) {
            throw new IllegalStateException("Snapshot cannot be managed if job is running in background");
        }

        return cmdLine;
    }

    private Options getOptions() {
        final Options options = new Options();
        options.addOption(TABLE_NAME_OPTION);
        options.addOption(SNAPSHOT_NAME_OPTION);
        options.addOption(HELP_OPTION);
        options.addOption(RESTORE_DIR_OPTION);
        options.addOption(RUN_FOREGROUND_OPTION);
        options.addOption(MANAGE_SNAPSHOT_OPTION);
        return options;
    }

    public Job getJob() {
        return job;
    }

    public String getSnapshotName() {
        return snapshotName;
    }

    public Path getRestoreDir() {
        return restoreDir;
    }

    /**
     * Empty Mapper class, since stats collection happens as part of the scanner object.
     */
    public static class TableSnapshotMapper
            extends Mapper<NullWritable, NullDBWritable, NullWritable, NullWritable> {

        @Override
        protected void map(NullWritable key, NullDBWritable value, Context context) {
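            // No-op: statistics are collected by the snapshot scanner as the
            // input format iterates over the table's rows.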
        }
    }

    public static void main(String[] args) throws Exception {
        int result = ToolRunner.run(new UpdateStatisticsTool(), args);
        System.exit(result);
    }
}
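
Since parseOptions is package-private, its validation rules can be exercised from a test in the same package. A minimal sketch, assuming JUnit 4 is on the classpath (the test class and method names are hypothetical):

package org.apache.phoenix.schema.stats;

import org.junit.Test;

public class UpdateStatisticsToolOptionsTest {

    @Test(expected = IllegalStateException.class)
    public void tableNameIsMandatory() {
        // No -t/--table supplied: parseOptions rejects the command line.
        new UpdateStatisticsTool().parseOptions(new String[] {});
    }

    @Test(expected = IllegalStateException.class)
    public void manageSnapshotRequiresForeground() {
        // -ms without -runfg: deleting the snapshot could race the asynchronous
        // job, so parseOptions rejects the combination.
        new UpdateStatisticsTool().parseOptions(new String[] { "-t", "MY_TABLE", "-ms" });
    }
}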