com.cloudera.accumulo.upgrade.util.MapreduceInputCli.java Source code


Introduction

Here is the source code for com.cloudera.accumulo.upgrade.util.MapreduceInputCli.java: a JCommander-based helper that configures AccumuloInputFormat for a MapReduce job, with an optional offline mode that clones the input table, takes the clone offline, and scans its files directly from HDFS.

Source

/*
 * Copyright (c) 2014, Cloudera, Inc. All Rights Reserved.
 *
 * Cloudera, Inc. licenses this file to you under the Apache License,
 * Version 2.0 (the "License"). You may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for
 * the specific language governing permissions and limitations under the
 * License.
 */
package com.cloudera.accumulo.upgrade.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.Set;

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.admin.TableOperations;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.log4j.Logger;

import com.beust.jcommander.Parameter;

public class MapreduceInputCli {

    private static final Logger log = Logger.getLogger(MapreduceInputCli.class);

    /** Connection settings (instance, credentials, authorizations) shared with the rest of the CLI. */
    protected ConnectionCli connection;

    public MapreduceInputCli(ConnectionCli connection) {
        this.connection = connection;
    }

    @Parameter(names = {
            "--offline" }, required = false, description = "When given, we'll clone the tables we're going to scan, then take the clones offline and read HDFS files directly.")
    public boolean offlineMode = false;

    @Parameter(names = {
            "--max-maps" }, required = false, description = "Set a maximum number of map slots to use. Default: no maximum.")
    public int maxMaps = -1;

    /**
     * Relies on the {@code --offline} command-line flag to decide whether to use offline mode,
     * so you should always call {@link #close()} when using this version.
     */
    public void useAccumuloInputFormat(Job job, String table) throws IOException, AccumuloException,
            AccumuloSecurityException, TableExistsException, TableNotFoundException {
        useAccumuloInputFormat(job, table, offlineMode);
    }

    /** Names of the table clones created by offline mode; deleted in {@link #close()}. */
    List<String> clones = new ArrayList<String>();

    /**
     * If you use offline mode, you must either call {@link #close()} when you're done or delete
     * the cloned table yourself.
     */
    public void useAccumuloInputFormat(Job job, String table, boolean offline) throws IOException,
            AccumuloException, AccumuloSecurityException, TableExistsException, TableNotFoundException {
        job.setInputFormatClass(AccumuloInputFormat.class);
        /* XXX Need to use a method that exists in both 1.4 and 1.5 :( */
        AccumuloInputFormat.setZooKeeperInstance(job, connection.instance, connection.zookeepers);
        final TableOperations ops = connection.getConnector().tableOperations();

        String scan = table;
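        // Offline mode: clone the table, take the clone offline, and let the InputFormat
        // read the clone's files directly from HDFS; the clone is remembered so that
        // close() can delete it later.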
        if (offline) {
            Random random = new Random();
            // Random suffix for the clone's name; mask with Long.MAX_VALUE to guarantee a
            // non-negative value (Math.abs(Long.MIN_VALUE) is still negative).
            scan = table + "_" + String.format("%016x", random.nextLong() & Long.MAX_VALUE);
            ops.clone(table, scan, true, Collections.<String, String>emptyMap(), Collections.<String>emptySet());
            try {
                ops.offline(scan);
            } finally {
                // Track the clone even if offline() throws, so close() can still delete it.
                clones.add(scan);
            }
            AccumuloInputFormat.setOfflineTableScan(job, true);
        }

        // The same connector credentials and scan settings apply whether the scan is
        // online or offline.
        PasswordToken token = new PasswordToken(connection.password);

        AccumuloInputFormat.setConnectorInfo(job, connection.principal, token);
        AccumuloInputFormat.setInputTableName(job, scan);
        AccumuloInputFormat.setScanAuthorizations(job, connection.auths);

        if (0 < maxMaps) {
            // Cap parallelism: split the full range into at most maxMaps tablet-aligned
            // ranges, computed against the original (online) table since a fresh clone
            // shares its split points.
            try {
                Set<Range> ranges = ops.splitRangeByTablets(table, new Range(), maxMaps);
                AccumuloInputFormat.setRanges(job, ranges);
                // With auto-adjust disabled, each range becomes exactly one input split.
                AccumuloInputFormat.setAutoAdjustRanges(job, false);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
    }

    /**
     * Clean up any table clones created by offline mode.
     */
    public void close() throws AccumuloException, AccumuloSecurityException {
        final TableOperations ops = connection.getConnector().tableOperations();
        for (String table : clones) {
            try {
                ops.delete(table);
            } catch (Exception exception) {
                log.error("Couldn't delete table '" + table + "'", exception);
            }
        }
    }

}
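
Example usage

The class above is not a complete program: it expects a companion ConnectionCli object that JCommander has already populated, exposing the fields used in the listing (instance, zookeepers, principal, password, auths) and a getConnector() method. The driver below is a minimal sketch of how the class might be wired into a map-only job; ExampleDriver and ExampleMapper are hypothetical names used for illustration, not part of the upgrade utilities.

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

import com.beust.jcommander.JCommander;

public class ExampleDriver {

    // Minimal mapper over Accumulo entries; replace the body with real per-entry logic.
    static class ExampleMapper extends Mapper<Key, Value, NullWritable, NullWritable> {
        @Override
        protected void map(Key key, Value value, Context context) {
            // inspect key/value here
        }
    }

    public static void main(String[] args) throws Exception {
        ConnectionCli connection = new ConnectionCli(); // assumed companion CLI class
        MapreduceInputCli input = new MapreduceInputCli(connection);

        // Let JCommander fill in both objects from flags such as --offline and --max-maps.
        JCommander parser = new JCommander();
        parser.addObject(connection);
        parser.addObject(input);
        parser.parse(args);

        Job job = Job.getInstance(new Configuration(), "accumulo-example-scan");
        job.setJarByClass(ExampleDriver.class);

        input.useAccumuloInputFormat(job, "my_table"); // honors --offline from the command line
        job.setMapperClass(ExampleMapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);
        job.setOutputFormatClass(NullOutputFormat.class);

        try {
            job.waitForCompletion(true);
        } finally {
            input.close(); // deletes any table clone created by offline mode
        }
    }
}

The try/finally matters when --offline is passed: the clone created inside useAccumuloInputFormat is only removed by close(), so skipping it leaves an orphaned offline table behind.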