org.apache.hive.hcatalog.streaming.StreamingIntegrationTester.java Source code

Introduction

Here is the source code for org.apache.hive.hcatalog.streaming.StreamingIntegrationTester.java, a standalone command-line utility that exercises Hive's streaming ingest interface by writing generated records into a target table from multiple concurrent writer threads.
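
A typical invocation (with the Hive and HCatalog streaming jars on the classpath) looks like the following; the metastore URI, table, and column names here are illustrative, and only -c, -d, -m, -s, and -t are required:

    java org.apache.hive.hcatalog.streaming.StreamingIntegrationTester \
        -m thrift://localhost:9083 -d default -t alerts \
        -c id,msg,amount -s int,string,float \
        -w 2 -i 10 -n 100 -r 100 -a 5 -f 1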

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hive.hcatalog.streaming;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.LogUtils;
import org.apache.hadoop.util.StringUtils;

import java.util.Arrays;
import java.util.Random;

/**
 * A standalone utility to write data into the streaming ingest interface.
 * It spawns a configurable number of writer threads, each of which streams
 * generated records into the target table, committing or aborting a
 * configurable percentage of its transactions.
 */
public class StreamingIntegrationTester {

    private static final Logger LOG = LoggerFactory.getLogger(StreamingIntegrationTester.class);

    public static void main(String[] args) {

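        // Initialize Hive's log4j configuration up front; exit if that fails.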
        try {
            LogUtils.initHiveLog4j();
        } catch (LogUtils.LogInitializationException e) {
            System.err.println("Unable to initialize log4j " + StringUtils.stringifyException(e));
            System.exit(-1);
        }

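        // Build the CLI option set; -c, -d, -m, -s, and -t are required, the rest have defaults.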
        Options options = new Options();

        options.addOption(OptionBuilder.hasArg().withArgName("abort-pct")
                .withDescription("Percentage of transactions to abort, defaults to 5").withLongOpt("abortpct")
                .create('a'));

        options.addOption(OptionBuilder.hasArgs().withArgName("column-names")
                .withDescription("column names of table to write to").withLongOpt("columns").withValueSeparator(',')
                .isRequired().create('c'));

        options.addOption(OptionBuilder.hasArg().withArgName("database")
                .withDescription("Database of table to write to").withLongOpt("database").isRequired().create('d'));

        options.addOption(OptionBuilder.hasArg().withArgName("frequency")
                .withDescription("How often to commit a transaction, in seconds, defaults to 1")
                .withLongOpt("frequency").create('f'));

        options.addOption(OptionBuilder.hasArg().withArgName("iterations")
                .withDescription("Number of batches to write, defaults to 10").withLongOpt("num-batches")
                .create('i'));

        options.addOption(OptionBuilder.hasArg().withArgName("metastore-uri")
                .withDescription("URI of Hive metastore").withLongOpt("metastore-uri").isRequired().create('m'));

        options.addOption(OptionBuilder.hasArg().withArgName("num_transactions")
                .withDescription("Number of transactions per batch, defaults to 100").withLongOpt("num-txns")
                .create('n'));

        options.addOption(OptionBuilder.hasArgs().withArgName("partition-values")
                .withDescription("partition values, must be provided in order of partition columns, "
                        + "if not provided table is assumed to not be partitioned")
                .withLongOpt("partition").withValueSeparator(',').create('p'));

        options.addOption(OptionBuilder.hasArg().withArgName("records-per-transaction")
                .withDescription("records to write in each transaction, defaults to 100")
                .withLongOpt("records-per-txn").create('r'));

        options.addOption(OptionBuilder.hasArgs().withArgName("column-types")
                .withDescription("column types, valid values are string, int, float, decimal, date, datetime")
                .withLongOpt("schema").withValueSeparator(',').isRequired().create('s'));

        options.addOption(OptionBuilder.hasArg().withArgName("table").withDescription("Table to write to")
                .withLongOpt("table").isRequired().create('t'));

        options.addOption(OptionBuilder.hasArg().withArgName("num-writers")
                .withDescription("Number of writers to create, defaults to 2").withLongOpt("writers").create('w'));

        options.addOption(OptionBuilder
                .withDescription("Wait on keyboard input after commit & batch close. default: disabled")
                .withLongOpt("pause").create('x'));

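        // Parse the command line; on a parse error, print usage and exit.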
        Parser parser = new GnuParser();
        CommandLine cmdline = null;
        try {
            cmdline = parser.parse(options, args);
        } catch (ParseException e) {
            System.err.println(e.getMessage());
            usage(options);
        }

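        // Extract option values, applying the documented defaults where flags are absent.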
        boolean pause = cmdline.hasOption('x');
        String db = cmdline.getOptionValue('d');
        String table = cmdline.getOptionValue('t');
        String uri = cmdline.getOptionValue('m');
        int txnsPerBatch = Integer.parseInt(cmdline.getOptionValue('n', "100"));
        int writers = Integer.parseInt(cmdline.getOptionValue('w', "2"));
        int batches = Integer.parseInt(cmdline.getOptionValue('i', "10"));
        int recordsPerTxn = Integer.parseInt(cmdline.getOptionValue('r', "100"));
        int frequency = Integer.parseInt(cmdline.getOptionValue('f', "1"));
        int ap = Integer.parseInt(cmdline.getOptionValue('a', "5"));
        float abortPct = ((float) ap) / 100.0f;
        String[] partVals = cmdline.getOptionValues('p');
        String[] cols = cmdline.getOptionValues('c');
        String[] types = cmdline.getOptionValues('s');

        StreamingIntegrationTester sit = new StreamingIntegrationTester(db, table, uri, txnsPerBatch, writers,
                batches, recordsPerTxn, frequency, abortPct, partVals, cols, types, pause);
        sit.go();
    }

    static void usage(Options options) {
        HelpFormatter hf = new HelpFormatter();
        hf.printHelp(HelpFormatter.DEFAULT_WIDTH, "sit [options]", "Usage: ", options, "");
        System.exit(-1);
    }

    private String db;
    private String table;
    private String uri;
    private int txnsPerBatch;
    private int writers;
    private int batches;
    private int recordsPerTxn;
    private int frequency;
    private float abortPct;
    private String[] partVals;
    private String[] cols;
    private String[] types;
    private boolean pause;

    private StreamingIntegrationTester(String db, String table, String uri, int txnsPerBatch, int writers,
            int batches, int recordsPerTxn, int frequency, float abortPct, String[] partVals, String[] cols,
            String[] types, boolean pause) {
        this.db = db;
        this.table = table;
        this.uri = uri;
        this.txnsPerBatch = txnsPerBatch;
        this.writers = writers;
        this.batches = batches;
        this.recordsPerTxn = recordsPerTxn;
        this.frequency = frequency;
        this.abortPct = abortPct;
        this.partVals = partVals;
        this.cols = cols;
        this.types = types;
        this.pause = pause;
    }

    private void go() {
        HiveEndPoint endPoint = null;
        try {
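            // Use a partitioned endpoint only when partition values were supplied.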
            if (partVals == null) {
                endPoint = new HiveEndPoint(uri, db, table, null);
            } else {
                endPoint = new HiveEndPoint(uri, db, table, Arrays.asList(partVals));
            }

            for (int i = 0; i < writers; i++) {
                Writer w = new Writer(endPoint, i, txnsPerBatch, batches, recordsPerTxn, frequency, abortPct, cols,
                        types, pause);
                w.start();
            }

        } catch (Throwable t) {
            System.err.println("Caught exception while testing: " + StringUtils.stringifyException(t));
        }
    }

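    /**
     * A single writer thread. Each writer opens its own streaming connection
     * to the shared endpoint and drives its transaction batches independently.
     */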
    private static class Writer extends Thread {
        private HiveEndPoint endPoint;
        private int txnsPerBatch;
        private int batches;
        private int writerNumber;
        private int recordsPerTxn;
        private int frequency;
        private float abortPct;
        private String[] cols;
        private String[] types;
        private boolean pause;
        private Random rand;

        Writer(HiveEndPoint endPoint, int writerNumber, int txnsPerBatch, int batches, int recordsPerTxn,
                int frequency, float abortPct, String[] cols, String[] types, boolean pause) {
            this.endPoint = endPoint;
            this.txnsPerBatch = txnsPerBatch;
            this.batches = batches;
            this.writerNumber = writerNumber;
            this.recordsPerTxn = recordsPerTxn;
            this.frequency = frequency * 1000; // convert seconds to milliseconds
            this.abortPct = abortPct;
            this.cols = cols;
            this.types = types;
            this.pause = pause;
            rand = new Random();
        }

        @Override
        public void run() {
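            // Open one streaming connection, then write 'batches' transaction
            // batches, committing or aborting individual transactions and pacing
            // each batch to the configured frequency.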
            StreamingConnection conn = null;
            try {
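                // newConnection(true, ...) creates the target partition if it does
                // not exist yet; records are written comma-delimited.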
                conn = endPoint.newConnection(true, "UT_" + Thread.currentThread().getName());
                RecordWriter writer = new DelimitedInputWriter(cols, ",", endPoint);

                for (int i = 0; i < batches; i++) {
                    long start = System.currentTimeMillis();
                    LOG.info("Starting batch " + i);
                    TransactionBatch batch = conn.fetchTransactionBatch(txnsPerBatch, writer);
                    try {
                        while (batch.remainingTransactions() > 0) {
                            batch.beginNextTransaction();
                            for (int j = 0; j < recordsPerTxn; j++) {
                                batch.write(generateRecord(cols, types));
                            }
                            if (rand.nextFloat() < abortPct) {
                                batch.abort();
                            } else {
                                batch.commit();
                            }
                            if (pause) {
                                System.out.println(
                                        "Writer " + writerNumber + " committed... press Enter to continue. "
                                                + Thread.currentThread().getId());
                                System.in.read();
                            }
                        }
                        long end = System.currentTimeMillis();
                        if (end - start < frequency) {
                            // Pace batches: sleep out the remainder of the commit interval.
                            Thread.sleep(frequency - (end - start));
                        }
                    } finally {
                        batch.close();
                        if (pause) {
                            System.out.println(
                                    "Writer " + writerNumber + " has closed a batch... press Enter to continue. "
                                            + Thread.currentThread().getId());
                            System.in.read();
                        }
                    }
                }
            } catch (Throwable t) {
                System.err.println("Writer number " + writerNumber + " caught exception while testing: "
                        + StringUtils.stringifyException(t));
            } finally {
                if (conn != null) {
                    conn.close();
                }
            }
        }

        private byte[] generateRecord(String[] cols, String[] types) {
            // TODO make it so I can randomize the column order

            // Emit one comma-delimited value per declared column, without a
            // trailing delimiter.
            StringBuilder buf = new StringBuilder();
            for (int i = 0; i < types.length; i++) {
                if (i > 0) {
                    buf.append(",");
                }
                buf.append(generateColumn(types[i]));
            }
            return buf.toString().getBytes();
        }

        private String generateColumn(String type) {
            // Return a fixed sample value for each supported column type.
            String t = type.toLowerCase();
            if (t.equals("string")) {
                return "When that Aprilis with his showers swoot";
            } else if (t.startsWith("int")) {
                return "42";
            } else if (t.startsWith("dec") || t.equals("float")) {
                return "3.141592654";
            } else if (t.equals("datetime")) {
                return "2014-03-07 15:33:22";
            } else if (t.equals("date")) {
                return "1955-11-12";
            } else {
                throw new RuntimeException("Sorry, I don't know the type " + type);
            }
        }
    }
}
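
With the illustrative schema above (-s int,string,float), each generated record is a comma-delimited line such as:

    42,When that Aprilis with his showers swoot,3.141592654

Note that Hive's streaming ingest interface requires the target table to be stored as ORC, bucketed, and created with 'transactional'='true'; the tester assumes such a table already exists.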