Java tutorial: the Hive streaming ingest integration tester (StreamingIntegrationTester)

The class below, from the org.apache.hive.hcatalog.streaming package, is a stand-alone utility that exercises the Hive streaming ingest interface: it parses command-line options, opens a HiveEndPoint for the target table, and starts several writer threads that commit or abort batches of generated records.
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hive.hcatalog.streaming;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.LogUtils;
import org.apache.hadoop.util.StringUtils;

import java.util.Arrays;
import java.util.Random;

/**
 * A stand-alone utility to write data into the streaming ingest interface.
 */
public class StreamingIntegrationTester {

  static final private Logger LOG = LoggerFactory.getLogger(StreamingIntegrationTester.class.getName());

  public static void main(String[] args) {

    try {
      LogUtils.initHiveLog4j();
    } catch (LogUtils.LogInitializationException e) {
      System.err.println("Unable to initialize log4j " + StringUtils.stringifyException(e));
      System.exit(-1);
    }

    // Define the command-line options that control what and how the tester writes.
    Options options = new Options();

    options.addOption(OptionBuilder
        .hasArg()
        .withArgName("abort-pct")
        .withDescription("Percentage of transactions to abort, defaults to 5")
        .withLongOpt("abortpct")
        .create('a'));

    options.addOption(OptionBuilder
        .hasArgs()
        .withArgName("column-names")
        .withDescription("column names of table to write to")
        .withLongOpt("columns")
        .withValueSeparator(',')
        .isRequired()
        .create('c'));

    options.addOption(OptionBuilder
        .hasArg()
        .withArgName("database")
        .withDescription("Database of table to write to")
        .withLongOpt("database")
        .isRequired()
        .create('d'));

    options.addOption(OptionBuilder
        .hasArg()
        .withArgName("frequency")
        .withDescription("How often to commit a transaction, in seconds, defaults to 1")
        .withLongOpt("frequency")
        .create('f'));

    options.addOption(OptionBuilder
        .hasArg()
        .withArgName("iterations")
        .withDescription("Number of batches to write, defaults to 10")
        .withLongOpt("num-batches")
        .create('i'));

    options.addOption(OptionBuilder
        .hasArg()
        .withArgName("metastore-uri")
        .withDescription("URI of Hive metastore")
        .withLongOpt("metastore-uri")
        .isRequired()
        .create('m'));

    options.addOption(OptionBuilder
        .hasArg()
        .withArgName("num_transactions")
        .withDescription("Number of transactions per batch, defaults to 100")
        .withLongOpt("num-txns")
        .create('n'));

    options.addOption(OptionBuilder
        .hasArgs()
        .withArgName("partition-values")
        .withDescription("partition values, must be provided in order of partition columns, " +
            "if not provided table is assumed to not be partitioned")
        .withLongOpt("partition")
        .withValueSeparator(',')
        .create('p'));

    options.addOption(OptionBuilder
        .hasArg()
        .withArgName("records-per-transaction")
        .withDescription("records to write in each transaction, defaults to 100")
.withLongOpt("records-per-txn").withValueSeparator(',').create('r')); options.addOption(OptionBuilder.hasArgs().withArgName("column-types") .withDescription("column types, valid values are string, int, float, decimal, date, " + "datetime") .withLongOpt("schema").withValueSeparator(',').isRequired().create('s')); options.addOption(OptionBuilder.hasArg().withArgName("table").withDescription("Table to write to") .withLongOpt("table").isRequired().create('t')); options.addOption(OptionBuilder.hasArg().withArgName("num-writers") .withDescription("Number of writers to create, defaults to 2").withLongOpt("writers").create('w')); options.addOption(OptionBuilder.hasArg(false).withArgName("pause") .withDescription("Wait on keyboard input after commit & batch close. default: disabled") .withLongOpt("pause").create('x')); Parser parser = new GnuParser(); CommandLine cmdline = null; try { cmdline = parser.parse(options, args); } catch (ParseException e) { System.err.println(e.getMessage()); usage(options); } boolean pause = cmdline.hasOption('x'); String db = cmdline.getOptionValue('d'); String table = cmdline.getOptionValue('t'); String uri = cmdline.getOptionValue('m'); int txnsPerBatch = Integer.parseInt(cmdline.getOptionValue('n', "100")); int writers = Integer.parseInt(cmdline.getOptionValue('w', "2")); int batches = Integer.parseInt(cmdline.getOptionValue('i', "10")); int recordsPerTxn = Integer.parseInt(cmdline.getOptionValue('r', "100")); int frequency = Integer.parseInt(cmdline.getOptionValue('f', "1")); int ap = Integer.parseInt(cmdline.getOptionValue('a', "5")); float abortPct = ((float) ap) / 100.0f; String[] partVals = cmdline.getOptionValues('p'); String[] cols = cmdline.getOptionValues('c'); String[] types = cmdline.getOptionValues('s'); StreamingIntegrationTester sit = new StreamingIntegrationTester(db, table, uri, txnsPerBatch, writers, batches, recordsPerTxn, frequency, abortPct, partVals, cols, types, pause); sit.go(); } static void usage(Options options) { HelpFormatter hf = new HelpFormatter(); hf.printHelp(HelpFormatter.DEFAULT_WIDTH, "sit [options]", "Usage: ", options, ""); System.exit(-1); } private String db; private String table; private String uri; private int txnsPerBatch; private int writers; private int batches; private int recordsPerTxn; private int frequency; private float abortPct; private String[] partVals; private String[] cols; private String[] types; private boolean pause; private StreamingIntegrationTester(String db, String table, String uri, int txnsPerBatch, int writers, int batches, int recordsPerTxn, int frequency, float abortPct, String[] partVals, String[] cols, String[] types, boolean pause) { this.db = db; this.table = table; this.uri = uri; this.txnsPerBatch = txnsPerBatch; this.writers = writers; this.batches = batches; this.recordsPerTxn = recordsPerTxn; this.frequency = frequency; this.abortPct = abortPct; this.partVals = partVals; this.cols = cols; this.types = types; this.pause = pause; } private void go() { HiveEndPoint endPoint = null; try { if (partVals == null) { endPoint = new HiveEndPoint(uri, db, table, null); } else { endPoint = new HiveEndPoint(uri, db, table, Arrays.asList(partVals)); } for (int i = 0; i < writers; i++) { Writer w = new Writer(endPoint, i, txnsPerBatch, batches, recordsPerTxn, frequency, abortPct, cols, types, pause); w.start(); } } catch (Throwable t) { System.err.println("Caught exception while testing: " + StringUtils.stringifyException(t)); } } private static class Writer extends Thread { private HiveEndPoint endPoint; 
    private int txnsPerBatch;
    private int batches;
    private int writerNumber;
    private int recordsPerTxn;
    private int frequency;
    private float abortPct;
    private String[] cols;
    private String[] types;
    private boolean pause;
    private Random rand;

    Writer(HiveEndPoint endPoint, int writerNumber, int txnsPerBatch, int batches,
        int recordsPerTxn, int frequency, float abortPct, String[] cols, String[] types,
        boolean pause) {
      this.endPoint = endPoint;
      this.txnsPerBatch = txnsPerBatch;
      this.batches = batches;
      this.writerNumber = writerNumber;
      this.recordsPerTxn = recordsPerTxn;
      // The command line gives the commit frequency in seconds; store it in milliseconds.
      this.frequency = frequency * 1000;
      this.abortPct = abortPct;
      this.cols = cols;
      this.types = types;
      this.pause = pause;
      rand = new Random();
    }

    @Override
    public void run() {
      StreamingConnection conn = null;
      try {
        conn = endPoint.newConnection(true, "UT_" + Thread.currentThread().getName());
        RecordWriter writer = new DelimitedInputWriter(cols, ",", endPoint);

        for (int i = 0; i < batches; i++) {
          long start = System.currentTimeMillis();
          LOG.info("Starting batch " + i);
          TransactionBatch batch = conn.fetchTransactionBatch(txnsPerBatch, writer);
          try {
            while (batch.remainingTransactions() > 0) {
              batch.beginNextTransaction();
              for (int j = 0; j < recordsPerTxn; j++) {
                batch.write(generateRecord(cols, types));
              }
              // Abort a configurable fraction of transactions to exercise the abort path.
              if (rand.nextFloat() < abortPct) batch.abort();
              else batch.commit();
              if (pause) {
                System.out.println("Writer " + writerNumber +
                    " committed... press Enter to continue. " + Thread.currentThread().getId());
                System.in.read();
              }
            }
            // Throttle so that each batch takes at least 'frequency' milliseconds.
            long end = System.currentTimeMillis();
            if (end - start < frequency) Thread.sleep(frequency - (end - start));
          } finally {
            batch.close();
            if (pause) {
              System.out.println("Writer " + writerNumber +
                  " has closed a batch... press Enter to continue. " + Thread.currentThread().getId());
              System.in.read();
            }
          }
        }
      } catch (Throwable t) {
        System.err.println("Writer number " + writerNumber + " caught exception while testing: " +
            StringUtils.stringifyException(t));
      } finally {
        if (conn != null) conn.close();
      }
    }

    private byte[] generateRecord(String[] cols, String[] types) {
      // TODO make it so I can randomize the column order
      StringBuilder buf = new StringBuilder();
      for (int i = 0; i < types.length; i++) {
        buf.append(generateColumn(types[i]));
        buf.append(",");
      }
      return buf.toString().getBytes();
    }

    private String generateColumn(String type) {
      // Return a fixed sample value for each supported column type.
      if ("string".equals(type.toLowerCase())) {
        return "When that Aprilis with his showers swoot";
      } else if (type.toLowerCase().startsWith("int")) {
        return "42";
      } else if (type.toLowerCase().startsWith("dec") || type.toLowerCase().equals("float")) {
        return "3.141592654";
      } else if (type.toLowerCase().equals("datetime")) {
        return "2014-03-07 15:33:22";
      } else if (type.toLowerCase().equals("date")) {
        return "1955-11-12";
      } else {
        throw new RuntimeException("Sorry, I don't know the type " + type);
      }
    }
  }
}
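The tester is driven entirely from the command line; usage() advertises the launcher name as "sit", though in practice the class would be started through whatever wrapper script or java invocation packages it. As a rough sketch only, with a placeholder metastore URI, database, table, and column list that are not taken from the source, an invocation might look like:

  sit --metastore-uri thrift://localhost:9083 --database default --table alerts \
      --columns id,msg,created --schema int,string,datetime \
      --num-txns 100 --records-per-txn 100 --num-batches 10 --writers 2 --abortpct 5

The five required options map to -m (metastore URI), -d (database), -t (table), -c (column names), and -s (column types); everything else falls back to the defaults listed in the option descriptions, and --partition can supply comma-separated partition values when the target table is partitioned.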