Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hcatalog.hcatmix;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.*;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hcatalog.hcatmix.conf.HiveTableSchema;
import org.apache.hcatalog.hcatmix.conf.TableSchemaXMLParser;
import org.apache.pig.test.utils.datagen.ColSpec;
import org.apache.pig.test.utils.datagen.DataGenerator;
import org.apache.pig.test.utils.datagen.DataGeneratorConf;
import org.apache.pig.tools.cmdline.CmdLineParser;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;

import javax.xml.parsers.ParserConfigurationException;
import java.io.File;
import java.io.IOException;
import java.text.MessageFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

/**
 * Main class to:
 *   1. Create HCatalog tables
 *   2. Generate data for them
 *   3. Generate pig scripts that load the data into the HCatalog tables
 * This class is named HCatMixSetup because it does the initial setup for running the performance tests.
 */
public class HCatMixSetup extends Configured implements Tool {
    private static final Logger LOG = LoggerFactory.getLogger(HCatMixSetup.class);

    HiveMetaStoreClient hiveClient;
    public final static char SEPARATOR = ',';

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new Configuration(), new HCatMixSetup(), args);
    }

    public static void usage() {
        System.err.println("Error");
        throw new RuntimeException();
    }

    public HCatMixSetup() throws MetaException {
        HiveConf hiveConf = new HiveConf(HCatMixSetup.class);
        hiveClient = new HiveMetaStoreClient(hiveConf);
    }

    public void setupFromConf(HCatMixSetupConf conf) throws IOException, SAXException, ParserConfigurationException,
            MetaException, NoSuchObjectException, TException, InvalidObjectException {
        TableSchemaXMLParser configParser = new TableSchemaXMLParser(conf.getConfFileName());
        List<HiveTableSchema> multiInstanceList = configParser.getHiveTableList();
        for (HiveTableSchema hiveTableSchema : multiInstanceList) {
            if (conf.isCreateTable()) {
                try {
                    createTable(hiveTableSchema);
                } catch (AlreadyExistsException e) {
                    LOG.info("Couldn't create table " + hiveTableSchema.getName()
                            + ": it already exists. Ignoring and proceeding.", e);
                }
            }
            if (conf.isGenerateData()) {
                generateDataForTable(hiveTableSchema, conf.getNumMappers(), conf.getOutputDir());
            }
            if (conf.isGeneratePigScripts()) {
                generatePigScripts(conf.getOutputDir(), conf.getPigDataOutputDir(), hiveTableSchema,
                        conf.getPigScriptDir());
            }
        }
    }

    public void generatePigScripts(final String outputDir, final String pigOutputDir,
                                   final HiveTableSchema hiveTableSchema, final String pigScriptDir)
            throws IOException {
        PigScriptGenerator pigScriptGenerator = new PigScriptGenerator(
                HCatMixUtils.getDataLocation(outputDir, hiveTableSchema.getName()), pigOutputDir, hiveTableSchema);
        LOG.info(MessageFormat.format(
                "About to generate pig scripts in {0}, for table: {1} for input data in location: {2}",
                pigScriptDir, hiveTableSchema.getName(), outputDir));

        // 1. Script for loading using PigStorage() and storing using HCatStorer()
        final String pigLoadHCatStoreScript = HCatMixUtils.getHCatStoreScriptName(pigScriptDir,
                hiveTableSchema.getName());
        FileUtils.writeStringToFile(new File(pigLoadHCatStoreScript),
                pigScriptGenerator.getPigLoaderHCatStorerScript());
        LOG.info(MessageFormat.format("1. Successfully created the pig loader/hcat storer script: {0}",
                pigLoadHCatStoreScript));

        // 2. Script for loading and storing using PigStorage()
        final String pigLoadPigStorerScript = HCatMixUtils.getPigLoadStoreScriptName(pigScriptDir,
                hiveTableSchema.getName());
        FileUtils.writeStringToFile(new File(pigLoadPigStorerScript),
                pigScriptGenerator.getPigLoaderPigStorerScript());
        LOG.info(MessageFormat.format("2. Successfully created the pig loader/pig storer script: {0}",
                pigLoadPigStorerScript));

        // 3. Script for loading using HCatLoader() and storing using PigStorage()
        final String hcatLoadPigStorerScript = HCatMixUtils.getHCatLoadScriptName(pigScriptDir,
                hiveTableSchema.getName());
        FileUtils.writeStringToFile(new File(hcatLoadPigStorerScript),
                pigScriptGenerator.getHCatLoaderPigStorerScript());
        LOG.info(MessageFormat.format("3. Successfully created the hcat loader/pig storer script: {0}",
                hcatLoadPigStorerScript));

        // 4. Script for loading using HCatLoader() and storing using HCatStorer()
        final String hcatLoadHCatStorerScript = HCatMixUtils.getHCatLoadStoreScriptName(pigScriptDir,
                hiveTableSchema.getName());
        FileUtils.writeStringToFile(new File(hcatLoadHCatStorerScript),
                pigScriptGenerator.getHCatLoaderHCatStorerScript());
        LOG.info(MessageFormat.format("4. Successfully created the hcat loader/hcat storer script: {0}",
                hcatLoadHCatStorerScript));
    }
    public void generateDataForTable(HiveTableSchema hiveTableSchema, final int numMappers, String outputDir)
            throws IOException {
        String outputFile = HCatMixUtils.getDataLocation(outputDir, hiveTableSchema.getName());
        LOG.info(MessageFormat.format(
                "About to generate data for table: {0}, with number of mappers: {1}, output location: {2}",
                hiveTableSchema.getName(), numMappers, outputFile));
        if (!HCatMixHDFSUtils.exists(outputFile)) {
            List<ColSpec> colSpecs = new ArrayList<ColSpec>(hiveTableSchema.getColumnColSpecs());
            colSpecs.addAll(hiveTableSchema.getPartitionColSpecs());

            DataGeneratorConf dgConf = new DataGeneratorConf.Builder()
                    .colSpecs(colSpecs.toArray(new ColSpec[colSpecs.size()]))
                    .separator(SEPARATOR)
                    .numMappers(numMappers)
                    .numRows(hiveTableSchema.getRowCount())
                    .outputFile(outputFile)
                    .build();
            DataGenerator dataGenerator = new DataGenerator();
            dataGenerator.runJob(dgConf, getConf());
            LOG.info(MessageFormat.format("Successfully created input data in: {0}", outputFile));
        } else {
            LOG.info(MessageFormat.format("Output location {0} already exists, skipping data generation",
                    outputFile));
        }
    }

    public void createTable(HiveTableSchema hiveTableSchema) throws IOException, TException, NoSuchObjectException,
            MetaException, AlreadyExistsException, InvalidObjectException {
        LOG.info("About to create table: " + hiveTableSchema.getName());
        Table table = new Table();
        table.setDbName(hiveTableSchema.getDatabaseName());
        table.setTableName(hiveTableSchema.getName());
        try {
            table.setOwner(UserGroupInformation.getCurrentUser().getUserName());
        } catch (IOException e) {
            throw new IOException("Couldn't get user information. Cannot create table", e);
        }
        table.setOwnerIsSet(true);

        StorageDescriptor sd = new StorageDescriptor();
        sd.setCols(hiveTableSchema.getColumnFieldSchemas());
        table.setSd(sd);
        sd.setParameters(new HashMap<String, String>());
        sd.setSerdeInfo(new SerDeInfo());
        sd.getSerdeInfo().setName(table.getTableName());
        sd.getSerdeInfo().setParameters(new HashMap<String, String>());
        sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
        sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());
        sd.getSerdeInfo().getParameters().put(
                org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1");
        sd.getSerdeInfo().setSerializationLib(
                org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());
        table.setPartitionKeys(hiveTableSchema.getPartitionFieldSchemas());

        hiveClient.createTable(table);
        LOG.info("Successfully created table: " + table.getTableName());
    }

    public void deleteTable(String dbName, String tableName) throws TException, MetaException, NoSuchObjectException {
        LOG.info(MessageFormat.format("About to delete table: {0}.{1}", dbName, tableName));
        hiveClient.dropTable(dbName, tableName);
        LOG.info(MessageFormat.format("Successfully deleted table: {0}.{1}", dbName, tableName));
    }

    @Override
    public int run(String[] args) throws Exception {
        CmdLineParser opts = new CmdLineParser(args);
        opts.registerOpt('f', "file", CmdLineParser.ValueExpected.REQUIRED);
        opts.registerOpt('m', "mappers", CmdLineParser.ValueExpected.OPTIONAL);
        opts.registerOpt('o', "output-dir", CmdLineParser.ValueExpected.REQUIRED);
        opts.registerOpt('p', "pig-script-output-dir", CmdLineParser.ValueExpected.REQUIRED);
        opts.registerOpt('a', "pig-data-output-dir", CmdLineParser.ValueExpected.REQUIRED);
        opts.registerOpt('t', "create-table", CmdLineParser.ValueExpected.NOT_ACCEPTED);
        opts.registerOpt('d', "generate-data", CmdLineParser.ValueExpected.NOT_ACCEPTED);
        opts.registerOpt('s', "generate-pig-scripts", CmdLineParser.ValueExpected.NOT_ACCEPTED);
        opts.registerOpt('e', "do-everything", CmdLineParser.ValueExpected.NOT_ACCEPTED);

        HCatMixSetupConf.Builder builder = new HCatMixSetupConf.Builder();
        char opt;
        try {
            while ((opt = opts.getNextOpt()) != CmdLineParser.EndOfOpts) {
                switch (opt) {
                case 'f':
                    builder.confFileName(opts.getValStr());
                    break;
                case 'o':
                    builder.outputDir(opts.getValStr());
                    break;
                case 'm':
                    builder.numMappers(Integer.valueOf(opts.getValStr()));
                    break;
                case 'p':
                    builder.pigScriptDir(opts.getValStr());
                    break;
                case 'e':
                    builder.doEverything();
                    break;
                case 't':
                    builder.createTable();
                    break;
                case 's':
                    builder.generatePigScripts();
                    break;
                case 'd':
                    builder.generateData();
                    break;
                case 'a':
                    builder.pigDataOutputDir(opts.getValStr());
                    break;
                default:
                    usage();
                    break;
                }
            }
        } catch (ParseException pe) {
            System.err.println("Couldn't parse the command line arguments, " + pe.getMessage());
            usage();
        }

        try {
            setupFromConf(builder.build());
        } catch (Exception e) {
            e.printStackTrace();
        }
        return 0;
    }
}
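
Below is a minimal sketch of driving the tool programmatically rather than from the shell. The flags mirror the options registered in run() above; the schema file and directory paths are hypothetical placeholders, the example assumes it lives in the same package as HCatMixSetup, and it assumes a reachable Hive metastore, since the HCatMixSetup constructor opens a HiveMetaStoreClient.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class HCatMixSetupExample {
    public static void main(String[] args) throws Exception {
        // -e (do-everything) enables table creation, data generation, and
        // pig script generation in one run. All paths below are made up.
        String[] setupArgs = {
                "-f", "hcatmix_tables.xml",      // XML table schema config (hypothetical file)
                "-o", "/tmp/hcatmix/data",       // where the generated data is written
                "-p", "/tmp/hcatmix/scripts",    // where the pig scripts are written
                "-a", "/tmp/hcatmix/pig-output", // output dir referenced inside the pig scripts
                "-m", "4",                       // number of mappers for data generation
                "-e"
        };
        ToolRunner.run(new Configuration(), new HCatMixSetup(), setupArgs);
    }
}

An equivalent shell invocation would pass the same flags on the command line, since run() parses them with CmdLineParser; routing through ToolRunner in either case lets generic Hadoop options populate the Configuration that generateDataForTable later hands to the data-generation job via getConf().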