Java tutorial: HiveOutputFormat, a Hadoop MapReduce output format that writes job results into Hive
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.ery.hadoop.mrddx.hive;

import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.ery.hadoop.mrddx.DBGroupReducer;
import com.ery.hadoop.mrddx.DBPartitionReducer;
import com.ery.hadoop.mrddx.DBRecord;
import com.ery.hadoop.mrddx.DBReducer;
import com.ery.hadoop.mrddx.IHandleFormat;
import com.ery.hadoop.mrddx.MRConfiguration;
import com.ery.hadoop.mrddx.db.mapreduce.FileWritable;
import com.ery.hadoop.mrddx.log.MRLog;
import com.ery.hadoop.mrddx.util.HDFSUtils;

/**
 * Hive output format: writes MapReduce output records into a Hive table.
 *
 * @createDate 2013-1-18
 * @version v1.0
 * @param <K> the output key type, a {@link FileWritable}
 * @param <V> the output value type (unused; values are written as NullWritable)
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class HiveOutputFormat<K extends FileWritable, V> extends FileOutputFormat<K, NullWritable> implements
        IHandleFormat {
    private static final Log LOG = LogFactory.getLog(HiveOutputFormat.class);

    @Override
    public RecordWriter<K, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException,
            InterruptedException {
        return new HiveRecordWriter<K, NullWritable>(context, this);

        // Earlier implementation, kept here commented out for reference. It wrote
        // the output file to HDFS directly instead of delegating to HiveRecordWriter.
        //
        // public RecordWriter<K, NullWritable> getRecordWriter(FileSystem ignored, JobConf job, String name,
        //         Progressable progress) throws IOException {
        // HiveConfiguration hiveConf = new HiveConfiguration(context.getConfiguration());
        // String fieldSeparator = hiveConf.getOutputHiveFileFieldSplitChars();
        // String rowSeparator = hiveConf.getOutputHiveFileRowsSplitChars();
        // String[] fieldNames = hiveConf.getOutputFieldNames();
        // boolean isCompressed = hiveConf.getOutputHiveCompress();
        //
        // // Uncompressed output path:
        // if (!isCompressed) {
        //     // Path file = FileOutputFormat.getTaskOutputPath(context);
        //     String name = context.getConfiguration().get(name, "outData");
        //     String extension = ".data";
        //     Path file = null;
        //     file = FileOutputFormat.getPathForWorkFile((TaskInputOutputContext) context, name, extension);
        //     file = this.getDefaultWorkFile(context, extension);
        //     FileSystem fs = FileSystem.get(context.getConfiguration()); // file.getFileSystem(job);
        //     FSDataOutputStream fileOut = fs.create(file, context);
        // }
        //
        // // Compressed output path:
        // String compresseCodec = hiveConf.getOutputHiveCompressCodec();
        // CompressionCodec codec = HDFSUtils.getCompressCodec(compresseCodec, context.getConfiguration());
        // String name = context.getConfiguration().get(getOutputHiveOrderFileNamePrefix, "outData");
        // String extension = codec.getDefaultExtension();
        // Path file = null;
        // file = FileOutputFormat.getPathForWorkFile((TaskInputOutputContext) context, name, extension);
        // file = this.getDefaultWorkFile(context, extension);
        // // build the filename including the extension
        // Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        // FileSystem fs = file.getFileSystem(job);
        // FSDataOutputStream fileOut = fs.create(file, progress);
        // DataOutputStream dos = new DataOutputStream(codec.createOutputStream(fileOut));
        // return new HiveRecordWriter<K, NullWritable>(job, dos, fieldSeparator, rowSeparator, fieldNames);
    }

    /**
     * Set the Hive output parameters and execute the DDL statements before the job runs.
     *
     * @param job the job configuration
     * @param compress whether the output files are compressed
     * @param compressCodec the compression codec class name
     * @param fieldSplitChars the field separator used in the output files
     * @param rowsSplitChars the row separator used in the output files
     * @param ddlHQL the HQL DDL statements to execute before the MapReduce job runs
     */
    public static void setOutputParameter(Configuration job, boolean compress, String compressCodec,
            String fieldSplitChars, String rowsSplitChars, String ddlHQL) {
        HiveConfiguration hiveConf = new HiveConfiguration(job);
        hiveConf.setOutputHiveCompress(compress);
        hiveConf.setOutputHiveCompressCodec(compressCodec);
        hiveConf.setOutputHiveFileFieldSplitChars(fieldSplitChars);
        hiveConf.setOutputHiveFileRowsSplitChars(rowsSplitChars);
        hiveConf.setOutputHiveExecuteDDLHQL(ddlHQL);
        try {
            executeDDLHQL(hiveConf);
            MRLog.info(LOG, "execute ddl hive sql success!");
        } catch (SQLException e) {
            MRLog.error(LOG, "execute ddl hive sql error!");
            e.printStackTrace();
        }
    }

    /**
     * Register this output format on the job and set the target Hive table.
     *
     * @param job the job
     * @param tableName the table to insert data into
     */
    public static void setOutput(Job job, String tableName) {
        job.setOutputFormatClass(HiveOutputFormat.class);
        job.setReduceSpeculativeExecution(false);
        HiveConfiguration dbConf = new HiveConfiguration(job.getConfiguration());
        dbConf.setOutputHiveTableName(tableName);
    }

    /**
     * Execute the configured DDL HQL statements (semicolon-separated).
     *
     * @param hiveConf the Hive configuration
     * @throws SQLException if a statement cannot be created
     */
    public static void executeDDLHQL(HiveConfiguration hiveConf) throws SQLException {
        String ddls = hiveConf.getOutputHiveExecuteDDLHQL();
        if (null == ddls || ddls.trim().length() <= 0) {
            return;
        }
        String ddl[] = ddls.split(";");
        Connection conn = null;
        try {
            conn = hiveConf.getOutputConnection();
        } catch (ClassNotFoundException e) {
            MRLog.error(LOG, "create hive conn error!");
            e.printStackTrace();
        }
        if (null == conn) { // connection could not be created, nothing to execute
            return;
        }
        Statement stat = conn.createStatement();
        for (int i = 0; i < ddl.length; i++) {
            try {
                stat.execute(ddl[i]); // DDL statements return no result set
            } catch (Exception e) {
                MRLog.errorException(LOG, "execute ddl error, hql:" + ddl[i], e);
            }
        }
        // close(conn);
    }

    /**
     * Close the given connection, logging any error.
     *
     * @param conn the connection to close
     */
    public static void close(Connection conn) {
        if (null != conn) {
            try {
                conn.close();
            } catch (SQLException e) {
                MRLog.error(LOG, "Close connection error!");
            }
        }
    }
    /**
     * Execute a single DDL HQL statement.
     *
     * @param hiveConf the Hive configuration
     * @param ddl the DDL statement to execute
     * @throws SQLException if a statement cannot be created
     */
    public static void executeDDLHQL(HiveConfiguration hiveConf, String ddl) throws SQLException {
        if (null == ddl || ddl.trim().length() <= 0) {
            return;
        }
        Connection conn = null;
        try {
            conn = hiveConf.getOutputConnection();
        } catch (ClassNotFoundException e) {
            MRLog.error(LOG, "create hive conn error!");
            e.printStackTrace();
        }
        if (null == conn) { // connection could not be created, nothing to execute
            return;
        }
        Statement stat = conn.createStatement();
        try {
            stat.execute(ddl);
        } catch (Exception e) {
            MRLog.errorException(LOG, "execute ddl error, hql:" + ddl, e);
        }
        // close(conn);
    }

    @Override
    public void checkOutputSpecs(JobContext context) throws IOException {
    }

    @Override
    public void handle(Job conf) throws Exception {
        // Validate the Hive output configuration before the job is submitted.
        HiveConfiguration hconf = new HiveConfiguration(conf.getConfiguration());

        // Row separator of the output file
        String outRowChars = hconf.getOutputHiveFileRowsSplitChars();
        if (null == outRowChars || outRowChars.length() <= 0) {
            String meg = "[MR ERROR]The output row separator <" + HiveConfiguration.OUTPUT_HIVE_ROWS_SPLITCHARS
                    + "> is not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        // Field separator of the output file
        String outFileSplitChars = hconf.getOutputHiveFileFieldSplitChars();
        if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) {
            String meg = "[MR ERROR]The output field separator <" + HiveConfiguration.OUTPUT_HIVE_FIELD_SPLITCHARS
                    + "> is not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        boolean para = hconf.getOutputHiveCompress();
        // Compression codec (must be one of HDFSUtils.CompressCodec)
        String outCompressCodec = hconf.getOutputHiveCompressCodec();
        if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) {
            String meg = "[MR ERROR]The compression codec <" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC
                    + "> does not exist.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        // Target file path of the MR output
        String outTargetpath = hconf.getOutputTargetFilePath();
        hconf.setOutputTargetPath(outTargetpath);
        if (null == outTargetpath || outTargetpath.trim().length() <= 0) {
            MRLog.warn(LOG, "[MR WARN]The MR output target path <" + HiveConfiguration.OUTPUT_HIVE_TARGET_PATH
                    + "> is not set.");
        }

        // Hive connection URL
        String hiveUrl = hconf.getOutPutHiveConfigUrl();
        if (null == hiveUrl || hiveUrl.trim().length() <= 0) {
            String meg = "[MR ERROR]The Hive connection URL <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_URL
                    + "> is not set.";
            LOG.error(meg);
            throw new Exception(meg);
        }

        // Hive user name
        String hiveUser = hconf.getOutPutHiveConfigUser();
        if (null == hiveUser || hiveUser.trim().length() <= 0) {
            LOG.warn("[MR WARN]The Hive user name <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_USER + "> is not set.");
        }

        // Hive password
        String hivePwd = hconf.getOutPutHiveConfigPassword();
        if (null == hivePwd || hivePwd.trim().length() <= 0) {
            LOG.warn("[MR WARN]The Hive password <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_PASSWORD + "> is not set.");
        }

        // Target table name
        String tableName = hconf.getOutputHiveTableName();
        if (null == tableName || tableName.trim().length() <= 0) {
            String meg = "[MR ERROR]The Hive table name <" + HiveConfiguration.OUTPUT_TABLE + "> is not set.";
            LOG.error(meg);
            throw new Exception(meg);
        }
        // Partition fields
        String partitionField[] = hconf.getOutputHivePartitionField();
        if (null != partitionField && partitionField.length > 0) {
            // Every partition field must also appear in the output field names.
            String[] outputFieldName = hconf.getOutputFieldNames();
            if (null == outputFieldName || outputFieldName.length <= 0) {
                String meg = "[MR ERROR]The output field names <" + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY
                        + "> are not set.";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }

            for (int i = 0; i < partitionField.length; i++) {
                boolean isExist = false;
                for (String s : outputFieldName) {
                    if (s.equals(partitionField[i])) {
                        isExist = true;
                        break;
                    }
                }

                if (!isExist) {
                    String meg = "[MR ERROR]The partition field " + partitionField[i] + " in <"
                            + HiveConfiguration.OUTPUT_HIVE_PARTITION_FIELD + "> is not one of the output field names <"
                            + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + ">.";
                    MRLog.error(LOG, meg);
                    throw new Exception(meg);
                }
            }

            String orderOutputTempPath = hconf.getOutputHiveOrderTempPath();
            if (null == orderOutputTempPath || orderOutputTempPath.trim().length() <= 0) {
                String meg = "[MR ERROR]The order output temp path <" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH
                        + "> is not set.";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }

            String orderOutputFileNamePrefix = hconf.getOutputHiveOrderFileNamePrefix();
            if (null == orderOutputFileNamePrefix || orderOutputFileNamePrefix.trim().length() <= 0) {
                String meg = "[MR WARN]The order output file name prefix <"
                        + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + "> is not set.";
                MRLog.warn(LOG, meg);
            }

            long orderOutputFileMaxCount = hconf.getOutputHiveOrderFileMaxCount();
            if (orderOutputFileMaxCount == 0) {
                String meg = "[MR ERROR]The order output file max count <"
                        + HiveConfiguration.OUTPUT_HIVE_ORDER_FILEMAXCOUNT
                        + "> must be greater than 0, or -1 for no limit.";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }
        }

        // DDL statements to execute before the job
        String ddlHQL = hconf.getOutputHiveExecuteDDLHQL();
        if (null == ddlHQL || ddlHQL.trim().length() <= 0) {
            LOG.warn("[MR WARN]The Hive DDL statement <" + HiveConfiguration.OUTPUT_HIVE_DDL_HQL + "> is not set.");
        }
        try {
            executeDDLHQL(hconf);
            MRLog.info(LOG, "execute ddl hive sql success!");
        } catch (SQLException e) {
            MRLog.error(LOG, "execute ddl hive sql error!");
            e.printStackTrace();
        }

        conf.setReduceSpeculativeExecution(false);
        conf.setOutputFormatClass(HiveOutputFormat.class);
        conf.setOutputKeyClass(DBRecord.class);
        conf.setOutputValueClass(NullWritable.class);
        if (null != partitionField && partitionField.length > 0) {
            conf.setCombinerClass(DBGroupReducer.class);
            conf.setReducerClass(DBPartitionReducer.class);
        } else {
            conf.setCombinerClass(DBGroupReducer.class);
            conf.setReducerClass(DBReducer.class);
        }
    }
}
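For reference, the sketch below shows how a job driver might wire this output format together. It is a minimal sketch, not part of the class above: the driver class name, table name, separators, and DDL string are illustrative, the mapper and input-format setup is omitted, and the Hive connection settings (URL, user, password) are assumed to be configured elsewhere through HiveConfiguration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical driver class; only setOutput/setOutputParameter come from HiveOutputFormat itself.
public class HiveOutputJobDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The Hive connection properties read by HiveConfiguration are assumed to already be set in conf.
        Job job = Job.getInstance(conf, "hive-output-example");

        // Register HiveOutputFormat for this job and record the target table name.
        HiveOutputFormat.setOutput(job, "demo_table");

        // Describe the output file layout and run the (illustrative) DDL before the job starts.
        HiveOutputFormat.setOutputParameter(job.getConfiguration(),
                false,   // compress: write plain text files
                null,    // compressCodec: unused when compression is off
                "\001",  // fieldSplitChars: Hive's default field delimiter (Ctrl-A)
                "\n",    // rowsSplitChars: one record per line
                "CREATE TABLE IF NOT EXISTS demo_table (id INT, name STRING)");

        // Mapper, input format and input/output paths would be configured here.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that a framework calling handle(Job) gets the same wiring automatically: that method validates the separators, codec, connection settings, and partition fields, registers HiveOutputFormat with DBRecord/NullWritable as the output types, and picks DBPartitionReducer or DBReducer depending on whether partition fields are configured.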