// Java tutorial
package com.ery.hadoop.mrddx.file;

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.RCFile;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.ery.hadoop.mrddx.DBReducer;
import com.ery.hadoop.mrddx.IHandleFormat;
import com.ery.hadoop.mrddx.hive.HiveConfiguration;
import com.ery.hadoop.mrddx.log.MRLog;
import com.ery.hadoop.mrddx.util.HDFSUtils;

/**
 * File output format whose record writer is {@link RCFileRecordWriter}.
 * <p>
 * Besides creating the writer, this class validates the file-output settings of
 * a job and registers itself (together with {@link DBReducer}) on that job, see
 * {@link #handle(Job)}.
 *
 * @createDate 2013-1-10
 * @version v1.0
 * @param <K> output key type
 * @param <V> output value type
 */
public class RCFileOutputFormat<K, V> extends FileOutputFormat<K, V> implements IHandleFormat {
    public static final Log LOG = LogFactory.getLog(RCFileOutputFormat.class);

    /**
     * Stores the column count under {@link RCFile#COLUMN_NUMBER_CONF_STR}.
     * <p>
     * The positivity requirement is only enforced by an {@code assert}, so it is
     * checked only when the JVM runs with assertions enabled; callers should
     * validate the value themselves.
     *
     * @param conf configuration to write the column count into
     * @param columnNum number of columns; expected to be greater than 0
     */
    public static void setColumnNumber(Configuration conf, int columnNum) {
        assert columnNum > 0;
        conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, columnNum);
    }

    /**
     * Creates the record writer for a task attempt.
     *
     * @param context the task attempt the writer is created for
     * @return a new {@link RCFileRecordWriter} bound to {@code context} and this format
     * @throws IOException declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException,
            InterruptedException {
        return new RCFileRecordWriter<K, V>(context, this);
    }

    /**
     * Applies the file-output parameters through a {@link FileConfiguration}
     * created over {@code conf} with {@link FileConfiguration#FLAG_FILE_OUTPUT}.
     *
     * @param conf job configuration the settings are applied to
     * @param pOutputCompress whether output compression is enabled
     * @param pOutputCompressCodec compression codec setting
     * @param pOutputFileFieldSplitChars field separator characters
     * @param pOutputFileRowsSplitChars row separator characters
     * @throws Exception if the {@link FileConfiguration} cannot be created or updated
     */
    public static void setOutputParameter(Configuration conf, boolean pOutputCompress,
            String pOutputCompressCodec, String pOutputFileFieldSplitChars, String pOutputFileRowsSplitChars)
            throws Exception {
        FileConfiguration dbconf = new FileConfiguration(conf, FileConfiguration.FLAG_FILE_OUTPUT);
        dbconf.setOutputFileCompress(pOutputCompress);
        dbconf.setOutputFileCompressCodec(pOutputCompressCodec);
        dbconf.setOutputFileFieldSplitChars(pOutputFileFieldSplitChars);
        dbconf.setOutputFileRowsSplitChars(pOutputFileRowsSplitChars);
    }

    /**
     * Validates the file-output settings of the job and configures it to write
     * through this output format.
     * <p>
     * Checks, in order: the row separator is set, the field separator is set, the
     * compression codec is known when compression is enabled, the codec is not
     * BZip2, and at least one output field name is configured. On success the
     * column count, the output format class and the reducer class
     * ({@link DBReducer}) are set on the job.
     *
     * @param conf the job to validate and configure
     * @throws Exception if any of the checks above fails; the message names the
     *         offending configuration key where one is known
     */
    @Override
    public void handle(Job conf) throws Exception {
        // Validate the output parameters.
        FileConfiguration dbconf = new FileConfiguration(conf.getConfiguration(),
                FileConfiguration.FLAG_FILE_OUTPUT);

        // Row separator: must be present (not trimmed, so whitespace-only is accepted).
        String outRowChars = dbconf.getOutputFileRowsSplitChars();
        if (null == outRowChars || outRowChars.length() <= 0) {
            String meg = "<" + FileConfiguration.OUTPUT_FILE_ROWS_SPLIT_CHARS + ">";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        // Field separator: must be present.
        // NOTE(review): unlike the row separator this value is trimmed first, so a
        // whitespace-only separator (e.g. a tab) is rejected - confirm this is intended.
        String outFileSplitChars = dbconf.getOutputFileFieldSplitChars();
        if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) {
            String meg = "<" + FileConfiguration.OUTPUT_FILE_FIELD_SPLIT_CHARS + ">";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        boolean para = dbconf.getOutputFileCompress();

        // Compression codec: only has to be a known codec when compression is enabled.
        String outCompressCodec = dbconf.getOutputFileCompressCodec();
        if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) {
            String meg = "[MR ERROR]?<" + FileConfiguration.OUTPUT_FILE_COMPRESSCODEC + ">?.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        // BZip2Codec is rejected. The codec was read from the file-output
        // configuration, so the message names the file-output key (it previously
        // named the Hive key, pointing users at the wrong setting).
        // NOTE(review): this check runs even when compression is disabled - confirm.
        if (HDFSUtils.isBZip2CompressCodec(outCompressCodec)) {
            String meg = "[MR ERROR]?<" + FileConfiguration.OUTPUT_FILE_COMPRESSCODEC + ">??BZip2Codec.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        // Target path: copied from the target-file-path setting; a missing value only warns.
        // NOTE(review): the setter is invoked before the null/blank check, so it may
        // receive null - verify that setOutputTargetPath tolerates that.
        String outTargetpath = dbconf.getOutputTargetFilePath();
        dbconf.setOutputTargetPath(outTargetpath);
        if (null == outTargetpath || outTargetpath.trim().length() <= 0) {
            MRLog.warn(LOG, "MR<" + FileConfiguration.OUTPUT_FILE_TARGET_PATH + ">");
        }

        // Column count: setColumnNumber only guards "> 0" with an assert (off by
        // default), so fail explicitly here instead of hitting a NullPointerException
        // or silently configuring zero columns. Only the length is needed, hence the
        // element type of the array is irrelevant.
        Object[] outFieldNames = dbconf.getOutputFieldNames();
        if (null == outFieldNames || outFieldNames.length <= 0) {
            String meg = "[MR ERROR] output field names are not configured; "
                    + "the RCFile column count must be greater than 0.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
        setColumnNumber(conf.getConfiguration(), outFieldNames.length);

        conf.setOutputFormatClass(RCFileOutputFormat.class);
        conf.setReducerClass(DBReducer.class);
    }
}