Java tutorial
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.ery.hadoop.mrddx.hbase;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import com.ery.hadoop.mrddx.DBGroupReducer;
import com.ery.hadoop.mrddx.DBMapper;
import com.ery.hadoop.mrddx.DBRecord;
import com.ery.hadoop.mrddx.IHandleFormat;
import com.ery.hadoop.mrddx.db.mapreduce.DBWritable;
import com.ery.hadoop.mrddx.log.MRLog;
import com.ery.hadoop.mrddx.util.StringUtil;

/**
 * MapReduce input format that reads an HBase table. One {@link HbaseInputSplit}
 * is produced per region location returned for the configured start/stop row
 * range (see {@link #getSplits(JobContext)}).
 *
 * Created 2013-1-4.
 *
 * @version v1.0
 * @param <T> record type handed to the mapper
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class HbaseInputFormat<T extends HbaseWritable> extends InputFormat<LongWritable, T> implements Configurable,
        IHandleFormat {
    /** Logger shared by this class and its nested split class. */
    public static final Log LOG = LogFactory.getLog(HbaseInputFormat.class);

    /** HBase input configuration wrapper; created in {@link #setConf(Configuration)}. */
    private HbaseConfiguration dbConf;

    /**
     * @return the Hadoop configuration held by the wrapped {@link HbaseConfiguration}
     */
    @Override
    public Configuration getConf() {
        return dbConf.getConf();
    }

    /**
     * @return the HBase configuration wrapper created by {@link #setConf(Configuration)}
     */
    public HbaseConfiguration getDBConf() {
        return dbConf;
    }

    /**
     * Wraps the given configuration in an {@link HbaseConfiguration} flagged as HBase input.
     *
     * @param conf the Hadoop configuration
     */
    @Override
    public void setConf(Configuration conf) {
        this.dbConf = new HbaseConfiguration(conf, HbaseConfiguration.FLAG_HBASE_INPUT);
    }

    /**
     * Initializes the map-part of the job with the appropriate input settings.
     *
     * @param job The map-reduce job
     * @param inputClass record class stored as the input class
     * @param tableName name of the input table
     * @param srcTargetFieldNames value stored as the input HBase column relation
     */
    public static void setInput(Job job, Class<? extends DBWritable> inputClass, String tableName,
            String srcTargetFieldNames) {
        job.setInputFormatClass(HbaseInputFormat.class);
        HbaseConfiguration dbConf = new HbaseConfiguration(job.getConfiguration(),
                HbaseConfiguration.FLAG_HBASE_INPUT);
        dbConf.setInputClass(inputClass);
        dbConf.setInputTableName(tableName);
        dbConf.setInputHBaseColumnRelation(srcTargetFieldNames);
    }

    /**
     * Stores the query conditions for the input table in the configuration.
     *
     * @param job configuration to write to
     * @param timerange time range (long values)
     * @param startrow start row
     * @param stoprow stop row
     * @param timestamp timestamp
     * @param filters filters
     * @param familyColumns family/column entries
     * @param familys families
     */
    public static void setInputQueryCondition(Configuration job, long[] timerange, String startrow, String stoprow,
            long timestamp, String[] filters, String[] familyColumns, String[] familys) {
        HbaseConfiguration dbConf = new HbaseConfiguration(job, HbaseConfiguration.FLAG_HBASE_INPUT);
        dbConf.setInputHBaseQueryTimerange(StringUtil.valueOfLongToString(timerange));
        dbConf.setInputHBaseQueryStartRow(startrow);
        dbConf.setInputHBaseQueryStopRow(stoprow);
        dbConf.setInputHBaseQueryTimestamp(timestamp);
        dbConf.setInputHBaseQueryFilters(filters);
        dbConf.setInputHBaseQueryFamilyColumns(familyColumns);
        dbConf.setInputHBaseQueryFamilys(familys);
    }

    /**
     * Validates the configured input table and query conditions, logs the region
     * layout, and returns how many region locations cover the requested key range.
     *
     * @param job job configuration
     * @param startKey first row key of the range, or {@code null}
     * @param endKey last row key of the range, or {@code null}
     * @return number of region locations in the range
     * @throws Exception an {@link IOException} when the table name is missing, validation
     *         fails, or no region location is found; otherwise whatever the HBase client throws
     */
    public static int getTableHRegionInfoCount(Configuration job, String startKey, String endKey) throws Exception {
        HbaseConfiguration dbConf = new HbaseConfiguration(job, HbaseConfiguration.FLAG_HBASE_INPUT);
        String tableName = dbConf.getInputTableName();
        if (null == tableName) {
            String meg = "The name of table is null!";
            MRLog.error(LOG, meg);
            throw new IOException(meg);
        }

        if (!validateCondition(dbConf, tableName)) {
            String meg = "validate condition error!";
            MRLog.error(LOG, meg);
            throw new IOException(meg);
        }

        List<HRegionLocation> lstHRegionLocation = getTableHRegionInfo(job, tableName, startKey, endKey);
        if (null == lstHRegionLocation || lstHRegionLocation.size() <= 0) {
            String meg = "The account of table'regionInfo is zero!";
            MRLog.error(LOG, meg);
            throw new IOException(meg);
        }

        // Log both the full region layout and the subset selected by the key range.
        printTableAllRegionInfo(job, tableName);
        printTableRequestRegionInfo(lstHRegionLocation, tableName);
        return lstHRegionLocation.size();
    }

    /**
     * Returns the region locations of a table that fall inside a row-key range.
     * A {@code null} bound is replaced by the table's first start key / last end key.
     *
     * @param job job configuration
     * @param tableName table name
     * @param startKey first row key, or {@code null}
     * @param endKey last row key, or {@code null}
     * @return region locations in the range
     * @throws IOException if the HBase client fails
     */
    public static List<HRegionLocation> getTableHRegionInfo(Configuration job, String tableName, String startKey,
            String endKey) throws IOException {
        HTable table = new HTable(job, tableName);
        try {
            byte[][] startKeys = table.getStartKeys();
            byte[][] endKeys = table.getEndKeys();

            // Case 1: no bounds given, so use the whole table.
            if (null == startKey && null == endKey) {
                return table.getRegionsInRange(startKeys[0], endKeys[endKeys.length - 1]);
            }

            // Case 2: fill in whichever bound is missing from the table's own keys.
            if (startKey == null) {
                startKey = new String(startKeys[0]);
            }
            if (endKey == null) {
                endKey = new String(endKeys[endKeys.length - 1]);
            }
            return table.getRegionsInRange(startKey.getBytes(), endKey.getBytes());
        } finally {
            // Always release the table handle, including on failure.
            table.close();
        }
    }

    /**
     * Checks the configured family/column entries and families against the
     * families declared in the table descriptor.
     *
     * @param dbConf HBase input configuration
     * @param tableName table name
     * @return {@code true} when every configured entry is valid; {@code false} when the
     *         table name is {@code null} or any entry fails a check
     * @throws TableNotFoundException declared by the original signature
     * @throws IOException if the HBase client fails
     */
    protected static boolean validateCondition(HbaseConfiguration dbConf, String tableName)
            throws TableNotFoundException, IOException {
        if (null == tableName) {
            return false;
        }

        HTableDescriptor tableDes;
        HBaseAdmin admin = new HBaseAdmin(dbConf.getConf());
        try {
            tableDes = admin.getTableDescriptor(tableName.getBytes());
        } finally {
            // The admin is only needed to fetch the descriptor.
            admin.close();
        }

        // Collect the family names declared on the table.
        Set<String> familySets = new HashSet<String>();
        for (byte[] familyKey : tableDes.getFamiliesKeys()) {
            familySets.add(new String(familyKey));
        }

        // check column and family
        String[] familyColumns = dbConf.getInputHBaseQueryFamilyColumns();
        if (null != familyColumns) {
            for (String familyColumn : familyColumns) {
                if (null == familyColumn || familyColumn.trim().length() <= 0) {
                    String meg = "The parameter of columnfamily is null!";
                    MRLog.error(LOG, meg);
                    return false;
                }

                String[] fcolumn = familyColumn.split(HbaseConfiguration.sign_colon);
                if (fcolumn.length != 2) {
                    String meg = "The parameter of columnfamily is format error !";
                    MRLog.error(LOG, meg);
                    return false;
                }

                // The first part must name a family that exists on the table.
                HColumnDescriptor hcDesc = tableDes.getFamily(fcolumn[0].getBytes());
                boolean known = null != hcDesc && familySets.contains(fcolumn[0])
                        && fcolumn[0].equals(new String(hcDesc.getName()));
                if (!known) {
                    String meg = "Column is not exist! column:" + fcolumn[0];
                    MRLog.error(LOG, meg);
                    return false;
                }
            }
        }

        // check family
        String[] familys = dbConf.getInputHBaseQueryFamilys();
        if (null != familys) {
            for (String family : familys) {
                if (!familySets.contains(family)) {
                    String meg = "Family is not exist! family:" + family;
                    MRLog.error(LOG, meg);
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Creates an {@link HbaseRecordReader} for the given split using the configured
     * input class, table name and column relation.
     */
    @SuppressWarnings("unchecked")
    @Override
    public RecordReader<LongWritable, T> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        Class<T> inputClass = (Class<T>) (this.dbConf.getInputClass());
        return new HbaseRecordReader<T>((HbaseInputSplit) split, inputClass, this.getConf(), this.getDBConf(),
                this.dbConf.getInputTableName(), this.dbConf.getInputHBaseColumnRelation());
    }

    /**
     * Builds one split per region location in the configured start/stop row range.
     * A split uses the configured start (stop) row when the region contains it,
     * and the region's own start (end) key otherwise.
     */
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
        String startRow = this.dbConf.getInputHBaseQueryStartRow();
        String stopRow = this.dbConf.getInputHBaseQueryStopRow();
        String tableName = this.dbConf.getInputTableName();

        List<InputSplit> splits = new ArrayList<InputSplit>();
        List<HRegionLocation> lstHRegionLocation = getTableHRegionInfo(job.getConfiguration(), tableName, startRow,
                stopRow);
        for (HRegionLocation hRegionLocation : lstHRegionLocation) {
            HRegionInfo hRegionInfo = hRegionLocation.getRegionInfo();
            byte[] startKeyByte = hRegionInfo.getStartKey();
            byte[] endKeyByte = hRegionInfo.getEndKey();

            // NO.1: no bounds configured, so the split is exactly the region.
            if (null == startRow && null == stopRow) {
                splits.add(new HbaseInputSplit(new String(startKeyByte), new String(endKeyByte)));
                continue;
            }

            // NO.2: clip the region to the configured bounds where they fall inside it.
            String tempStart = null;
            String tempEnd = null;
            if (null != startRow && hRegionInfo.containsRow(startRow.getBytes())) {
                tempStart = startRow;
            }
            if (null != stopRow && hRegionInfo.containsRow(stopRow.getBytes())) {
                tempEnd = stopRow;
            }
            if (tempStart == null) {
                tempStart = new String(startKeyByte);
            }
            if (tempEnd == null) {
                tempEnd = new String(endKeyByte);
            }
            splits.add(new HbaseInputSplit(tempStart, tempEnd));
        }

        MRLog.info(LOG, "Finished hbase split!");
        return splits;
    }

    /**
     * Logs every region of the table together with the server hosting it.
     *
     * @param job job configuration
     * @param tableName table name
     * @throws IOException if the HBase client fails
     */
    private static void printTableAllRegionInfo(Configuration job, String tableName) throws IOException {
        HTable table = new HTable(job, tableName);
        try {
            StringBuilder regionLog = new StringBuilder();
            regionLog.append("<<Table[");
            regionLog.append(new String(table.getTableName()));
            regionLog.append("]all RegionInfo>>");

            NavigableMap<HRegionInfo, ServerName> mapHRegionInfo = table.getRegionLocations();
            for (Map.Entry<HRegionInfo, ServerName> entry : mapHRegionInfo.entrySet()) {
                ServerName server = entry.getValue();
                regionLog.append("\nHRegionInfo:");
                regionLog.append(entry.getKey().toString());
                regionLog.append("ServerInfo:");
                regionLog.append("{name=>:");
                regionLog.append(server.getServerName());
                regionLog.append(" ,HostAndPort=>:");
                regionLog.append(server.getHostAndPort());
                regionLog.append("}");
            }
            MRLog.info(LOG, regionLog.toString());
        } finally {
            // Always release the table handle, including on failure.
            table.close();
        }
    }

    /**
     * Logs the region locations selected for the request. Null locations or
     * null region infos are reported with a warning and skipped.
     *
     * @param lstHRegionInfo region locations to log
     * @param tableName table name
     */
    private static void printTableRequestRegionInfo(List<HRegionLocation> lstHRegionInfo, String tableName) {
        StringBuilder regionLog = new StringBuilder();
        regionLog.append("<<Table[");
        regionLog.append(tableName);
        regionLog.append("]request RegionInfo>>");
        for (HRegionLocation hRegionLocation : lstHRegionInfo) {
            regionLog.append("\nHRegionInfo:");
            if (null == hRegionLocation) {
                MRLog.warn(LOG, "??HRegionLocationnull!");
                continue;
            }

            HRegionInfo hRegionInfo = hRegionLocation.getRegionInfo();
            if (null == hRegionInfo) {
                MRLog.warn(LOG, "??HRegionInfonull!");
                continue;
            }
            regionLog.append(hRegionInfo.toString());
            regionLog.append("ServerInfo:");
            regionLog.append("{HostAndPort=>:");
            regionLog.append(hRegionLocation.getHostnamePort());
            regionLog.append("}");
        }
        MRLog.info(LOG, regionLog.toString());
    }

    /**
     * A InputSplit that spans a set of rows
     */
    public static class HbaseInputSplit extends InputSplit implements Writable {
        private String start;
        private String end;

        /** No-arg constructor; fields are populated by {@link #readFields(DataInput)}. */
        public HbaseInputSplit() {
        }

        /**
         * Convenience Constructor
         *
         * @param start row key the split starts at
         * @param end row key the split ends at
         */
        public HbaseInputSplit(String start, String end) {
            this.start = start;
            this.end = end;
            MRLog.info(LOG, "HBase Split rowkey range=>" + this.start + ":" + this.end);
        }

        public String getStart() {
            return start;
        }

        public String getEnd() {
            return end;
        }

        /** @return always an empty array; no locality hints are provided */
        @Override
        public String[] getLocations() throws IOException {
            return new String[] {};
        }

        /** @return always 0; the split size is not reported */
        @Override
        public long getLength() throws IOException {
            return 0;
        }

        @Override
        public void readFields(DataInput input) throws IOException {
            this.start = input.readUTF();
            this.end = input.readUTF();
        }

        @Override
        public void write(DataOutput output) throws IOException {
            output.writeUTF(this.start);
            output.writeUTF(this.end);
        }
    }

    /**
     * Reads the mandatory {@code inputFieldNames} entry from a parameter map.
     *
     * @param paraMap parameter map
     * @return the non-blank parameter value
     * @throws Exception if the entry is missing or blank
     */
    protected String getParamInputFieldNames(Map<String, String> paraMap) throws Exception {
        String para = paraMap.get("inputFieldNames");
        if (null == para || para.trim().length() <= 0) {
            String meg = "?<inputFieldNames>.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
        return para;
    }

    /**
     * Validates the HBase input parameters of the job and wires up the input
     * format, mapper, map output classes, task counts and optional combiner.
     * Missing optional query parameters only produce warnings.
     *
     * @param conf the job to configure
     * @throws Exception if a mandatory parameter is missing or malformed
     */
    @Override
    public void handle(Job conf) throws Exception {
        HbaseConfiguration hconf = new HbaseConfiguration(conf.getConfiguration(),
                HbaseConfiguration.FLAG_HBASE_INPUT);

        // The table name is mandatory.
        String tableName = hconf.getInputTableName();
        if (null == tableName || tableName.trim().length() <= 0) {
            String meg = "[MR ERROR]HBase??<" + HbaseConfiguration.INPUT_TABLE + ">?.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        // Validate the column relation against the input field names.
        String[] inputFieldName = hconf.getInputFieldNames();
        this.vParamSrcTargetFieldNames(hconf, inputFieldName);

        // Validate the time range.
        String[] timerange = hconf.getInputHBaseQueryTimerange();
        this.vParamQueryTimeRange(timerange);

        // Optional: start row.
        String startrow = hconf.getInputHBaseQueryStartRow();
        if (null == startrow || startrow.trim().length() <= 0) {
            MRLog.warn(LOG, "[MR WARN]?startrow<" + HbaseConfiguration.INPUT_QUERY_STARTROW + ">.");
        }

        // Optional: stop row.
        String stoprow = hconf.getInputHBaseQueryStopRow();
        if (null == stoprow || stoprow.trim().length() <= 0) {
            MRLog.warn(LOG, "[MR WARN]?stoprow<" + HbaseConfiguration.INPUT_QUERY_STOPROW + ">.");
        }

        // Optional: timestamp.
        long timestamp = hconf.getInputHBaseQueryTimestamp();
        if (timestamp <= -1) {
            MRLog.warn(LOG, "[MR WARN]?<" + HbaseConfiguration.INPUT_QUERY_TIMESTAMP + ">.");
        }

        // Optional: filters.
        String filters = hconf.getInputHBaseQueryFilters();
        if (null == filters || filters.length() <= 0) {
            MRLog.warn(LOG, "[MR WARN]??<" + HbaseConfiguration.INPUT_QUERY_FILTER + ">.");
        }

        // Optional: family/column entries; when present each must split into two parts on ':'.
        String[] familyColumns = hconf.getInputHBaseQueryFamilyColumns();
        if (null == familyColumns || familyColumns.length <= 0) {
            MRLog.warn(LOG, "[MR WARN]?<" + HbaseConfiguration.INPUT_QUERY_FAMILYCOLUMNS + ">.");
        }
        if (null != familyColumns) {
            for (String tmp : familyColumns) {
                // A null entry is reported as a format error rather than failing with an NPE.
                if (null == tmp || tmp.split(":").length != 2) {
                    String meg = "[MR ERROR]?<" + HbaseConfiguration.INPUT_QUERY_FAMILYCOLUMNS + ">.";
                    MRLog.error(LOG, meg);
                    throw new Exception(meg);
                }
            }
        }

        // Optional: families.
        String[] familys = hconf.getInputHBaseQueryFamilys();
        if (null == familys || familys.length <= 0) {
            MRLog.warn(LOG, "[MR WARN]??<" + HbaseConfiguration.INPUT_QUERY_FAMILYS + ">.");
        }

        conf.setInputFormatClass(HbaseInputFormat.class);
        hconf.setInputClass(DBRecord.class);

        // One map task per region location; no reducers when the job ends at the map phase.
        int taskNumber = HbaseInputFormat.getTableHRegionInfoCount(conf.getConfiguration(), startrow, stoprow);
        int reduceTasks = hconf.getInputMapEnd() ? 0 : taskNumber;

        // NOTE(review): the original layout was collapsed onto one line after a bare "//";
        // setNumMapTasks is treated as live code here. Confirm against version control.
        hconf.setNumMapTasks(taskNumber);
        hconf.setNumReduceTasks(reduceTasks);
        conf.setMapperClass(DBMapper.class);
        conf.setMapOutputKeyClass(DBRecord.class);
        conf.setMapOutputValueClass(DBRecord.class);
        if (hconf.getInputIsCombiner()) {
            conf.setCombinerClass(DBGroupReducer.class);
        }
    }

    /**
     * Validates the configured HBase column relation: it must be present, must decode
     * to non-empty results, every referenced field must be one of the input field
     * names, and a split sign must be configured when a relation has more than one field.
     *
     * @param hconf HBase input configuration
     * @param inputFieldName input field names
     * @throws Exception if any check fails
     */
    private void vParamSrcTargetFieldNames(HbaseConfiguration hconf, String[] inputFieldName) throws Exception {
        String inColumnRelation = hconf.getInputHBaseColumnRelation();
        if (null == inColumnRelation) {
            String meg = "[MR ERROR]<" + HbaseConfiguration.INPUT_HBASE_COLUMN_RELATION + ">?";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        // Both lists are filled by decodeOutColumnSplitRelation.
        List<String[]> list = new ArrayList<String[]>();
        List<String[]> rela = new ArrayList<String[]>();
        StringUtil.decodeOutColumnSplitRelation(inColumnRelation, list, rela);
        String[][] clusterFieldNames = list.toArray(new String[0][0]);
        String[][] inColumnSplitRelations = rela.toArray(new String[0][0]);
        if (clusterFieldNames.length <= 0 || inColumnSplitRelations.length <= 0) {
            String meg = "[MR ERROR]<" + HbaseConfiguration.INPUT_HBASE_COLUMN_RELATION + ">?";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        String splitSign = hconf.getInputHBaseColumnSplitSign();
        boolean noSplitSign = null == splitSign || splitSign.trim().length() <= 0;
        Set<String> setSrcFiled = StringUtil.parseStringArrayToSet(inputFieldName);
        for (String[] relation : inColumnSplitRelations) {
            for (String field : relation) {
                if (!setSrcFiled.contains(field)) {
                    String meg = "[MR ERROR]<" + HbaseConfiguration.INPUT_HBASE_COLUMN_RELATION + ">?<"
                            + HbaseConfiguration.SYS_INPUT_FIELD_NAMES_PROPERTY + ">";
                    LOG.error(meg);
                    throw new Exception(meg);
                }
            }

            // A relation with several fields needs a split sign.
            if (relation.length > 1 && noSplitSign) {
                String meg = "[MR ERROR]<" + HbaseConfiguration.INPUT_HBASE_COLUMN_RELATION + ">?1, ?<"
                        + HbaseConfiguration.INPUT_HBASE_COLUMN_SPLIT_SIGN + ">";
                LOG.error(meg);
                throw new Exception(meg);
            }
        }
    }

    /**
     * Validates the query time range: when present it must hold exactly two
     * values that parse as longs, with the first not greater than the second.
     * A missing range only produces a warning.
     *
     * @param timerange configured time range, may be {@code null} or empty
     * @throws Exception if the range is malformed
     */
    private void vParamQueryTimeRange(String[] timerange) throws Exception {
        if (null == timerange || timerange.length <= 0) {
            MRLog.warn(LOG, "[MR WARN]?<" + HbaseConfiguration.INPUT_QUERY_TIMERANGE + ">.");
            return;
        }

        if (timerange.length != 2) {
            String meg = "[MR ERROR]HBase?<" + HbaseConfiguration.INPUT_QUERY_TIMERANGE
                    + ">,:132342155,32423532.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        long rangeStart;
        long rangeEnd;
        try {
            rangeStart = Long.parseLong(timerange[0]);
            rangeEnd = Long.parseLong(timerange[1]);
        } catch (NumberFormatException e) {
            String meg = "[MR ERROR]HBase?<" + HbaseConfiguration.INPUT_QUERY_TIMERANGE + ">,??.";
            MRLog.error(LOG, meg);
            // Keep the parse failure as the cause so the offending value stays visible.
            throw new Exception(meg, e);
        }

        if (rangeStart > rangeEnd) {
            String meg = "[MR ERROR]HBase?<" + HbaseConfiguration.INPUT_QUERY_TIMERANGE + ">,?.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
    }
}