// HbaseRecordReader: Hadoop RecordReader implementation that scans an HBase table.
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.ery.hadoop.mrddx.hbase; import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; import java.util.HashSet; import java.util.Hashtable; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NavigableSet; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.FilterList; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.util.ReflectionUtils; import 
com.ery.hadoop.mrddx.hbase.HbaseInputFormat.HbaseInputSplit; import com.ery.hadoop.mrddx.hbase.filter.HColumnCountGetFilter; import com.ery.hadoop.mrddx.hbase.filter.HColumnPaginationFilter; import com.ery.hadoop.mrddx.hbase.filter.HColumnPrefixFilter; import com.ery.hadoop.mrddx.hbase.filter.HColumnRangeFilter; import com.ery.hadoop.mrddx.hbase.filter.HDependentColumnFilter; import com.ery.hadoop.mrddx.hbase.filter.HFamilyFilter; import com.ery.hadoop.mrddx.hbase.filter.HFirstKeyOnlyFilter; import com.ery.hadoop.mrddx.hbase.filter.HInclusiveStopFilter; import com.ery.hadoop.mrddx.hbase.filter.HKeyOnlyFilter; import com.ery.hadoop.mrddx.hbase.filter.HMultipleColumnPrefixFilter; import com.ery.hadoop.mrddx.hbase.filter.HPageFilter; import com.ery.hadoop.mrddx.hbase.filter.HPrefixFilter; import com.ery.hadoop.mrddx.hbase.filter.HQualifierFilter; import com.ery.hadoop.mrddx.hbase.filter.HRandomRowFilter; import com.ery.hadoop.mrddx.hbase.filter.HRowFilter; import com.ery.hadoop.mrddx.hbase.filter.HSingleColumnValueExcludeFilter; import com.ery.hadoop.mrddx.hbase.filter.HSingleColumnValueFilter; import com.ery.hadoop.mrddx.hbase.filter.HSkipFilter; import com.ery.hadoop.mrddx.hbase.filter.HTimestampsFilter; import com.ery.hadoop.mrddx.hbase.filter.HValueFilter; import com.ery.hadoop.mrddx.hbase.filter.HWhileMatchFilter; import com.ery.hadoop.mrddx.log.MRLog; import com.ery.hadoop.mrddx.util.StringUtil; /** * A RecordReader that reads records from a SQL table. Emits LongWritables * containing the record number as key and DBWritables as value. * * @createDate 2013-1-15 * @version v1.0 * @param <T> */ @InterfaceAudience.Public @InterfaceStability.Evolving public class HbaseRecordReader<T extends HbaseWritable> extends RecordReader<LongWritable, T> { public static final Log LOG = LogFactory.getLog(HbaseRecordReader.class); HTable table = null; // HBase private HbaseInputSplit inputSplit; // ? 
private Class<T> inputClass; // conf private Configuration conf; // hbase conf private HbaseConfiguration dbConf; // ?? private String tableName; // ?,?(family:column1 target1,target2) private Map<String, String[]> srcTargetFiledNameMap; // ?? private ResultScanner resultScanner = null; // ?? private long pos = 0; // key private LongWritable key = null; // ? private T value = null; // ? private String splitSign; // private String[][] clusterFieldNames; // clusterFieldNames private String[][] inColumnSplitRelations; /** * */ public HbaseRecordReader() { } /** * * * @param split * @param inputClass valueclass * @param conf ? * @param dbConfig hbase? * @param tableName ?? * @param fieldNames ?? * @throws SQLException */ public HbaseRecordReader(HbaseInputFormat.HbaseInputSplit split, Class<T> inputClass, Configuration conf, HbaseConfiguration dbConfig, String tableName, String columnRelation) { this.srcTargetFiledNameMap = new Hashtable<String, String[]>(); this.inputSplit = split; this.inputClass = inputClass; this.conf = conf; this.dbConf = dbConfig; this.tableName = tableName; this.splitSign = dbConf.getInputHBaseColumnSplitSign(); List<String[]> list = new ArrayList<String[]>();// ? List<String[]> rela = new ArrayList<String[]>();// StringUtil.decodeOutColumnSplitRelation(columnRelation, list, rela); this.clusterFieldNames = list.toArray(new String[0][0]); this.inColumnSplitRelations = rela.toArray(new String[0][0]); for (int i = 0; i < this.clusterFieldNames.length; i++) { String stfn[] = this.clusterFieldNames[i]; if (stfn.length == 2) { this.srcTargetFiledNameMap.put(stfn[0] + HbaseConfiguration.sign_lineae + stfn[1], this.inColumnSplitRelations[i]); } } } /** * * * @return * @throws IOException IO */ protected ResultScanner executeQuery() throws IOException { table = new HTable(this.conf, this.tableName); Scan scan = new Scan(); scan.setCacheBlocks(false); // ? 
// TIMERANGE long timeRange[] = StringUtil.valueOfStringToLong(this.dbConf.getInputHBaseQueryTimerange()); if (timeRange.length == 2 && timeRange[0] <= timeRange[1]) { scan.setTimeRange(timeRange[0], timeRange[1]); } // FILTER FilterList lstFilter = new FilterList(); String filterContent = this.dbConf.getInputHBaseQueryFilters(); if (null != filterContent && filterContent.length() > 0) { String filters[] = filterContent.split("-"); for (int i = 0; i < filters.length; i++) { String tmp = StringUtil.decodeString(filters[i], "[", "]"); if (null == filters[i]) { continue; } Map<String, String> mapFilterValue = StringUtil.valueOfStringToHashMap(tmp, HbaseConfiguration.sign_comma, HbaseConfiguration.sign_colon); Filter filter = this.getFilter(mapFilterValue); if (null != filter) { lstFilter.addFilter(filter); } } } if (lstFilter.getFilters().size() > 0) { scan.setFilter(lstFilter); } // FAMILYS String familys[] = this.dbConf.getInputHBaseQueryFamilys(); if (null != familys) { for (int i = 0; i < familys.length; i++) { scan.addFamily(familys[i].getBytes()); } } // FAMILYCOLUMNS String familyColumns[] = this.dbConf.getInputHBaseQueryFamilyColumns(); if (null != familyColumns) { for (int i = 0; i < familyColumns.length; i++) { String fcolumn[] = familyColumns[i].split(HbaseConfiguration.sign_colon); scan.addColumn(fcolumn[0].getBytes(), fcolumn[1].getBytes()); } } // ?, ? // STARTROW String startRow = this.inputSplit.getStart(); if (null != startRow) { scan.setStartRow(Bytes.toBytes(startRow)); } // STOPROW String stopRow = this.inputSplit.getEnd(); if (null != startRow) { scan.setStopRow(Bytes.toBytes(stopRow)); } // TIMESTAMP long timestamp = this.dbConf.getInputHBaseQueryTimestamp(); if (timestamp != -1) { scan.setTimeStamp(timestamp); } // ?? this.printScanConditions(scan); ResultScanner scaner = table.getScanner(scan); // table.close(); return scaner; } /** * ??? * * @param scan ?? 
*/ private void printScanConditions(Scan scan) { StringBuilder logMeg = new StringBuilder(); logMeg.append("TimeRange==>"); logMeg.append(scan.getTimeRange().getMin()); logMeg.append(":"); logMeg.append(scan.getTimeRange().getMax()); logMeg.append("\n"); logMeg.append("Filter==>"); Filter filter = scan.getFilter(); if (null != filter && filter instanceof FilterList) { List<Filter> filterLst = ((FilterList) filter).getFilters(); for (Filter f : filterLst) { logMeg.append(f.getClass().getName()); logMeg.append(":"); logMeg.append(f.toString()); } } logMeg.append("\n"); logMeg.append("Family-Column==>"); Map<byte[], NavigableSet<byte[]>> mapFamily = scan.getFamilyMap(); Iterator<byte[]> iterator = mapFamily.keySet().iterator(); while (iterator.hasNext()) { byte[] key = iterator.next(); logMeg.append(new String(key)); NavigableSet<byte[]> set = mapFamily.get(key); if (null == set) { continue; } Iterator<byte[]> setIterator = set.iterator(); while (setIterator.hasNext()) { logMeg.append(new String(setIterator.next())); logMeg.append(","); } } logMeg.append("\n"); logMeg.append("MaxVersions==>"); logMeg.append(scan.getMaxVersions()); logMeg.append("\n"); logMeg.append("StartRow==>"); logMeg.append(new String(scan.getStartRow())); logMeg.append("\n"); logMeg.append("StopRow==>"); logMeg.append(new String(scan.getStopRow())); MRLog.info(LOG, "[HBase scan conditions]\n" + logMeg.toString()); } public LongWritable getCurrentKey() { return key; } public T getCurrentValue() { return value; } public LongWritable createKey() { return new LongWritable(0); } public T createValue() { return ReflectionUtils.newInstance(this.inputClass, this.conf); } public long getPos() throws IOException { return pos; } @Deprecated public boolean next(LongWritable key, T value) throws IOException { this.key = key; this.value = value; return this.nextKeyValue(); } @Override public float getProgress() throws IOException { return 0.0f / Integer.MAX_VALUE; } /** * @return * @throws IOException */ 
public boolean nextKeyValue() throws IOException { if (this.key == null) { this.key = new LongWritable(); } if (this.value == null) { this.value = this.createValue(); } // if (null == this.resultScanner) { MRLog.info(LOG, "?"); this.resultScanner = this.executeQuery(); } Result result = this.resultScanner.next(); if (null == result) { return false; } KeyValue keyValue[] = result.raw(); this.value.readFields(keyValue, this.splitSign, this.srcTargetFiledNameMap); return true; } /** * ??() * * @param keyValue keyvalue */ protected void getRowAllVersionData(KeyValue[] keyValue) { // ??-? Set<String> setFamilyColumn = new HashSet<String>(); for (KeyValue kv : keyValue) { setFamilyColumn.add(new String(kv.getFamily()) + "-" + new String(kv.getQualifier())); } int columnCount = keyValue.length; // int fcCount = setFamilyColumn.size();// ?? if (columnCount <= 0 || fcCount <= 0) { MRLog.warn(LOG, "??!"); return; } // ? int percount = columnCount / fcCount; for (int i = 0; i < percount; i++) { List<HBaseKeyValue> lst = new ArrayList<HBaseKeyValue>(); for (int j = 0; j < percount * fcCount; j += percount) { HBaseKeyValue kv = new HBaseKeyValue(); kv.setFamily(new String(keyValue[j + i].getFamily())); kv.setQualifier(new String(keyValue[j + i].getQualifier())); kv.setValue(new String(keyValue[j + i].getValue())); lst.add(kv); } // to do save record(lst) } } @Override public void close() throws IOException { if (null != this.resultScanner) { this.resultScanner.close(); } try { if (table != null) { table.close(); } } catch (Exception e) { } } /** * ?Filter * * @param mapFilterValue ? * @return */ public Filter getFilter(Map<String, String> mapFilterValue) { try { int type = Integer.parseInt(mapFilterValue.get(HbaseConfiguration.FILTERTYPE_NAME)); return this.getFilter(type, mapFilterValue); } catch (Exception e) { return null; } } /** * ?Filter * * @param filterType (int) * @param values ? 
* @return */ public Filter getFilter(int filterType, Map<String, String> mapFilterValue) { switch (filterType) { case HbaseConfiguration.Filter_Type_ColumnCountGetFilter: return new HColumnCountGetFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_ColumnPaginationFilter: return new HColumnPaginationFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_ColumnPrefixFilter: return new HColumnPrefixFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_ColumnRangeFilter: return new HColumnRangeFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_DependentColumnFilter: return new HDependentColumnFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_FamilyFilter: return new HFamilyFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_QualifierFilter: return new HQualifierFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_RowFilter: return new HRowFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_ValueFilter: return new HValueFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_FirstKeyOnlyFilter: return new HFirstKeyOnlyFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_InclusiveStopFilter: return new HInclusiveStopFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_KeyOnlyFilter: return new HKeyOnlyFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_MultipleColumnPrefixFilter: return new HMultipleColumnPrefixFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_PageFilter: return new HPageFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_PrefixFilter: return new HPrefixFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_RandomRowFilter: return new HRandomRowFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_SingleColumnValueFilter: return new 
HSingleColumnValueFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_SingleColumnValueExcludeFilter: return new HSingleColumnValueExcludeFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_SkipFilter: return new HSkipFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_TimestampsFilter: return new HTimestampsFilter(mapFilterValue).getFilter(); case HbaseConfiguration.Filter_Type_WhileMatchFilter: return new HWhileMatchFilter(mapFilterValue).getFilter(); default: break; } return null; } /** * ?HBase? * * @return HBase? */ protected HbaseConfiguration getDBConf() { return dbConf; } @Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { // TODO Auto-generated method stub } }