Java tutorial
/** * Tencent is pleased to support the open source community by making TDW available. * Copyright (C) 2014 THL A29 Limited, a Tencent company. All rights reserved. * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS * OF ANY KIND, either express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.apache.hadoop.hive.ql.exec; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.StringTokenizer; import java.util.TreeMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.serde2.lazy.LazyString; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyByteObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.io.BytesWritable; public class ToolBox { public final static String dotDelimiter = "."; public final static String colonDelimiter = ":"; public final static String blankDelimiter = " "; public final static String tabDelimiter = "\t"; public final static String commaDelimiter = ","; public final static String starDelimiter = "*"; public final static String hiveDelimiter = "\1"; public final static String CBR_SWITCH_ATTR = "Cb.Switch"; public final static String CB_OPT_ATTR = "Cb.Optimization"; public final static String STATISTICS_COLUMNS_ATTR = "statistics.columns"; public final static String TABLE_HEADER_NAMES_ATTR = "TABLEHEADERNAMESATTR"; public final static String TABLE_HEADER_TYPES_ATTR = "TABLEHEADERTYPESATTR"; private static Log LOG = LogFactory.getLog("ToolBox"); public enum SortMethod { AscendSort, DescendSort }; SortMethod sortMethod; ArrayList<Tuple> tupleList; public class Tuple { String value; Integer freq; public Tuple(String v, Integer i) { this.value = v; this.freq = i; } public Tuple() { } public String getString() { return this.value; } public Integer getInteger() { return this.freq; } } public class TupleComparatorAscend implements Comparator<Tuple> { public int compare(Tuple t1, Tuple t2) { int res = t1.getInteger().compareTo(t2.getInteger()); if (res == 0) { if (t1.getString().compareTo(t2.getString()) < 0) { res = -1; } } if (res == 0) { res = t1.hashCode() - t2.hashCode(); } return res; } } public class TupleComparatorDescend implements Comparator<Tuple> { public int compare(Tuple t1, Tuple t2) { int res = t1.getInteger().compareTo(t2.getInteger()); if (res == 0) { if (t1.getString().compareTo(t2.getString()) < 0) { res = -1; } } if (res == 0) { res = t1.hashCode() - t2.hashCode(); } return -res; } } public ToolBox() { tupleList = new ArrayList<Tuple>(); } public void push(String value, Integer fre) { Tuple t = new Tuple(value, fre); tupleList.add(t); } public void ascendSort() { Comparator<Tuple> comparator = new TupleComparatorAscend(); Collections.sort(tupleList, comparator); } public void descendSort() { Comparator<Tuple> comparator = new TupleComparatorDescend(); Collections.sort(tupleList, comparator); } public int getCapacity() { return tupleList.size(); } public String getStringAtIdx(int idx) { return tupleList.get(idx).getString(); } public Integer getIntegeAtIdx(int idx) { return tupleList.get(idx).getInteger(); } void compact(java.util.Map<String, Integer> para, SortMethod sm, final Object o) { tupleList.clear(); for (String key : para.keySet()) { push(key, para.get(key)); } if (sm == ToolBox.SortMethod.AscendSort) { ascendSort(); } else { assert (sm == ToolBox.SortMethod.DescendSort); descendSort(); } if (o.getClass().getName().equalsIgnoreCase(Double.class.getName())) { double tailFactor = ((Double) o).doubleValue(); para.clear(); for (int idx = 0; idx < (int) (getCapacity() * tailFactor); idx++) { para.put(getStringAtIdx(idx), getIntegeAtIdx(idx)); } } else { assert (o.getClass().getName().equalsIgnoreCase(Integer.class.getName())); int tailFactor = ((Integer) o).intValue(); para.clear(); for (int idx = 0; idx < tailFactor; idx++) { para.put(getStringAtIdx(idx), getIntegeAtIdx(idx)); } } } void compactByAscendSort(java.util.Map<String, Integer> para, final double tailFactor) { tupleList.clear(); for (String key : para.keySet()) { push(key, para.get(key)); } ascendSort(); para.clear(); for (int idx = 0; idx < (int) (getCapacity() * tailFactor); idx++) { para.put(getStringAtIdx(idx), getIntegeAtIdx(idx)); } } void compactByDescendSort(java.util.Map<String, Integer> para, final double tailFactor) { tupleList.clear(); for (String key : para.keySet()) { push(key, para.get(key)); } descendSort(); para.clear(); for (int idx = 0; idx < (int) (getCapacity() * tailFactor); idx++) { para.put(getStringAtIdx(idx), getIntegeAtIdx(idx)); } } static HiveKey getHiveKey(String key, byte streamTag) { HiveKey keyWritable = new HiveKey(); int keylen = key.length(); keyWritable.setSize(keylen + 1); System.arraycopy(key.getBytes(), 0, keyWritable.get(), 0, keylen); keyWritable.get()[keylen] = streamTag; final int r = 0; keyWritable.setHashCode(r); return keyWritable; } static String getOriginalKey(BytesWritable key) { byte[] b = key.getBytes(); return new String(b, 0, key.getSize() - 1); } static String retrieveComponent(String s, String d, int idx) { StringTokenizer st = new StringTokenizer(s, d); String ret = null; for (int i = 0; i < idx; i++) { ret = st.nextToken(); } return ret; } static <T> ArrayList<TreeMap<String, T>> aggregateKey(TreeMap<String, T> para, String delimiter, int idx) { ArrayList<TreeMap<String, T>> a = new ArrayList<TreeMap<String, T>>(); String prekey = null; TreeMap<String, T> h = null; for (String s : para.keySet()) { if (prekey == null) { prekey = retrieveComponent(s, delimiter, idx); h = new TreeMap<String, T>(); h.put(s, para.get(s)); } else if (prekey.equals(s)) { h.put(s, para.get(s)); } else { prekey = retrieveComponent(s, delimiter, idx); ; a.add(h); h = new TreeMap<String, T>(); h.put(s, para.get(s)); } } a.add(h); return a; } @Deprecated static ArrayList<TreeMap<String, String>> aggregateKey_string(TreeMap<String, String> para, String delimiter, int idx) { ArrayList<TreeMap<String, String>> a = new ArrayList<TreeMap<String, String>>(); String prekey = null; TreeMap<String, String> h = null; for (String s : para.keySet()) { if (prekey == null) { prekey = retrieveComponent(s, delimiter, idx); h = new TreeMap<String, String>(); h.put(s, para.get(s)); } else if (prekey.equals(s)) { h.put(s, para.get(s)); } else { prekey = retrieveComponent(s, delimiter, idx); ; a.add(h); h = new TreeMap<String, String>(); h.put(s, para.get(s)); } } a.add(h); return a; } @Deprecated static ArrayList<TreeMap<String, Integer>> aggregateKey_Integer(TreeMap<String, Integer> para, String delimiter, int idx) { ArrayList<TreeMap<String, Integer>> a = new ArrayList<TreeMap<String, Integer>>(); String prekey = null; TreeMap<String, Integer> h = null; for (String s : para.keySet()) { if (prekey == null) { prekey = retrieveComponent(s, delimiter, idx); h = new TreeMap<String, Integer>(); h.put(s, para.get(s)); } else if (prekey.equals(s)) { h.put(s, para.get(s)); } else { prekey = retrieveComponent(s, delimiter, idx); ; a.add(h); h = new TreeMap<String, Integer>(); h.put(s, para.get(s)); } } a.add(h); return a; } static double calDistincValue(TreeMap<String, Integer> para, int num_sampled_rows) { int num_multiple = 0; int num_distinct = para.keySet().size(); LOG.debug("num_distinct: " + num_distinct); double stat_distinct_values; for (String s : para.keySet()) { if (para.get(s) > 1) { num_multiple++; } } LOG.debug("num_multiple: " + num_multiple); if (num_multiple == 0) { stat_distinct_values = -1; return stat_distinct_values; } else if (num_multiple == num_distinct) { stat_distinct_values = num_distinct; return stat_distinct_values; } int totalrows = num_sampled_rows; int f1 = num_distinct - num_multiple; int d = num_distinct; int numer = num_sampled_rows * d; int denom = (num_sampled_rows - f1) + f1 * num_sampled_rows / totalrows; LOG.debug("numer: " + numer); LOG.debug("denom: " + denom); int distinct_values = numer / denom; if (distinct_values < d) { distinct_values = d; } else if (distinct_values > totalrows) { distinct_values = totalrows; } LOG.debug("distinct_values: " + distinct_values); LOG.debug("totalrows: " + totalrows); stat_distinct_values = Math.floor(distinct_values + 0.5); if (stat_distinct_values > 0.1 * totalrows) { stat_distinct_values = -(stat_distinct_values / totalrows); } LOG.debug("stat_distinct_values: " + stat_distinct_values); return stat_distinct_values; } static String convertHivePrimitiveStringToLazyTypeString(String hivePrimitive) { String lazyType = null; if (hivePrimitive.equalsIgnoreCase("STRING")) lazyType = "LazyString"; else if (hivePrimitive.equalsIgnoreCase("BIGINT")) lazyType = "LazyInteger"; else if (hivePrimitive.equalsIgnoreCase("INT")) lazyType = "LazyInteger"; else if (hivePrimitive.equalsIgnoreCase("TINYINT")) lazyType = "LazyByte"; else if (hivePrimitive.equalsIgnoreCase("DOUBLE")) lazyType = "LazyDouble"; else if (hivePrimitive.equalsIgnoreCase("FLOAT")) lazyType = "LazyFloat"; else if (hivePrimitive.equalsIgnoreCase("BOOLEAN")) lazyType = "LazyBoolean"; return lazyType; } static String convertLazyObjectToString(Object value) { String className = value.getClass().getName(); if (className.endsWith("LazyByte")) { byte b = ((LazyByteObjectInspector) LazyPrimitiveObjectInspectorFactory .getLazyObjectInspector(PrimitiveCategory.BYTE, false, (byte) 0, false)).get(value); return String.valueOf(b); } if (className.endsWith("LazyInteger")) { int i = ((LazyIntObjectInspector) LazyPrimitiveObjectInspectorFactory .getLazyObjectInspector(PrimitiveCategory.INT, false, (byte) 0, false)).get(value); return String.valueOf(i); } if (className.endsWith("LazyLong")) { long l = ((LazyLongObjectInspector) LazyPrimitiveObjectInspectorFactory .getLazyObjectInspector(PrimitiveCategory.LONG, false, (byte) 0, false)).get(value); return String.valueOf(l); } if (className.endsWith("LazyFloat")) { float f = ((LazyFloatObjectInspector) LazyPrimitiveObjectInspectorFactory .getLazyObjectInspector(PrimitiveCategory.FLOAT, false, (byte) 0, false)).get(value); return String.valueOf(f); } if (className.endsWith("LazyDouble")) { double d = ((LazyDoubleObjectInspector) LazyPrimitiveObjectInspectorFactory .getLazyObjectInspector(PrimitiveCategory.DOUBLE, false, (byte) 0, false)).get(value); return String.valueOf(d); } if (className.endsWith("LazyString")) { return ((LazyString) value).getWritableObject().toString(); } return new String(""); } public static void debugNode(org.apache.hadoop.hive.ql.lib.Node para, int indent) { if (para.getChildren() == null) { return; } for (org.apache.hadoop.hive.ql.lib.Node node : para.getChildren()) { for (int idx = 0; idx < indent; idx++) { System.out.print(" "); } debugNode(node, indent + 1); } } static public class tableTuple { String tableName; String fieldName; public tableTuple() { tableName = null; fieldName = null; } public tableTuple(String tn, String fn) { this.tableName = tn; this.fieldName = fn; } public String getTableName() { return tableName; } public void setTableName(String tn) { this.tableName = tn; } public String getFieldName() { return fieldName; } } static public class tableAliasTuple { String tableName; String alias; public tableAliasTuple(String tn, String al) { this.tableName = tn; this.alias = al; } public tableAliasTuple() { } public String getTableName() { return tableName; } public String getAlias() { return alias; } } static public class tableDistinctTuple { String tableName; String distinctField; public tableDistinctTuple(String tn, String df) { this.tableName = tn; this.distinctField = df; } public tableDistinctTuple() { } public String getTableName() { return tableName; } public String getDistinctField() { return distinctField; } } }