// Java tutorial
package com.ebay.erl.mobius.core.model;

import java.io.DataInput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableComparable;

import com.ebay.erl.mobius.core.datajoin.DataJoinKey;
import com.ebay.erl.mobius.core.sort.Sorter;
import com.ebay.erl.mobius.core.sort.Sorter.Ordering;

/**
 * Comparator for comparing values from two columns (object1 and
 * object2 in the compare method).
 * <p>
 *
 * This class supports comparing exchangeable type values, such as
 * comparing a number in string format to a long.
 *
 * <p>
 * Note that {@link #compare(Object, Object, Configuration)} overwrites
 * the two type fields set through {@link #setType(byte, byte)}, so an
 * instance carries mutable state between calls.
 *
 * <p>
 * This product is licensed under the Apache License, Version 2.0,
 * available at http://www.apache.org/licenses/LICENSE-2.0.
 *
 * This product contains portions derived from Apache hadoop which is
 * licensed under the Apache License, Version 2.0, available at
 * http://hadoop.apache.org.
 *
 * 2007 2012 eBay Inc., Evan Chiu, Woody Zhou, Neel Sundaresan
 */
@SuppressWarnings("unchecked")
public class TupleColumnComparator {

    private static final Log LOGGER = LogFactory.getLog(TupleColumnComparator.class);

    /** Type code used to read object1 when it is handed in as a {@link DataInput}. */
    private byte type_for_object1;

    /** Type code used to read object2 when it is handed in as a {@link DataInput}. */
    private byte type_for_object2;

    /**
     * Column name to positional index, built lazily from the first
     * sorters array passed to {@link #getIdxMapping(Sorter[])} and then
     * reused for every later call (the field is static, so it is shared
     * by all instances).
     */
    private static TreeMap<String, Integer> _IDX_MAPPING;

    /**
     * Sets the type codes of the two objects that will be passed to
     * {@link #compare(Object, Object, Configuration)}.
     *
     * @param type_for_object1 type code of the first object
     * @param type_for_object2 type code of the second object
     */
    public void setType(byte type_for_object1, byte type_for_object2) {
        this.type_for_object1 = type_for_object1;
        this.type_for_object2 = type_for_object2;
    }

    /**
     * to be called in {@link DataJoinKey}
     * <p>
     * When <code>sorters</code> is empty the keys are compared with
     * <code>key1.compareTo(key2)</code>. Otherwise both keys are cast to
     * {@link Tuple} and compared column by column, in the order of the
     * <code>sorters</code> array; the first column that differs decides
     * the result, reversed when that sorter's ordering is DESC.
     *
     * @param key1    the first key
     * @param key2    the second key
     * @param sorters the sort columns, in priority order; must not be null
     * @param conf    passed through to {@link #compare(Object, Object, Configuration)}
     * @return a negative, zero or positive value following the usual
     *         comparator convention
     * @throws IllegalArgumentException if a sorter reports an ordering
     *                                  other than ASC or DESC
     * @throws RuntimeException         wrapping any {@link IOException}
     *                                  raised while comparing
     */
    public int compareKey(WritableComparable key1, WritableComparable key2, Sorter[] sorters, Configuration conf) {
        if (sorters.length == 0) {
            return key1.compareTo(key2);
        }

        // when this method is called, and sorters is not empty,
        // key1 and key2 must be Tuple instances.
        Tuple k1 = (Tuple) key1;
        Tuple k2 = (Tuple) key2;

        // the sorting priority is the same as the sorters array
        for (int i = 0; i < sorters.length; i++) {
            Sorter aSorter = sorters[i];
            String columnName = aSorter.getColumn();
            boolean forceNumeric = aSorter.forceSortNumerically();

            // get value from k1, k2
            Object v1 = this.getValue(k1, columnName, forceNumeric, sorters);
            Object v2 = this.getValue(k2, columnName, forceNumeric, sorters);

            // start to compare
            int result;
            try {
                this.setType(Tuple.getType(v1), Tuple.getType(v2));
                result = this.compare(v1, v2, conf);

                if (result != 0) {
                    // ordering is decided, return the result
                    Ordering ordering = aSorter.getOrdering();
                    switch (ordering) {
                    case ASC:
                        // remain the same
                        return result;
                    case DESC:
                        // reverse the ordering; -Integer.MIN_VALUE overflows
                        // back to Integer.MIN_VALUE, so map it explicitly to
                        // a positive value instead of negating it.
                        return (result == Integer.MIN_VALUE) ? Integer.MAX_VALUE : -result;
                    default:
                        throw new IllegalArgumentException(ordering + " is not a supported ordering.");
                    }
                }
            } catch (IOException e) {
                throw new RuntimeException("Cannot performe compareKey", e);
            }
        }

        // every sort column compared equal
        return 0;
    }

    /**
     * Compares two column values.
     * <p>
     * Each argument is first resolved through an {@link ObjectReader}:
     * a {@link DataInput} is read using the type code previously given
     * to {@link #setType(byte, byte)}, anything else is used as is. The
     * type fields are then overwritten with the types of the resolved
     * values.
     * <ul>
     * <li>Same type: the comparison is dispatched on that type.</li>
     * <li>One side is the null type: the null side sorts first.</li>
     * <li>Numeric vs. string, or two different numeric types: both
     * sides are compared as <code>double</code>.</li>
     * <li>Two different date types: compared as
     * <code>java.util.Date</code>, unless one of them is the time type,
     * which is rejected.</li>
     * </ul>
     *
     * @param object1 the first value, or a {@link DataInput} to read it from
     * @param object2 the second value, or a {@link DataInput} to read it from
     * @param conf    not used by this method
     * @return a negative, zero or positive value following the usual
     *         comparator convention
     * @throws IOException              if reading a value or the
     *                                  type-specific comparison fails
     * @throws IllegalArgumentException if the two types cannot be
     *                                  compared with each other, or the
     *                                  computed result equals
     *                                  {@link Integer#MAX_VALUE}, which is
     *                                  reserved as the "no result" marker
     */
    public int compare(Object object1, Object object2, Configuration conf) throws IOException {
        // Integer.MAX_VALUE marks "no result computed yet"
        int _compare_result = Integer.MAX_VALUE;

        ObjectReader reader1 = new ObjectReader(object1, type_for_object1);
        ObjectReader reader2 = new ObjectReader(object2, type_for_object2);

        final Object v1 = reader1.getValue();
        final Object v2 = reader2.getValue();

        // from here on work with the types of the resolved values
        setType(Tuple.getType(v1), Tuple.getType(v2));

        if (type_for_object1 == type_for_object2) {
            final byte equal_type = type_for_object1;

            TupleTypeHandler<Integer> equalTypeComparator = new TupleTypeHandler<Integer>() {

                @Override
                protected Integer on_array() throws IOException {
                    Array a1 = (Array) v1;
                    Array a2 = (Array) v2;
                    return a1.compareTo(a2);
                }

                @Override
                protected Integer on_boolean() throws IOException {
                    return compare((Boolean) v1, (Boolean) v2);
                }

                @Override
                protected Integer on_byte() throws IOException {
                    return compare((Byte) v1, (Byte) v2);
                }

                @Override
                protected Integer on_byte_array() throws IOException {
                    byte[] b1 = (byte[]) v1;
                    byte[] b2 = (byte[]) v2;

                    // shorter array sorts first; contents only matter
                    // when both arrays have the same length
                    int diff = b1.length - b2.length;
                    if (diff == 0) {
                        // equal size
                        for (int i = 0; i < b1.length; i++) {
                            diff = compare((Byte) b1[i], (Byte) b2[i]);
                            if (diff != 0) {
                                return diff;
                            }
                        }
                    }
                    return diff;
                }

                @Override
                protected Integer on_date() throws IOException {
                    return compare((java.sql.Date) v1, (java.sql.Date) v2);
                }

                @Override
                protected Integer on_default() throws IOException {
                    throw new IllegalArgumentException("Unsupported type [" + String.format("0x%02X", type) + "]");
                }

                @Override
                protected Integer on_double() throws IOException {
                    return compare((Double) v1, (Double) v2);
                }

                @Override
                protected Integer on_float() throws IOException {
                    return compare((Float) v1, (Float) v2);
                }

                @Override
                protected Integer on_integer() throws IOException {
                    return compare((Integer) v1, (Integer) v2);
                }

                @Override
                protected Integer on_long() throws IOException {
                    return compare((Long) v1, (Long) v2);
                }

                @Override
                protected Integer on_null() throws IOException {
                    return 0;
                }

                @Override
                protected Integer on_null_writable() throws IOException {
                    return 0;
                }

                @Override
                protected Integer on_result_wrapper() throws IOException {
                    ResultWrapper w1 = (ResultWrapper) v1;
                    ResultWrapper w2 = (ResultWrapper) v2;

                    // the helper comparator is only needed for this type,
                    // so it is created here rather than on every call
                    TupleColumnComparator nested = new TupleColumnComparator();
                    nested.setType(equal_type, equal_type);
                    return nested.compare(w1.getCombinedResult(), w2.getCombinedResult(), null);
                }

                @Override
                protected Integer on_serializable() throws IOException {
                    return compare((Comparable) v1, (Comparable) v2);
                }

                @Override
                protected Integer on_short() throws IOException {
                    return compare((Short) v1, (Short) v2);
                }

                @Override
                protected Integer on_string() throws IOException {
                    return compare((String) v1, (String) v2);
                }

                @Override
                protected Integer on_string_map() throws IOException {
                    TreeMap<String, String> m1 = (TreeMap<String, String>) v1;
                    TreeMap<String, String> m2 = (TreeMap<String, String>) v2;
                    return compare(m1, m2);
                }

                @Override
                protected Integer on_time() throws IOException {
                    return compare((java.sql.Time) v1, (java.sql.Time) v2);
                }

                @Override
                protected Integer on_timestamp() throws IOException {
                    return compare((java.sql.Timestamp) v1, (java.sql.Timestamp) v2);
                }

                @Override
                protected Integer on_tuple() throws IOException {
                    Tuple t1 = (Tuple) v1;
                    Tuple t2 = (Tuple) v2;
                    return t1.compareTo(t2);
                }

                @Override
                protected Integer on_writable() throws IOException {
                    return compare((Comparable) v1, (Comparable) v2);
                }
            };

            _compare_result = equalTypeComparator.handle(equal_type);
        } else {
            // different type
            if (type_for_object1 == Tuple.NULL_TYPE || type_for_object2 == Tuple.NULL_TYPE) {
                // one of them is null type, the null side sorts first
                _compare_result = (type_for_object1 == Tuple.NULL_TYPE) ? -1 : 1;
            } else if (Tuple.isNumericalType(type_for_object1) && type_for_object2 == Tuple.STRING_TYPE) {
                // comparing string with numerical type
                _compare_result = compare(((Number) v1).doubleValue(), Double.parseDouble(((String) v2)));
            } else if (Tuple.isNumericalType(type_for_object2) && type_for_object1 == Tuple.STRING_TYPE) {
                // comparing string with numerical type
                _compare_result = compare(Double.parseDouble(((String) v1)), ((Number) v2).doubleValue());
            } else if (Tuple.isNumericalType(type_for_object1) && Tuple.isNumericalType(type_for_object2)) {
                // both are numerical type, but not exact the same
                if (LOGGER.isDebugEnabled()) {
                    // guard: avoid building the message when debug is off
                    LOGGER.debug("Comparing two different numberical type:" + Tuple.getTypeString(type_for_object1)
                            + " vs " + Tuple.getTypeString(type_for_object2));
                }
                _compare_result = compare(((Number) v1).doubleValue(), ((Number) v2).doubleValue());
            } else if (Tuple.isDateType(type_for_object1) && Tuple.isDateType(type_for_object2)) {
                // both are date type, but not exact the same
                if (LOGGER.isDebugEnabled()) {
                    // guard: avoid building the message when debug is off
                    LOGGER.debug("Comparing two different date type:" + Tuple.getTypeString(type_for_object1)
                            + " vs " + Tuple.getTypeString(type_for_object2));
                }

                if (type_for_object1 == Tuple.TIME_TYPE || type_for_object2 == Tuple.TIME_TYPE) {
                    // cannot compare java.sql.Time type with
                    // java.sql.Date or java.sql.Timestamp
                    throw new IllegalArgumentException(
                            "Cannot compare two columns with different types, column1 type:"
                                    + Tuple.getTypeString(Tuple.getType(object1)) + ", colum2 type:"
                                    + Tuple.getTypeString(Tuple.getType(object2)));
                }

                // one of them is java.sql.Date, the other one is java.sql.Timestamp,
                java.util.Date d1 = (java.util.Date) v1;
                java.util.Date d2 = (java.util.Date) v2;
                _compare_result = d1.compareTo(d2);
            } else {
                throw new IllegalArgumentException("Cannot compare two columns with different types, column1 type:"
                        + Tuple.getTypeString(Tuple.getType(object1)) + ", colum2 type:"
                        + Tuple.getTypeString(Tuple.getType(object2)));
            }
        }

        // comparing complete
        if (_compare_result == Integer.MAX_VALUE) {
            throw new IllegalArgumentException();
        }
        return _compare_result;
    }

    /**
     * Returns the mapping from sort column name (case-insensitive) to
     * its positional index, building it on first use.
     * <p>
     * NOTE(review): the mapping is cached in a static field and is never
     * rebuilt, so a later call with a different <code>sorters</code>
     * array still gets the mapping of the first call - confirm that all
     * callers within one JVM use the same sort columns.
     *
     * @param sorters the sort columns the mapping is derived from
     * @return the cached column name to index mapping
     */
    private TreeMap<String, Integer> getIdxMapping(Sorter[] sorters) {
        // the ordering of the values of the columns in k1 and k2
        // are sorted by the column name's alphabetic ordering,
        // see {@link Tuple#write}. We might not have the schema
        // of the tuple here, so we need to build the index according
        // to the selected columns from the sorters in alphabetic order,
        // then we can get the value directly using index.
        TreeMap<String, Integer> mapping = _IDX_MAPPING;
        if (mapping == null) {
            // fill a local map first and assign the shared field last, so
            // the field is never assigned a map that is still being filled
            mapping = new TreeMap<String, Integer>(String.CASE_INSENSITIVE_ORDER);

            List<String> columnNames = new ArrayList<String>();
            for (Sorter aSorter : sorters) {
                columnNames.add(aSorter.getColumn().toLowerCase());
            }
            Collections.sort(columnNames);

            for (int i = 0; i < columnNames.size(); i++) {
                mapping.put(columnNames.get(i), i);
            }
            _IDX_MAPPING = mapping;
        }
        return mapping;
    }

    /**
     * Reads the value of one sort column from a tuple.
     * <p>
     * A tuple whose schema is empty is addressed by position, using the
     * index from {@link #getIdxMapping(Sorter[])}; otherwise the column
     * name is used directly. With <code>forceNumeric</code> the value is
     * fetched through <code>getDouble</code> with <code>Double.NaN</code>
     * as its second argument (presumably the fallback value - verify
     * against {@link Tuple}).
     *
     * @param t            the tuple to read from
     * @param columnName   the sort column
     * @param forceNumeric whether to read the value as a double
     * @param sorters      all sort columns, needed to derive the index
     * @return the column value
     * @throws IllegalArgumentException if the tuple has no schema and
     *                                  <code>columnName</code> has no
     *                                  entry in the index mapping
     */
    private Object getValue(Tuple t, String columnName, boolean forceNumeric, Sorter[] sorters) {
        Object v = null;

        if (t.getSchema().length == 0) {
            // t is de-serialized from bytes, and schema
            // has not set, used the idxMapping to get the
            // value
            Integer mappedIdx = this.getIdxMapping(sorters).get(columnName);
            if (mappedIdx == null) {
                // fail with the column name instead of an anonymous
                // NullPointerException from auto-unboxing
                throw new IllegalArgumentException("No index mapping found for column [" + columnName + "]");
            }
            int columnIdx = mappedIdx.intValue();

            if (forceNumeric) {
                v = t.getDouble(columnIdx, Double.NaN);
            } else {
                v = t.get(columnIdx); // use its original value
            }
        } else {
            // this tuple has schema, use the column name
            // directly to get the value
            if (forceNumeric) {
                v = t.getDouble(columnName, Double.NaN);
            } else {
                v = t.get(columnName);
            }
        }
        return v;
    }

    /** Compares two doubles with {@link Double#compare(double, double)}. */
    private static int compare(double v1, double v2) {
        return Double.compare(v1, v2);
    }

    /** Compares two strings with {@link String#compareTo(String)}. */
    private static int compare(String v1, String v2) {
        return v1.compareTo(v2);
    }

    /**
     * Compares two string maps entry by entry, in key order.
     * <p>
     * An empty map sorts before a non-empty one. Otherwise keys are
     * compared pairwise, ignoring case; for equal keys the values are
     * compared. The first difference decides. If one map is a prefix of
     * the other, the map with fewer entries sorts first.
     *
     * @param m1 the first map
     * @param m2 the second map
     * @return a negative, zero or positive value following the usual
     *         comparator convention
     */
    private static int compare(TreeMap<String, String> m1, TreeMap<String, String> m2) {
        int _COMPARE_RESULT = Integer.MAX_VALUE;

        int m1_size = m1.size();
        int m2_size = m2.size();

        if (m1_size == 0 || m2_size == 0) {
            if (m1_size == m2_size) {
                return 0;
            } else if (m1_size != 0) {
                return 1;
            } else {
                return -1;
            }
        }

        Iterator<String> k1_it = m1.keySet().iterator();
        Iterator<String> k2_it = m2.keySet().iterator();

        boolean hasDiff = false;

        while (k1_it.hasNext()) {
            String k1 = k1_it.next();

            if (k2_it.hasNext()) {
                String k2 = k2_it.next();
                _COMPARE_RESULT = String.CASE_INSENSITIVE_ORDER.compare(k1, k2);

                if (_COMPARE_RESULT == 0) {
                    // same key, check their value
                    String v1 = m1.get(k1);
                    String v2 = m2.get(k2);
                    _COMPARE_RESULT = v1.compareTo(v2);
                }
            } else {
                // m1 has more keys than m2 and m1 has the same
                // values for all the keys in m2
                _COMPARE_RESULT = 1;
            }

            if (_COMPARE_RESULT != 0 && _COMPARE_RESULT != Integer.MAX_VALUE) {
                hasDiff = true;
                break; // has result
            }
        }

        if (!hasDiff) {
            if (k2_it.hasNext()) {
                // m2 has more keys than m1, and m2 has the same
                // values for all the keys in m1
                _COMPARE_RESULT = -1;
            } else {
                // m1 and m2 are the same
            }
        }

        return _COMPARE_RESULT;
    }

    /** Compares two values with <code>v1.compareTo(v2)</code>. */
    private static int compare(Comparable v1, Comparable v2) {
        return v1.compareTo(v2);
    }

    /**
     * Resolves a comparison operand: either the object itself, or, when
     * the object is a {@link DataInput}, the value read from it.
     */
    private static final class ObjectReader {

        private Object obj;

        private DataInput in;

        /** True when {@link #obj} is a {@link DataInput} to read from. */
        private boolean fromIO;

        private ReadFieldImpl reader;

        /** Holder handed to {@link ReadFieldImpl}; the read value is taken from index 0. */
        private List<Object> value = new ArrayList<Object>(1);

        private byte type;

        /**
         * @param obj  the value, or a {@link DataInput} to read it from
         * @param type the type code to read with; only used when
         *             <code>obj</code> is a {@link DataInput}, otherwise
         *             the type is taken from <code>obj</code> itself
         */
        public ObjectReader(Object obj, byte type) {
            fromIO = (obj instanceof DataInput);
            this.obj = obj;

            if (fromIO) {
                this.in = (DataInput) obj;
                this.reader = new ReadFieldImpl(value, this.in, null);
                this.type = type;
            } else {
                this.type = Tuple.getType(this.obj);
            }
        }

        /**
         * Returns the resolved value. A {@link ResultWrapper} is replaced
         * by its combined result.
         *
         * @return the value to compare
         * @throws IOException if reading from the {@link DataInput} fails
         */
        public Object getValue() throws IOException {
            Object result = null;

            if (this.fromIO) {
                this.reader.handle(type);
                result = this.value.get(0);
            } else {
                result = this.obj;
            }

            if (result instanceof ResultWrapper) {
                result = ((ResultWrapper) result).getCombinedResult();
            }
            return result;
        }
    }
}