com.ebay.erl.mobius.core.model.TupleColumnComparator.java Source code

Java tutorial

Introduction

Here is the source code for com.ebay.erl.mobius.core.model.TupleColumnComparator.java

Source

package com.ebay.erl.mobius.core.model;

import java.io.DataInput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableComparable;

import com.ebay.erl.mobius.core.datajoin.DataJoinKey;
import com.ebay.erl.mobius.core.sort.Sorter;
import com.ebay.erl.mobius.core.sort.Sorter.Ordering;

/**
 * Comparator for comparing values from two columns (object1 and 
 * object2 in the compare method).
 * <p>
 * 
 * This class supports comparing exchangeable type values, such as
 * comparing a number in string format to a long.
 * 
 * <p>
 * This product is licensed under the Apache License,  Version 2.0, 
 * available at http://www.apache.org/licenses/LICENSE-2.0.
 * 
 * This product contains portions derived from Apache hadoop which is 
 * licensed under the Apache License, Version 2.0, available at 
 * http://hadoop.apache.org.
 * 
 * (c) 2007 - 2012 eBay Inc., Evan Chiu, Woody Zhou, Neel Sundaresan
 */
@SuppressWarnings("unchecked")
public class TupleColumnComparator {
    // Logger of this class; used for debug output when mixed types are compared.
    private static final Log LOGGER = LogFactory.getLog(TupleColumnComparator.class);

    // Type code of the first operand. Set through setType(byte, byte) and
    // overwritten by compare(Object, Object, Configuration) with the type of
    // the value that was actually compared.
    private byte type_for_object1;

    // Type code of the second operand; same life cycle as type_for_object1.
    private byte type_for_object2;

    // Lazily built by getIdxMapping(Sorter[]): maps a sorter column name
    // (case-insensitive) to its position within the alphabetically sorted
    // sorter columns. The field is static and only built while it is null,
    // so the mapping derived from the first sorters array is reused by every
    // instance for all later calls.
    private static TreeMap<String, Integer> _IDX_MAPPING;

    /**
     * Records the type codes of the two operands of the next comparison.
     * <p>
     * The codes are consulted by {@link #compare(Object, Object, Configuration)}
     * when an operand has to be read from a {@link DataInput}.
     *
     * @param type_for_object1 type code of the first operand
     * @param type_for_object2 type code of the second operand
     */
    public void setType(byte type_for_object1, byte type_for_object2) {
        // the two assignments are independent of each other
        this.type_for_object2 = type_for_object2;
        this.type_for_object1 = type_for_object1;
    }

    /**
     * Compares two keys column by column in the priority given by
     * <code>sorters</code>; to be called in {@link DataJoinKey}.
     * <p>
     * When no sorter is given (<code>null</code> or empty array) the keys are
     * compared through their own <code>compareTo</code>.  Otherwise both keys
     * must be {@link Tuple} instances; the first sorter column whose values
     * differ decides the result, reversed when that sorter is descending.
     *
     * @param key1 the first key
     * @param key2 the second key
     * @param sorters the sorters, highest priority first; may be empty
     * @param conf passed through to {@link #compare(Object, Object, Configuration)}
     * @return a negative integer, zero, or a positive integer as
     *         <code>key1</code> sorts before, equal to, or after <code>key2</code>
     * @throws IllegalArgumentException if a sorter reports an unsupported ordering
     * @throws RuntimeException wrapping an {@link IOException} raised while comparing
     */
    public int compareKey(WritableComparable key1, WritableComparable key2, Sorter[] sorters, Configuration conf) {
        if (sorters == null || sorters.length == 0) {
            // no explicit sort columns: rely on the keys' natural ordering
            return key1.compareTo(key2);
        }

        // with sorters present, key1 and key2 must be Tuple instances
        Tuple k1 = (Tuple) key1;
        Tuple k2 = (Tuple) key2;

        // the sorting priority is the same as the order of the sorters array
        for (Sorter aSorter : sorters) {
            String columnName = aSorter.getColumn();
            boolean forceNumeric = aSorter.forceSortNumerically();

            Object v1 = this.getValue(k1, columnName, forceNumeric, sorters);
            Object v2 = this.getValue(k2, columnName, forceNumeric, sorters);

            try {
                this.setType(Tuple.getType(v1), Tuple.getType(v2));
                int result = this.compare(v1, v2, conf);
                if (result != 0) {
                    // ordering is decided by this column
                    Ordering ordering = aSorter.getOrdering();
                    switch (ordering) {
                    case ASC:
                        // keep the result as is
                        return result;
                    case DESC:
                        // Reverse the ordering.  Do not negate: -Integer.MIN_VALUE
                        // is still negative, which would leave the order unreversed.
                        return result > 0 ? -1 : 1;
                    default:
                        throw new IllegalArgumentException(ordering + " is not a supported ordering.");
                    }
                }
            } catch (IOException e) {
                throw new RuntimeException("Cannot performe compareKey", e);
            }
        }

        // all sorter columns are equal
        return 0;
    }

    /**
     * Compares two column values, which may be of different but exchangeable
     * types.
     * <p>
     * Each argument is either an in-memory value or a {@link DataInput}; in
     * the latter case the value is read from the input using the type code
     * previously given to {@link #setType(byte, byte)}.  A
     * {@link ResultWrapper} is replaced by its combined result before the
     * comparison.  As a side effect the stored type codes are overwritten
     * with the types of the values actually compared.
     * <p>
     * Values of the same type are compared by a type specific rule.  For
     * different types: a null type sorts before anything else, numbers are
     * compared as doubles (a string operand is parsed as a double), and
     * <code>java.sql.Date</code> can be compared with
     * <code>java.sql.Timestamp</code>.
     *
     * @param object1 the first value, or a {@link DataInput} to read it from
     * @param object2 the second value, or a {@link DataInput} to read it from
     * @param conf not used by this method
     * @return a negative integer, zero, or a positive integer as the first
     *         value sorts before, equal to, or after the second one
     * @throws IOException if reading or comparing a value fails
     * @throws IllegalArgumentException if the two types cannot be compared
     *         with each other, or the common type is not supported
     * @throws NumberFormatException if a string compared against a number is
     *         not a parsable double
     */
    public int compare(Object object1, Object object2, Configuration conf) throws IOException {
        ObjectReader reader1 = new ObjectReader(object1, type_for_object1);
        ObjectReader reader2 = new ObjectReader(object2, type_for_object2);

        final Object v1 = reader1.getValue();
        final Object v2 = reader2.getValue();

        // from here on the type codes describe the values actually compared
        setType(Tuple.getType(v1), Tuple.getType(v2));

        if (type_for_object1 == type_for_object2) {
            final byte equal_type = type_for_object1;
            final TupleColumnComparator cmp = new TupleColumnComparator();
            cmp.setType(equal_type, equal_type);

            // dispatches on the common type; the unqualified compare(...) calls
            // below are the private static helpers of the enclosing class
            TupleTypeHandler<Integer> equalTypeComparator = new TupleTypeHandler<Integer>() {

                @Override
                protected Integer on_array() throws IOException {
                    Array a1 = (Array) v1;
                    Array a2 = (Array) v2;

                    return a1.compareTo(a2);
                }

                @Override
                protected Integer on_boolean() throws IOException {
                    return compare((Boolean) v1, (Boolean) v2);
                }

                @Override
                protected Integer on_byte() throws IOException {
                    return compare((Byte) v1, (Byte) v2);
                }

                @Override
                protected Integer on_byte_array() throws IOException {
                    // shorter array first; equal length arrays are compared
                    // byte by byte
                    byte[] b1 = (byte[]) v1;
                    byte[] b2 = (byte[]) v2;
                    int diff = b1.length - b2.length;
                    if (diff == 0) {
                        for (int i = 0; i < b1.length; i++) {
                            diff = compare((Byte) b1[i], (Byte) b2[i]);
                            if (diff != 0)
                                return diff;
                        }
                    }
                    return diff;
                }

                @Override
                protected Integer on_date() throws IOException {
                    return compare((java.sql.Date) v1, (java.sql.Date) v2);
                }

                @Override
                protected Integer on_default() throws IOException {
                    throw new IllegalArgumentException("Unsupported type [" + String.format("0x%02X", type) + "]");
                }

                @Override
                protected Integer on_double() throws IOException {
                    return compare((Double) v1, (Double) v2);
                }

                @Override
                protected Integer on_float() throws IOException {
                    return compare((Float) v1, (Float) v2);
                }

                @Override
                protected Integer on_integer() throws IOException {
                    return compare((Integer) v1, (Integer) v2);
                }

                @Override
                protected Integer on_long() throws IOException {
                    return compare((Long) v1, (Long) v2);
                }

                @Override
                protected Integer on_null() throws IOException {
                    // two nulls are equal
                    return 0;
                }

                @Override
                protected Integer on_null_writable() throws IOException {
                    return 0;
                }

                @Override
                protected Integer on_result_wrapper() throws IOException {
                    ResultWrapper w1 = (ResultWrapper) v1;
                    ResultWrapper w2 = (ResultWrapper) v2;
                    return cmp.compare(w1.getCombinedResult(), w2.getCombinedResult(), null);
                }

                @Override
                protected Integer on_serializable() throws IOException {
                    return compare((Comparable) v1, (Comparable) v2);
                }

                @Override
                protected Integer on_short() throws IOException {
                    return compare((Short) v1, (Short) v2);
                }

                @Override
                protected Integer on_string() throws IOException {
                    return compare((String) v1, (String) v2);
                }

                @Override
                protected Integer on_string_map() throws IOException {
                    TreeMap<String, String> m1 = (TreeMap<String, String>) v1;
                    TreeMap<String, String> m2 = (TreeMap<String, String>) v2;
                    return compare(m1, m2);
                }

                @Override
                protected Integer on_time() throws IOException {
                    return compare((java.sql.Time) v1, (java.sql.Time) v2);
                }

                @Override
                protected Integer on_timestamp() throws IOException {
                    return compare((java.sql.Timestamp) v1, (java.sql.Timestamp) v2);
                }

                @Override
                protected Integer on_tuple() throws IOException {
                    Tuple t1 = (Tuple) v1;
                    Tuple t2 = (Tuple) v2;
                    return t1.compareTo(t2);
                }

                @Override
                protected Integer on_writable() throws IOException {
                    return compare((Comparable) v1, (Comparable) v2);
                }
            };

            // returned as is: every int, including Integer.MAX_VALUE, is a
            // legitimate comparison result
            return equalTypeComparator.handle(equal_type);
        }

        // ---- the two values have different types ----

        if (type_for_object1 == Tuple.NULL_TYPE || type_for_object2 == Tuple.NULL_TYPE) {
            // exactly one of them is null, null sorts first
            return (type_for_object1 == Tuple.NULL_TYPE) ? -1 : 1;
        }

        if (Tuple.isNumericalType(type_for_object1) && type_for_object2 == Tuple.STRING_TYPE) {
            // number vs. string: parse the string as a double
            return compare(((Number) v1).doubleValue(), Double.parseDouble(((String) v2)));
        }

        if (Tuple.isNumericalType(type_for_object2) && type_for_object1 == Tuple.STRING_TYPE) {
            // string vs. number: parse the string as a double
            return compare(Double.parseDouble(((String) v1)), ((Number) v2).doubleValue());
        }

        if (Tuple.isNumericalType(type_for_object1) && Tuple.isNumericalType(type_for_object2)) {
            // both are numerical types, but not exactly the same one
            LOGGER.debug("Comparing two different numberical type:" + Tuple.getTypeString(type_for_object1)
                    + " vs " + Tuple.getTypeString(type_for_object2));

            return compare(((Number) v1).doubleValue(), ((Number) v2).doubleValue());
        }

        if (Tuple.isDateType(type_for_object1) && Tuple.isDateType(type_for_object2)) {
            // both are date types, but not exactly the same one
            LOGGER.debug("Comparing two different date type:" + Tuple.getTypeString(type_for_object1) + " vs "
                    + Tuple.getTypeString(type_for_object2));

            if (type_for_object1 == Tuple.TIME_TYPE || type_for_object2 == Tuple.TIME_TYPE) {
                // cannot compare java.sql.Time with java.sql.Date or java.sql.Timestamp
                throw newIncomparableTypesException();
            }

            // one of them is java.sql.Date, the other one is java.sql.Timestamp
            java.util.Date d1 = (java.util.Date) v1;
            java.util.Date d2 = (java.util.Date) v2;
            return d1.compareTo(d2);
        }

        throw newIncomparableTypesException();
    }

    /**
     * Builds the exception reporting that the two types currently stored in
     * this comparator cannot be compared with each other.
     */
    private IllegalArgumentException newIncomparableTypesException() {
        // report the types of the values that were compared, not the types of
        // the raw arguments, which may have been DataInput or ResultWrapper
        return new IllegalArgumentException("Cannot compare two columns with different types, column1 type:"
                + Tuple.getTypeString(type_for_object1) + ", colum2 type:"
                + Tuple.getTypeString(type_for_object2));
    }

    /**
     * Returns the mapping from sorter column name (case-insensitive) to the
     * index of that column's value within a tuple that has no schema.
     * <p>
     * The values of the columns in the keys are ordered by the alphabetic
     * ordering of the column names, see {@link Tuple#write}.  The schema of
     * the tuple might not be available here, so the index is derived from
     * the columns selected by the sorters, sorted alphabetically.
     * <p>
     * The mapping is built on the first call and cached in a static field;
     * later calls return the cached mapping regardless of the argument.
     *
     * @param sorters the sorters whose columns define the mapping
     * @return the cached column name to index mapping
     */
    private TreeMap<String, Integer> getIdxMapping(Sorter[] sorters) {
        TreeMap<String, Integer> mapping = _IDX_MAPPING;
        if (mapping == null) {
            List<String> columnNames = new ArrayList<String>(sorters.length);
            for (Sorter aSorter : sorters) {
                columnNames.add(aSorter.getColumn().toLowerCase());
            }
            Collections.sort(columnNames);

            mapping = new TreeMap<String, Integer>(String.CASE_INSENSITIVE_ORDER);
            for (int i = 0; i < columnNames.size(); i++) {
                mapping.put(columnNames.get(i), i);
            }

            // Publish only after the mapping is complete: had the cache been
            // assigned first, a failure above would leave an empty or partial
            // mapping cached for all later calls.
            _IDX_MAPPING = mapping;
        }
        return mapping;
    }

    /**
     * Fetches the value of a sort column from a tuple.
     * <p>
     * A tuple with an empty schema is addressed by index, using the mapping
     * from {@link #getIdxMapping(Sorter[])}; otherwise the column name is
     * used directly.
     *
     * @param t the tuple to read from
     * @param columnName the name of the column to fetch
     * @param forceNumeric if <code>true</code>, the value is fetched as a
     *        double, with <code>Double.NaN</code> passed as the default
     * @param sorters the sorters, used to derive the column index
     * @return the value of the column
     * @throws IllegalArgumentException if the tuple has no schema and the
     *         column is not part of the index mapping
     */
    private Object getValue(Tuple t, String columnName, boolean forceNumeric, Sorter[] sorters) {
        if (t.getSchema().length == 0) {
            // t is de-serialized from bytes and its schema has not been set,
            // so the value has to be addressed by index
            Integer mappedIdx = this.getIdxMapping(sorters).get(columnName);
            if (mappedIdx == null) {
                // fail with a meaningful message instead of an NPE on unboxing
                throw new IllegalArgumentException(
                        "Column [" + columnName + "] does not exist in the index mapping of the sorting columns.");
            }
            int columnIdx = mappedIdx.intValue();
            if (forceNumeric)
                return t.getDouble(columnIdx, Double.NaN);
            return t.get(columnIdx); // use its original value
        }

        // this tuple has a schema, use the column name directly
        if (forceNumeric)
            return t.getDouble(columnName, Double.NaN);
        return t.get(columnName);
    }

    /**
     * Compares two doubles with the ordering of
     * {@link Double#compare(double, double)}.
     */
    private static int compare(double v1, double v2) {
        final int order = Double.compare(v1, v2);
        return order;
    }

    /**
     * Compares two strings with the ordering of
     * {@link String#compareTo(String)}; neither argument may be
     * <code>null</code>.
     */
    private static int compare(String v1, String v2) {
        final int order = v1.compareTo(v2);
        return order;
    }

    /**
     * Compares two string maps entry by entry, in the iteration order of
     * their keys.
     * <p>
     * An empty map sorts before a non-empty one.  Otherwise the keys at the
     * same position are compared case-insensitively and, when they match,
     * their values are compared; the first difference is the result.  When
     * one map runs out of keys without any difference, the map with fewer
     * keys sorts first.
     *
     * @return a negative integer, zero, or a positive integer as
     *         <code>m1</code> sorts before, equal to, or after <code>m2</code>
     */
    private static int compare(TreeMap<String, String> m1, TreeMap<String, String> m2) {
        final boolean leftEmpty = m1.size() == 0;
        final boolean rightEmpty = m2.size() == 0;
        if (leftEmpty && rightEmpty) {
            return 0;
        }
        if (rightEmpty) {
            return 1;
        }
        if (leftEmpty) {
            return -1;
        }

        Iterator<String> leftKeys = m1.keySet().iterator();
        Iterator<String> rightKeys = m2.keySet().iterator();

        while (leftKeys.hasNext()) {
            String leftKey = leftKeys.next();
            if (!rightKeys.hasNext()) {
                // m1 has more keys than m2, and all the entries of m2 match
                return 1;
            }
            String rightKey = rightKeys.next();

            int order = String.CASE_INSENSITIVE_ORDER.compare(leftKey, rightKey);
            if (order == 0) {
                // same key, the values decide
                order = m1.get(leftKey).compareTo(m2.get(rightKey));
            }
            if (order != 0) {
                return order;
            }
        }

        // m1 is exhausted without a difference: m2 is either longer or equal
        return rightKeys.hasNext() ? -1 : 0;
    }

    /**
     * Compares two values through the <code>compareTo</code> of the first
     * one; <code>v1</code> must not be <code>null</code>.
     */
    private static int compare(Comparable v1, Comparable v2) {
        final int order = v1.compareTo(v2);
        return order;
    }

    /**
     * Resolves one operand of a comparison to its value.
     * <p>
     * The operand is either the value itself, or a {@link DataInput} from
     * which the value is read with the given type code.
     */
    private static final class ObjectReader {
        /** The operand as handed in by the caller. */
        private final Object obj;

        /** The operand as a stream, or <code>null</code> if it is a plain value. */
        private final DataInput in;

        /** Whether the operand has to be read from {@link #in}. */
        private final boolean fromIO;

        /** Reads a field from {@link #in}; <code>null</code> for a plain value. */
        private final ReadFieldImpl reader;

        // handed to the reader; presumably the reader stores the field it
        // reads in here -- TODO confirm against ReadFieldImpl
        private final List<Object> value = new ArrayList<Object>(1);

        /** The type code of the operand. */
        private final byte type;

        /**
         * @param obj the value itself, or the {@link DataInput} to read it from
         * @param type the type code of the value to read; only used when
         *        <code>obj</code> is a {@link DataInput}, otherwise the type
         *        is taken from <code>obj</code>
         */
        public ObjectReader(Object obj, byte type) {
            this.obj = obj;
            this.fromIO = obj instanceof DataInput;
            if (!this.fromIO) {
                this.in = null;
                this.reader = null;
                this.type = Tuple.getType(this.obj);
            } else {
                this.in = (DataInput) obj;
                this.reader = new ReadFieldImpl(value, this.in, null);
                this.type = type;
            }
        }

        /**
         * Returns the value of the operand, reading it from the stream when
         * needed.  A {@link ResultWrapper} is replaced by its combined result.
         *
         * @throws IOException if reading from the stream fails
         */
        public Object getValue() throws IOException {
            Object result;
            if (!this.fromIO) {
                result = this.obj;
            } else {
                this.reader.handle(type);
                result = this.value.get(0);
            }

            if (result instanceof ResultWrapper) {
                return ((ResultWrapper) result).getCombinedResult();
            }
            return result;
        }
    }
}