com.ebay.erl.mobius.util.Util.java Source code

Java tutorial

Introduction

Here is the source code for com.ebay.erl.mobius.util.Util.java

Source

package com.ebay.erl.mobius.util;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.Reporter;

import com.ebay.erl.mobius.core.ConfigureConstants;
import com.ebay.erl.mobius.core.collection.BigTupleList;
import com.ebay.erl.mobius.core.model.Tuple;

/**
 * <p>
 * This product is licensed under the Apache License,  Version 2.0, 
 * available at http://www.apache.org/licenses/LICENSE-2.0.
 * 
 * This product contains portions derived from Apache hadoop which is 
 * licensed under the Apache License, Version 2.0, available at 
 * http://hadoop.apache.org.
 * 
 * (c) 2007 - 2012 eBay Inc., Evan Chiu, Woody Zhou, Neel Sundaresan
 *
 */
public class Util {
    /** Shared zero-length {@code String} array constant. */
    public static final String[] ZERO_SIZE_STRING_ARRAY = new String[0];

    /**
     * Cache of classes already resolved by {@link #getClass(String)}, keyed
     * by class name. The map itself is used as the lock guarding access.
     */
    private static final Map<String, Class<?>> _CLASS_MAPPING = new HashMap<String, Class<?>>();

    /**
     * Resolves the {@link Class} named <code>fullClassName</code>.
     * <p>
     * Successful look-ups are remembered, so a later call with the same
     * name is answered from the cache instead of going through
     * {@link Class#forName(String)} again.
     *
     * @param fullClassName name of the class to resolve, in the form
     * accepted by {@link Class#forName(String)}.
     * @return the resolved class.
     * @throws RuntimeException wrapping the {@link ClassNotFoundException}
     * when no class with that name can be found.
     */
    public static Class<?> getClass(String fullClassName) {
        synchronized (_CLASS_MAPPING) {
            final Class<?> cached = _CLASS_MAPPING.get(fullClassName);
            if (cached != null) {
                return cached;
            }

            final Class<?> loaded;
            try {
                loaded = Class.forName(fullClassName);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
            _CLASS_MAPPING.put(fullClassName, loaded);
            return loaded;
        }
    }

    /**
     * Creates a new object of the class named <code>fullClassName</code>
     * through {@link Class#newInstance()}.
     *
     * @param fullClassName name of the class to instantiate; it is resolved
     * with {@link #getClass(String)}.
     * @return the newly created instance.
     * @throws RuntimeException wrapping the {@link InstantiationException}
     * or {@link IllegalAccessException} raised while instantiating, or the
     * one thrown by {@link #getClass(String)} when the class is not found.
     */
    public static Object newInstance(String fullClassName) {
        // the lookup only throws unchecked exceptions, so it can live outside the try
        final Class<?> target = getClass(fullClassName);
        try {
            return target.newInstance();
        } catch (IllegalAccessException e) {
            throw new RuntimeException(e);
        } catch (InstantiationException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * List-based convenience overload: copies <code>datasets</code> into an
     * array, preserving order, and delegates to the varargs
     * <code>crossProduct</code> overload.
     *
     * @param conf passed through unchanged to the varargs overload.
     * @param reporter passed through unchanged to the varargs overload.
     * @param datasets the datasets to combine.
     * @return whatever the varargs overload returns for these datasets.
     * @throws IOException if the varargs overload fails.
     */
    public static Iterable<Tuple> crossProduct(Configuration conf, Reporter reporter, List<BigTupleList> datasets)
            throws IOException {
        final BigTupleList[] asArray = datasets.toArray(new BigTupleList[datasets.size()]);
        return crossProduct(conf, reporter, asArray);
    }

    /**
     * Computes the cross product of the given <code>datasets</code>.
     * <p>
     * The first dataset is copied into a {@link BigTupleList}; every
     * following non-<code>null</code> dataset is then combined with the
     * rows accumulated so far by merging each pair of rows with
     * {@link Tuple#merge}. A <code>null</code> dataset after the first one
     * is skipped. When exactly one dataset is given, it is returned as is.
     *
     * @param conf not read by this method.
     * @param reporter handed to every {@link BigTupleList} created here.
     * @param datasets the datasets to combine, in order.
     * @return the combined rows, or <code>datasets[0]</code> itself when
     * only one dataset was supplied.
     * @throws IOException if closing one of the iterators fails.
     */
    public static Iterable<Tuple> crossProduct(Configuration conf, Reporter reporter, Iterable<Tuple>... datasets)
            throws IOException {
        // a single dataset is already its own cross product
        if (datasets.length == 1) {
            return datasets[0];
        }

        BigTupleList accumulated = new BigTupleList(reporter);
        accumulated.addAll(datasets[0]);

        for (int idx = 1; idx < datasets.length; idx++) {
            final Iterable<Tuple> right = datasets[idx];
            if (right == null) {
                // nothing to combine with; keep what has been accumulated
                continue;
            }

            final BigTupleList combined = new BigTupleList(reporter);
            final Iterator<Tuple> leftRows = accumulated.iterator();
            while (leftRows.hasNext()) {
                final Tuple leftRow = leftRows.next();

                // a fresh pass over the right-hand dataset for every left row
                final Iterator<Tuple> rightRows = right.iterator();
                while (rightRows.hasNext()) {
                    combined.add(Tuple.merge(leftRow, rightRows.next()));
                }
                close(rightRows);
            }
            close(leftRows);

            // the previous intermediate result is no longer needed
            accumulated.clear();
            accumulated = combined;
        }
        return accumulated;
    }

    /**
     * Computes the cross product of the given <code>datasets</code>,
     * holding every intermediate result in an {@link ArrayList}.
     * <p>
     * Rows are combined pairwise with {@link Tuple#merge}. When exactly one
     * dataset is given, it is returned as is.
     *
     * @param datasets the datasets to combine, in order.
     * @return the combined rows, or <code>datasets[0]</code> itself when
     * only one dataset was supplied.
     */
    public static Iterable<Tuple> inMemoryCrossProduct(Iterable<Tuple>... datasets) {
        // a single dataset is already its own cross product
        if (datasets.length == 1) {
            return datasets[0];
        }

        // seed the accumulator with a copy of the first dataset
        List<Tuple> accumulated = new ArrayList<Tuple>();
        final Iterator<Tuple> seed = datasets[0].iterator();
        while (seed.hasNext()) {
            accumulated.add(seed.next());
        }

        for (int idx = 1; idx < datasets.length; idx++) {
            final Iterable<Tuple> right = datasets[idx];
            final List<Tuple> combined = new ArrayList<Tuple>();

            for (Tuple leftRow : accumulated) {
                for (Tuple rightRow : right) {
                    combined.add(Tuple.merge(leftRow, rightRow));
                }
            }

            // drop the previous intermediate rows before switching over
            accumulated.clear();
            accumulated = combined;
        }
        return accumulated;
    }

    /**
     * Merges the given <code>confs</code> into a single, newly created
     * {@link Configuration} (built with <code>new Configuration(false)</code>).
     * <p>
     * Configurations are applied in the order given, so for an ordinary
     * property key the value from a later configuration replaces the value
     * from an earlier one.
     * <p>
     * The key {@link ConfigureConstants#DATASET_ID_TO_NAME_MAPPING} is the
     * exception: it is set per dataset, so its values are not overridden
     * but concatenated, separated by commas.
     *
     * @param confs the configurations to merge, lowest precedence first.
     * @return a new Configuration holding all values of the given
     * <code>confs</code>.
     */
    public static Configuration merge(Configuration... confs) {
        final String mappingKey = ConfigureConstants.DATASET_ID_TO_NAME_MAPPING;
        final Configuration merged = new Configuration(false);

        for (Configuration source : confs) {
            for (Iterator<Entry<String, String>> entries = source.iterator(); entries.hasNext();) {
                final Entry<String, String> entry = entries.next();
                final String key = entry.getKey();
                final String value = entry.getValue();

                if (!key.equals(mappingKey)) {
                    // ordinary property: last writer wins
                    merged.set(key, value);
                    continue;
                }

                // dataset id-to-name mappings from independent datasets are accumulated
                final String soFar = merged.get(mappingKey, "");
                merged.set(mappingKey, soFar.isEmpty() ? value : soFar + "," + value);
            }
        }
        return merged;
    }

    /**
     * Tests whether two text files hold the same content by comparing them
     * line by line.
     * <p>
     * Files of different byte length are reported as different without
     * being read. Otherwise both files are read with {@link FileReader}
     * (platform default charset) and compared one line at a time; because
     * {@link BufferedReader#readLine()} strips line terminators, the
     * terminators themselves are not compared.
     *
     * @param f1 the first file.
     * @param f2 the second file.
     * @return <code>true</code> if both files have the same length and the
     * same sequence of lines, <code>false</code> otherwise.
     * @throws IOException if either file cannot be opened or read.
     */
    public boolean equalContent(File f1, File f2) throws IOException {
        // files of different size can never hold the same content
        if (f1.length() != f2.length()) {
            return false;
        }

        BufferedReader br1 = null;
        BufferedReader br2 = null;
        try {
            br1 = new BufferedReader(new FileReader(f1));
            br2 = new BufferedReader(new FileReader(f2));

            while (true) {
                final String nl1 = br1.readLine();
                // each line must come from its own reader
                final String nl2 = br2.readLine();

                if (nl1 == null || nl2 == null) {
                    // equal only when both readers reach EOF at the same time
                    return nl1 == null && nl2 == null;
                }
                if (!nl1.equals(nl2)) {
                    return false;
                }
            }
        } finally {
            closeQuietly(br1);
            closeQuietly(br2);
        }
    }

    /**
     * Closes <code>resource</code> if it is not <code>null</code>, ignoring
     * any {@link IOException} raised by the close itself.
     */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // best effort: the comparison result is already decided at this point
        }
    }

    /**
     * Binary-searches <code>sorted</code> for the first or the last
     * position whose element compares equal to <code>x</code>.
     *
     * @param sorted the array to search; it must be sorted in ascending
     * order with respect to <code>comparator</code>, otherwise the result
     * is unspecified.
     * @param x the value to look for.
     * @param comparator compares array elements (first argument) with
     * <code>x</code> (second argument).
     * @param isUpper <code>true</code> to find the last (highest) index
     * holding a match, <code>false</code> to find the first (lowest) one.
     * @return the requested boundary index, or <code>-1</code> if no
     * element compares equal to <code>x</code>.
     */
    public static int findBoundary(Object[] sorted, Object x, Comparator<Object> comparator, boolean isUpper) {
        int start = 0;
        int end = sorted.length - 1;
        int boundary = -1; // best match seen so far

        while (start <= end) {
            // unsigned shift keeps the midpoint correct even when start + end overflows int
            final int mid = (start + end) >>> 1;
            final int diff = comparator.compare(sorted[mid], x);

            if (diff > 0) {
                end = mid - 1;
            } else if (diff < 0) {
                start = mid + 1;
            } else {
                // a match: remember it, then keep searching on the side
                // where a more extreme match could still exist
                boundary = mid;
                if (isUpper) {
                    start = mid + 1;
                } else {
                    end = mid - 1;
                }
            }
        }

        return boundary;
    }

    /**
     * Finds the upper boundary of <code>x</code> in <code>sorted</code> by
     * delegating to
     * {@link #findBoundary(Object[], Object, Comparator, boolean)} with
     * <code>isUpper</code> set to <code>true</code>.
     *
     * @param sorted the array to search.
     * @param x the value to look for.
     * @param comparator used to compare array elements with <code>x</code>.
     * @return the index reported by <code>findBoundary</code>, or
     * <code>-1</code> when it finds no match.
     */
    public static int findUpperBound(Object[] sorted, Object x, Comparator<Object> comparator) {
        return findBoundary(sorted, x, comparator, true);
    }

    /**
     * Finds the lower boundary of <code>x</code> in <code>sorted</code> by
     * delegating to
     * {@link #findBoundary(Object[], Object, Comparator, boolean)} with
     * <code>isUpper</code> set to <code>false</code>.
     *
     * @param sorted the array to search.
     * @param x the value to look for.
     * @param comparator used to compare array elements with <code>x</code>.
     * @return the index reported by <code>findBoundary</code>, or
     * <code>-1</code> when it finds no match.
     */
    public static int findLowerBound(Object[] sorted, Object x, Comparator<Object> comparator) {
        return findBoundary(sorted, x, comparator, false);
    }

    /**
     * Counts the positions between the lower and the upper boundary of
     * <code>x</code> in <code>sorted</code>, both inclusive.
     *
     * @param sorted the array to search.
     * @param x the value to look for.
     * @param comparator used to compare array elements with <code>x</code>.
     * @return <code>upper - lower + 1</code>, or <code>-1</code> (not
     * <code>0</code>) when no upper boundary is found.
     */
    public static int findRepeatTimes(Object[] sorted, Object x, Comparator<Object> comparator) {
        final int last = findUpperBound(sorted, x, comparator);
        if (last < 0) {
            return -1;
        }
        final int first = findLowerBound(sorted, x, comparator);
        return last - first + 1;
    }

    /**
     * Collects the elements of <code>list</code> that are instances of
     * <code>type</code> (the type itself or any subtype), keeping their
     * original order.
     * <p>
     * <code>null</code> elements are never instances of any type and are
     * therefore skipped.
     *
     * @param list the elements to filter; must not be <code>null</code>.
     * @param type the type to select; must not be <code>null</code>.
     * @return a new list with the matching elements, possibly empty.
     * @throws NullPointerException if <code>list</code> or
     * <code>type</code> is <code>null</code>.
     */
    public static <T> List<T> findByType(List<? super T> list, Class<T> type) {
        if (list == null)
            throw new NullPointerException("list parameter cannot be null");

        if (type == null)
            throw new NullPointerException("type parameter cannot be null");

        List<T> subclasses = new ArrayList<T>();
        for (Object e : list) {
            // isInstance is false for null, so null elements cannot cause an NPE here
            if (type.isInstance(e)) {
                // checked cast: no unchecked warning to suppress
                subclasses.add(type.cast(e));
            }
        }
        return subclasses;
    }

    /**
     * Array flavour of <code>findByType</code>: selects the elements of
     * <code>list</code> that the list-based overload accepts for
     * <code>type</code> and returns them in a new array whose component
     * type is <code>type</code>.
     *
     * @param list the elements to filter.
     * @param type the type to select; also the component type of the
     * returned array.
     * @return a new array holding the matching elements in their original
     * order, possibly of length zero.
     */
    @SuppressWarnings("unchecked")
    public static <U, T extends U> T[] findByType(U[] list, Class<T> type) {
        final List<T> matches = findByType(Arrays.asList(list), type);
        // sized exactly, so toArray fills and returns this very array
        final T[] target = (T[]) Array.newInstance(type, matches.size());
        return matches.toArray(target);
    }

    /**
     * Closes <code>it</code> when it also implements {@link Closeable};
     * does nothing for <code>null</code> or for an iterator that is not
     * closeable.
     *
     * @param it the iterator to close, may be <code>null</code>.
     * @throws IOException if closing the iterator fails.
     */
    public static <E> void close(Iterator<E> it) throws IOException {
        // instanceof is false for null, so this guard covers the null case too
        if (!(it instanceof Closeable)) {
            return;
        }
        ((Closeable) it).close();
    }

    /**
     * Splits <code>source</code> around every occurrence of the literal
     * <code>delimiter</code> (no regular-expression interpretation).
     * <p>
     * Occurrences are matched left to right without overlapping. Empty
     * tokens are kept, including a trailing one when <code>source</code>
     * ends with a matched delimiter: <code>"a,b,"</code> yields
     * <code>["a", "b", ""]</code>. An empty <code>source</code> yields an
     * empty list.
     *
     * @param source the string to split; must not be <code>null</code>.
     * @param delimiter the literal separator; must be neither
     * <code>null</code> nor empty.
     * @return the tokens in order of appearance.
     * @throws NullPointerException if <code>source</code> or
     * <code>delimiter</code> is <code>null</code>.
     * @throws IllegalArgumentException if <code>delimiter</code> is empty.
     */
    public static List<String> nonRegexSplit(String source, String delimiter) {
        if (source == null)
            throw new NullPointerException("source cannot be null.");

        if (delimiter == null)
            throw new NullPointerException("delimiter cannot be null");

        if (delimiter.isEmpty())
            throw new IllegalArgumentException("delimiter cannot be empty string.");

        final int length = delimiter.length();
        List<String> result = new ArrayList<String>();

        int start = 0;
        int end;
        while ((end = source.indexOf(delimiter, start)) >= 0) {
            result.add(source.substring(start, end));
            start = end + length;
        }

        // start > 0 means at least one delimiter was consumed, so whatever
        // follows it (possibly "") is the final token. Checking the position
        // instead of source.endsWith(delimiter) avoids a bogus empty token
        // when the delimiter overlaps itself, e.g. "xaaa" split by "aa".
        if (start > 0 || start < source.length()) {
            result.add(source.substring(start));
        }

        return result;
    }
}