Java tutorial
package com.ebay.erl.mobius.util;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.Reporter;

import com.ebay.erl.mobius.core.ConfigureConstants;
import com.ebay.erl.mobius.core.collection.BigTupleList;
import com.ebay.erl.mobius.core.model.Tuple;

/**
 * General purpose helpers: cached class lookup, cross products of tuple
 * datasets, {@link Configuration} merging, boundary search in sorted arrays,
 * type filtering and literal (non-regex) string splitting.
 *
 * <p>
 * This product is licensed under the Apache License, Version 2.0,
 * available at http://www.apache.org/licenses/LICENSE-2.0.
 *
 * This product contains portions derived from Apache hadoop which is
 * licensed under the Apache License, Version 2.0, available at
 * http://hadoop.apache.org.
 *
 * 2007 2012 eBay Inc., Evan Chiu, Woody Zhou, Neel Sundaresan
 *
 */
public class Util {

	/** Shared empty {@code String} array. */
	public static final String[] ZERO_SIZE_STRING_ARRAY = new String[0];

	/** Cache of class name to resolved class; guarded by its own monitor. */
	private static final Map<String, Class<?>> _CLASS_MAPPING = new HashMap<String, Class<?>>();

	/**
	 * Get the {@link Class} reference by the given
	 * <code>fullClassName</code>. Resolved classes are cached, so
	 * {@link Class#forName(String)} is invoked at most once per name.
	 *
	 * @param fullClassName fully qualified class name.
	 * @return the resolved class, never <code>null</code>.
	 * @throws RuntimeException wrapping the {@link ClassNotFoundException}
	 *         when the class cannot be found.
	 */
	public static Class<?> getClass(String fullClassName) {
		synchronized (_CLASS_MAPPING) {
			Class<?> clazz = _CLASS_MAPPING.get(fullClassName);
			if (clazz == null) {
				try {
					clazz = Class.forName(fullClassName);
				} catch (ClassNotFoundException e) {
					throw new RuntimeException(e);
				}
				_CLASS_MAPPING.put(fullClassName, clazz);
			}
			return clazz;
		}
	}

	/**
	 * Get the instance by the given <code>fullClassName</code>, using the
	 * class' no-argument constructor.
	 *
	 * @param fullClassName fully qualified class name.
	 * @return a new instance of the class.
	 * @throws RuntimeException if the class cannot be found, instantiated
	 *         or accessed.
	 */
	public static Object newInstance(String fullClassName) {
		try {
			return getClass(fullClassName).newInstance();
		} catch (InstantiationException e) {
			throw new RuntimeException(e);
		} catch (IllegalAccessException e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Perform cross product for the given list of <code>datasets</code>;
	 * delegates to
	 * {@link #crossProduct(Configuration, Reporter, Iterable...)}.
	 *
	 * @param conf     passed through to the varargs overload.
	 * @param reporter passed through to the varargs overload.
	 * @param datasets the datasets to combine, in order.
	 * @return the cross product of all the datasets.
	 * @throws IOException if the varargs overload fails.
	 */
	public static Iterable<Tuple> crossProduct(Configuration conf, Reporter reporter, List<BigTupleList> datasets)
			throws IOException {
		BigTupleList[] data = datasets.toArray(new BigTupleList[datasets.size()]);
		return crossProduct(conf, reporter, data);
	}

	/**
	 * Perform cross product for the given <code>datasets</code>.
	 * <p>
	 * When there is exactly one dataset it is returned as is. Otherwise the
	 * datasets are combined from left to right with
	 * {@link Tuple#merge}; a <code>null</code> dataset at position 1 or
	 * later is skipped.
	 *
	 * @param conf     not used by this method.
	 * @param reporter handed to every {@link BigTupleList} created here.
	 * @param datasets the datasets to combine; must contain at least one
	 *                 element.
	 * @return the cross product of the datasets.
	 * @throws IOException if closing an iterator fails.
	 */
	public static Iterable<Tuple> crossProduct(Configuration conf, Reporter reporter, Iterable<Tuple>... datasets)
			throws IOException {
		// no need to cross product if there is only one dataset
		if (datasets.length == 1)
			return datasets[0];

		BigTupleList result = new BigTupleList(reporter);
		result.addAll(datasets[0]);

		for (int i = 1; i < datasets.length; i++) {
			Iterable<Tuple> left = result;
			Iterable<Tuple> right = datasets[i];
			if (right == null)
				continue;

			BigTupleList merged = new BigTupleList(reporter);
			Iterator<Tuple> leftIt = left.iterator();
			try {
				while (leftIt.hasNext()) {
					Tuple leftRow = leftIt.next();
					Iterator<Tuple> rightIt = right.iterator();
					try {
						while (rightIt.hasNext()) {
							merged.add(Tuple.merge(leftRow, rightIt.next()));
						}
					} finally {
						// release the iterator even when merging fails
						close(rightIt);
					}
				}
			} finally {
				close(leftIt);
			}

			// the intermediate result is not needed anymore
			result.clear();
			result = merged;
		}
		return result;
	}

	/**
	 * Same as {@link #crossProduct(Configuration, Reporter, Iterable...)}
	 * but the intermediate and final results are kept in
	 * {@link ArrayList}s.
	 * <p>
	 * Unlike the other variant, <code>null</code> datasets are not skipped.
	 *
	 * @param datasets the datasets to combine; must contain at least one
	 *                 element.
	 * @return the single dataset itself when only one is given, otherwise
	 *         a list holding the cross product.
	 */
	public static Iterable<Tuple> inMemoryCrossProduct(Iterable<Tuple>... datasets) {
		// no need to cross product if there is only one dataset
		if (datasets.length == 1)
			return datasets[0];

		List<Tuple> result = new ArrayList<Tuple>();
		for (Tuple aTuple : datasets[0]) {
			result.add(aTuple);
		}

		for (int i = 1; i < datasets.length; i++) {
			Iterable<Tuple> right = datasets[i];
			List<Tuple> merged = new ArrayList<Tuple>();
			for (Tuple leftRow : result) {
				for (Tuple rightRow : right) {
					merged.add(Tuple.merge(leftRow, rightRow));
				}
			}
			result.clear();
			result = merged;
		}
		return result;
	}

	/**
	 * Merge the given <code>confs</code> into ones.
	 * <p>
	 *
	 * The value from same property key in the later
	 * configuration objects in the <code>confs</code>
	 * will override the previous one, except for
	 * {@link ConfigureConstants#DATASET_ID_TO_NAME_MAPPING}, whose values
	 * are concatenated with a comma.
	 *
	 * @return a new Configuration that has all the values
	 * in the given <code>confs</code> list.
	 */
	public static Configuration merge(Configuration... confs) {
		final String mappingKey = ConfigureConstants.DATASET_ID_TO_NAME_MAPPING;
		Configuration newConf = new Configuration(false);

		for (Configuration aConf : confs) {
			Iterator<Entry<String, String>> it = aConf.iterator();
			while (it.hasNext()) {
				Entry<String, String> anEntry = it.next();
				if (!anEntry.getKey().equals(mappingKey)) {
					newConf.set(anEntry.getKey(), anEntry.getValue());
					continue;
				}

				// handle ConfigureConstants.DATASET_ID_TO_NAME_MAPPING differently, as
				// this key is set by per dataset, Configuration generated by each dataset
				// is independent with each other.
				String existingMapping = newConf.get(mappingKey, "");
				if (existingMapping.isEmpty()) {
					newConf.set(mappingKey, anEntry.getValue());
				} else {
					newConf.set(mappingKey, existingMapping + "," + anEntry.getValue());
				}
			}
		}
		return newConf;
	}

	/**
	 * Test if the two files have the same content, comparing them line by
	 * line.
	 * <p>
	 * Files of different length are reported as different without being
	 * read. Lines are obtained with {@link BufferedReader#readLine()}, so
	 * the kind of line terminator is not part of the comparison.
	 *
	 * @param f1 first file.
	 * @param f2 second file.
	 * @return <code>true</code> if both files have the same length and the
	 *         same sequence of lines.
	 * @throws IOException if one of the files cannot be opened or read.
	 */
	public boolean equalContent(File f1, File f2) throws IOException {
		if (f1.length() != f2.length())
			return false;

		BufferedReader br1 = null;
		BufferedReader br2 = null;
		try {
			br1 = new BufferedReader(new FileReader(f1));
			br2 = new BufferedReader(new FileReader(f2));

			while (true) {
				String nl1 = br1.readLine();
				// read from the SECOND reader; reading br1 twice would
				// compare the first file against itself.
				String nl2 = br2.readLine();

				if (nl1 == null || nl2 == null) {
					// equal only when both reach EOF at the same time,
					// with no difference found so far
					return nl1 == null && nl2 == null;
				}
				if (!nl1.equals(nl2)) {
					return false;
				}
			}
		} finally {
			closeQuietly(br1);
			closeQuietly(br2);
		}
	}

	/**
	 * Close the given resource, if any, ignoring I/O failures.
	 */
	private static void closeQuietly(Closeable resource) {
		if (resource == null)
			return;
		try {
			resource.close();
		} catch (IOException ignored) {
			// best effort: the comparison result has already been decided
		}
	}

	/**
	 * Binary search for the first or the last occurrence of <code>x</code>
	 * in <code>sorted</code>.
	 *
	 * @param sorted     array sorted in ascending order according to
	 *                   <code>comparator</code>.
	 * @param x          the element to look for.
	 * @param comparator ordering used for both the array and the lookup.
	 * @param isUpper    <code>true</code> to get the index of the last
	 *                   element equal to <code>x</code>, <code>false</code>
	 *                   to get the index of the first one.
	 * @return the requested index, or <code>-1</code> if no element equals
	 *         <code>x</code>.
	 */
	public static int findBoundary(Object[] sorted, Object x, Comparator<Object> comparator, boolean isUpper) {
		int start = 0;
		int end = sorted.length - 1;

		while (start <= end) {
			// unsigned shift: start + end cannot overflow into a negative index
			int mid = (start + end) >>> 1;
			int diff = comparator.compare(sorted[mid], x);

			if (diff > 0) {
				end = mid - 1;
			} else if (diff < 0) {
				start = mid + 1;
			} else if (isUpper) {
				// at the end of the array, or the next element is greater
				// than x: found the upper bound
				if (mid == sorted.length - 1 || comparator.compare(sorted[mid + 1], x) > 0)
					return mid;
				// the next element is same as x, keep looking on the right
				start = mid + 1;
			} else {
				// at the begin of the array, or the previous element is
				// smaller than x: found the lower bound
				if (mid == 0 || comparator.compare(sorted[mid - 1], x) < 0)
					return mid;
				// the previous element is same as x, keep looking on the left
				end = mid - 1;
			}
		}
		return -1;
	}

	/**
	 * @return index of the last element equal to <code>x</code> in
	 *         <code>sorted</code>, or <code>-1</code> if there is none.
	 * @see #findBoundary(Object[], Object, Comparator, boolean)
	 */
	public static int findUpperBound(Object[] sorted, Object x, Comparator<Object> comparator) {
		return findBoundary(sorted, x, comparator, true);
	}

	/**
	 * @return index of the first element equal to <code>x</code> in
	 *         <code>sorted</code>, or <code>-1</code> if there is none.
	 * @see #findBoundary(Object[], Object, Comparator, boolean)
	 */
	public static int findLowerBound(Object[] sorted, Object x, Comparator<Object> comparator) {
		return findBoundary(sorted, x, comparator, false);
	}

	/**
	 * Count how many elements of <code>sorted</code> are equal to
	 * <code>x</code>.
	 *
	 * @return the number of occurrences, or <code>-1</code> (not zero) if
	 *         <code>x</code> does not occur at all.
	 */
	public static int findRepeatTimes(Object[] sorted, Object x, Comparator<Object> comparator) {
		int upper = findUpperBound(sorted, x, comparator);
		if (upper < 0)
			return -1;
		return upper - findLowerBound(sorted, x, comparator) + 1;
	}

	/**
	 * Select the elements of <code>list</code> which are instances of
	 * <code>type</code>, preserving their order. <code>null</code>
	 * elements are skipped.
	 *
	 * @param list the elements to filter, cannot be <code>null</code>.
	 * @param type the type to look for, cannot be <code>null</code>.
	 * @return a new list with the matching elements, possibly empty.
	 * @throws NullPointerException if <code>list</code> or
	 *         <code>type</code> is <code>null</code>.
	 */
	@SuppressWarnings("unchecked")
	public static <T> List<T> findByType(List<? super T> list, Class<T> type) {
		if (list == null)
			throw new NullPointerException("list parameter cannot be null");
		if (type == null)
			throw new NullPointerException("type parameter cannot be null");

		List<T> subclasses = new ArrayList<T>();
		for (Object e : list) {
			// isInstance is false for null, so null elements do not blow up
			if (type.isInstance(e)) {
				subclasses.add((T) e); // safe: checked by isInstance
			}
		}
		return subclasses;
	}

	/**
	 * Array flavor of {@link #findByType(List, Class)}.
	 *
	 * @return a new array, with component type <code>type</code>, holding
	 *         the elements of <code>list</code> which are instances of
	 *         <code>type</code>.
	 */
	@SuppressWarnings("unchecked")
	public static <U, T extends U> T[] findByType(U[] list, Class<T> type) {
		List<T> result = findByType(Arrays.asList(list), type);
		T[] t = (T[]) Array.newInstance(type, result.size());
		return result.toArray(t);
	}

	/**
	 * Close the given iterator if it implements {@link Closeable};
	 * otherwise, or when it is <code>null</code>, do nothing.
	 *
	 * @throws IOException if closing the iterator fails.
	 */
	public static <E> void close(Iterator<E> it) throws IOException {
		// instanceof is false for null, no separate null check needed
		if (it instanceof Closeable) {
			((Closeable) it).close();
		}
	}

	/**
	 * Split <code>source</code> by the literal <code>delimiter</code>,
	 * without interpreting it as a regular expression.
	 * <p>
	 * Delimiters are matched from left to right without overlapping.
	 * Leading and trailing delimiters produce empty tokens, for example
	 * <code>"a,b,"</code> gives <code>["a", "b", ""]</code>. An empty
	 * <code>source</code> gives an empty list.
	 *
	 * @param source    the string to split, cannot be <code>null</code>.
	 * @param delimiter the separator, cannot be <code>null</code> nor
	 *                  empty.
	 * @return the tokens, in order.
	 * @throws NullPointerException if <code>source</code> or
	 *         <code>delimiter</code> is <code>null</code>.
	 * @throws IllegalArgumentException if <code>delimiter</code> is empty.
	 */
	public static List<String> nonRegexSplit(String source, String delimiter) {
		if (source == null)
			throw new NullPointerException("source cannot be null.");
		if (delimiter == null)
			throw new NullPointerException("delimiter cannot be null");
		if (delimiter.isEmpty())
			throw new IllegalArgumentException("delimiter cannot be empty string.");

		final int length = delimiter.length();
		List<String> result = new ArrayList<String>();

		int start = 0;
		int end;
		while ((end = source.indexOf(delimiter, start)) >= 0) {
			result.add(source.substring(start, end));
			start = end + length;
		}

		if (start < source.length()) {
			// text after the last delimiter (or the whole string)
			result.add(source.substring(start));
		} else if (start > 0) {
			// the last matched delimiter ends exactly at the end of the
			// string. Checking the position instead of endsWith() avoids
			// a bogus token when the tail merely overlaps a delimiter
			// which has been consumed already, e.g. "aaa" split by "aa".
			result.add("");
		}
		return result;
	}
}