// Java tutorial
/** * Copyright [2012-2014] eBay Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ml.shifu.core.util; import com.google.common.base.Function; import com.google.common.base.Splitter; import com.google.common.collect.Lists; import ml.shifu.core.container.fieldMeta.FieldMeta; import ml.shifu.core.exception.MalformedDataException; import ml.shifu.core.exception.SizeMismatchException; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; import java.util.*; import java.util.Map.Entry; /** * {@link CommonUtils} is used to for almost all kinds of utility function in this framework. */ public final class CommonUtils { private static final Logger LOG = LoggerFactory.getLogger(CommonUtils.class); /** * Avoid using new for our utility class. 
*/ private CommonUtils() { } public static double getDoubleOrElse(Object o, Double defaultValue) { try { return Double.parseDouble(o.toString()); } catch (Exception e) { return defaultValue; } } public static String toStringOrEmpty(Object o) { if (o == null) { return ""; } return o.toString(); } public static String[] loadHeader(String path, String delimiter) throws IOException { LOG.info("Loading header from: " + path); try (InputStream is = new FileInputStream(path)) { return loadHeader(is, delimiter); } } public static String[] loadHeader(InputStream is, String delimiter) { Set<String> nameSet = new HashSet<>(); try (Scanner scanner = new Scanner(is)) { String headerLine = scanner.nextLine().trim(); if (StringUtils.isEmpty(headerLine)) { throw new MalformedDataException("Header is empty"); } LOG.info("Delimiter: " + delimiter); String[] header = StringUtils.splitPreserveAllTokens(headerLine, delimiter); LOG.info("Number of Fields: " + header.length); LOG.info(Arrays.toString(header)); for (int i = 0; i < header.length; i++) { header[i] = header[i].trim(); if (StringUtils.isEmpty(header[i])) { throw new MalformedDataException("Field is empty: #" + i + ", " + headerLine); } if (nameSet.contains(header[i])) { throw new MalformedDataException("Duplicated field names: " + header[i]); } nameSet.add(header[i]); } return header; } } public static List<String> readAllLines(String path) throws IOException { return Files.readAllLines(Paths.get(path), StandardCharsets.UTF_8); } /** * Get relative column name from pig header. For example, one column is a::b, return b. If b, return b. * * @throws NullPointerException if parameter raw is null. */ public static String getRelativePigHeaderColumnName(String raw) { int position = raw.lastIndexOf(Constants.PIG_COLUMN_SEPARATOR); return position >= 0 ? raw.substring(position + Constants.PIG_COLUMN_SEPARATOR.length()) : raw; } /** * Return the real bin number for one value. 
As the first bin value is NEGATIVE_INFINITY, invalid index is 0, not * -1. * * @param binBoundary bin boundary list which should be sorted. * @throws IllegalArgumentException if binBoundary is null or empty. */ private static int getNumericBinNum(List<Double> binBoundary, double value) { if (binBoundary == null || binBoundary.isEmpty()) { throw new IllegalArgumentException("binBoundary should not be null or empty."); } int n = binBoundary.size() - 1; while (n > 0 && value < binBoundary.get(n)) { n--; } return n; } /** * Common split function to ignore special character like '|'. It's better to return a list while many calls in our * framework using string[]. * * @throws IllegalArgumentException {@code raw} and {@code delimiter} is null or empty. */ public static String[] split(String raw, String delimiter) { List<String> split = splitAndReturnList(raw, delimiter); return split.toArray(new String[split.size()]); } /** * Common split function to ignore special character like '|'. * * @throws IllegalArgumentException {@code raw} and {@code delimiter} is null or empty. */ public static List<String> splitAndReturnList(String raw, String delimiter) { if (StringUtils.isEmpty(raw) || StringUtils.isEmpty(delimiter)) { throw new IllegalArgumentException(String .format("raw and delimiter should not be null or empty, raw:%s, delimiter:%s", raw, delimiter)); } List<String> headerList = new ArrayList<String>(); for (String str : Splitter.on(delimiter).split(raw)) { headerList.add(str); } return headerList; } /** * Return one HashMap Object contains keys in the first parameter, values in the second parameter. Before calling * this method, you should be aware that headers should be unique. * * @throws IllegalArgumentException if lengths of two arrays are not the same. * @throws NullPointerException if header or data is null. 
*/ public static Map<String, String> createRawDataMap(String[] header, String[] data) { if (header.length != data.length) { throw new IllegalArgumentException(String .format("Header/Data mismatch: Header length %s, Data length %s", header.length, data.length)); } Map<String, String> rawDataMap = new HashMap<String, String>(header.length); for (int i = 0; i < header.length; i++) { rawDataMap.put(header[i], data[i]); } return rawDataMap; } // For UDF public static Map<String, String> createRawDataMap(FieldMeta fieldMeta, List<Object> data) { String[] header = fieldMeta.getHeader(); return createRawDataMap(header, data.toArray()); } public static Map<String, String> createRawDataMap(String[] header, Object[] data) { if (header.length != data.length) { throw new SizeMismatchException("FieldMeta", header.length, "Tuple", data.length); } Map<String, String> rawDataMap = new HashMap<>(header.length); for (int i = 0; i < header.length; i++) { rawDataMap.put(header[i], toStringOrEmpty(data[i])); } return rawDataMap; } /** * Change list str to List object with double type. * * @throws IllegalArgumentException if str is not a valid list str: [1,2]. */ public static List<Double> stringToDoubleList(String str) { List<String> list = checkAndReturnSplitCollections(str); return Lists.transform(list, new Function<String, Double>() { @Override public Double apply(String input) { return Double.valueOf(input.trim()); } }); } private static List<String> checkAndReturnSplitCollections(String str) { checkListStr(str); return Arrays.asList(str.trim().substring(1, str.length() - 1).split(Constants.COMMA)); } private static void checkListStr(String str) { if (StringUtils.isEmpty(str)) { throw new IllegalArgumentException("str should not be null or empty"); } if (!str.startsWith("[") || !str.endsWith("]")) { throw new IllegalArgumentException("Invalid list string format, should be like '[1,2,3]'"); } } /** * Change list str to List object with integer type. 
* * @throws IllegalArgumentException if str is not a valid list str. */ public static List<Integer> stringToIntegerList(String str) { List<String> list = checkAndReturnSplitCollections(str); return Lists.transform(list, new Function<String, Integer>() { @Override public Integer apply(String input) { return Integer.valueOf(input.trim()); } }); } /** * Change list str to List object with string type. * * @throws IllegalArgumentException if str is not a valid list str. */ public static List<String> stringToStringList(String str) { List<String> list = checkAndReturnSplitCollections(str); return Lists.transform(list, new Function<String, String>() { @Override public String apply(String input) { return input.trim(); } }); } /** * Return map entries sorted by value. */ public static <K, V extends Comparable<V>> List<Map.Entry<K, V>> getEntriesSortedByValues(Map<K, V> map) { List<Map.Entry<K, V>> entries = new LinkedList<Map.Entry<K, V>>(map.entrySet()); Collections.sort(entries, new Comparator<Map.Entry<K, V>>() { @Override public int compare(Entry<K, V> o1, Entry<K, V> o2) { return o1.getValue().compareTo(o2.getValue()); } }); return entries; } /** * Get the file separator regex * * @return "/" - if the OS is Linux * "\\\\" - if the OS is Windows */ public static String getPathSeparatorRegex() { if (File.separator.equals(Constants.SLASH)) { return File.separator; } else { return Constants.BACK_SLASH + File.separator; } } /** * To check whether there is targetColumn in columns or not * * @return true - if the columns contains targetColumn, or false */ public static boolean isColumnExists(String[] columns, String targetColumn) { if (ArrayUtils.isEmpty(columns) || StringUtils.isBlank(targetColumn)) { return false; } for (String column : columns) { if (column != null && column.equalsIgnoreCase(targetColumn)) { return true; } } return false; } /** * Returns the element if it is in both collections. 
* - return null if any collection is null or empty * - return null if no element exists in both collections * * @param leftCol - left collection * @param rightCol - right collection * @return First element that are found in both collections * null if no elements in both collection or any collection is null or empty */ public static <T> T containsAny(Collection<T> leftCol, Collection<T> rightCol) { if (leftCol == null || rightCol == null || leftCol.isEmpty() || rightCol.isEmpty()) { return null; } for (T element : leftCol) { if (rightCol.contains(element)) { return element; } } return null; } /** * Escape the delimiter for Pig.... Since the Pig doesn't support invisible character * * @param delimiter - the original delimiter * @return the delimiter after escape */ public static String escapePigString(String delimiter) { StringBuilder buf = new StringBuilder(); for (int i = 0; i < delimiter.length(); i++) { char c = delimiter.charAt(i); switch (c) { case '\t': buf.append("\\\\t"); break; default: buf.append(c); break; } } return buf.toString(); } /** * Convert data into <key, value> map. 
The @inputData is String of a record, which is delimited by @delimiter * If fields in @inputData is not equal @header size, return null * * @param inputData - String of a record * @param delimiter - the delimiter of the input data * @param header - the column names for all the input data * @return <key, value> map for the record */ public static Map<String, String> convertDataIntoMap(String inputData, String delimiter, String[] header) { String[] input = CommonUtils.split(inputData, delimiter); if (input == null || input.length == 0 || input.length != header.length) { LOG.error("the wrong input data, {}", inputData); return null; } Map<String, String> rawDataMap = new HashMap<String, String>(input.length); for (int i = 0; i < header.length; i++) { if (input[i] == null) { rawDataMap.put(header[i], ""); } else { rawDataMap.put(header[i], input[i]); } } return rawDataMap; } public static Class getClass(String name) throws ClassNotFoundException { return Class.forName(name); } public static boolean isValidNumber(Object raw) { if (raw == null) { return false; } Double value; try { value = Double.parseDouble(raw.toString()); } catch (NumberFormatException e) { return false; } return !(Double.isNaN(value) || Double.isInfinite(value)); } public static Object tryParse(String raw) { try { return Integer.valueOf(raw); } catch (Exception e) { try { return Double.valueOf(raw); } catch (Exception e1) { return raw; } } } }