ml.shifu.core.util.CommonUtils.java Source code

Java tutorial

Introduction

Here is the source code for ml.shifu.core.util.CommonUtils.java

Source

/**
 * Copyright [2012-2014] eBay Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ml.shifu.core.util;

import com.google.common.base.Function;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import ml.shifu.core.container.fieldMeta.FieldMeta;
import ml.shifu.core.exception.MalformedDataException;
import ml.shifu.core.exception.SizeMismatchException;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.Map.Entry;

/**
 * {@link CommonUtils} collects general-purpose static utility functions used throughout this framework.
 */
public final class CommonUtils {

    private static final Logger LOG = LoggerFactory.getLogger(CommonUtils.class);

    /**
     * Private constructor: this class only exposes static helpers and is not meant to be instantiated.
     */
    private CommonUtils() {
    }

    /**
     * Parses the string form of {@code o} as a double, falling back to {@code defaultValue} on any failure.
     *
     * @param o            value whose {@code toString()} result is parsed; may be null
     * @param defaultValue value returned when {@code o} is null or cannot be parsed. It is unboxed on return, so a
     *                     null default causes a {@link NullPointerException} on the fallback path.
     * @return the parsed double, or {@code defaultValue} if parsing failed
     */
    public static double getDoubleOrElse(Object o, Double defaultValue) {
        double parsed;
        try {
            String text = o.toString();
            parsed = Double.parseDouble(text);
        } catch (Exception e) {
            // Best effort by design: null input, unparsable text, etc. all yield the caller's default.
            return defaultValue;
        }
        return parsed;
    }

    /**
     * Null-safe variant of {@code toString()}.
     *
     * @param o any object, possibly null
     * @return the empty string when {@code o} is null, otherwise {@code o.toString()}
     */
    public static String toStringOrEmpty(Object o) {
        return o == null ? "" : o.toString();
    }

    /**
     * Opens the file at {@code path} and loads its header via {@link #loadHeader(InputStream, String)}.
     *
     * @param path      location of the file whose first line is the header
     * @param delimiter separator passed through to the stream-based overload
     * @return the header field names
     * @throws IOException if the file cannot be opened or closed
     */
    public static String[] loadHeader(String path, String delimiter) throws IOException {
        LOG.info("Loading header from: " + path);
        try (InputStream headerStream = new FileInputStream(path)) {
            String[] header = loadHeader(headerStream, delimiter);
            return header;
        }
    }

    /**
     * Reads the first line of {@code is} as a header row and splits it into trimmed field names.
     * <p>
     * The stream is decoded as UTF-8. The scanner wrapping the stream is closed before this method returns, which
     * also closes {@code is}.
     *
     * @param is        stream positioned at the header line
     * @param delimiter separator characters handed to {@code StringUtils.splitPreserveAllTokens}
     * @return the header field names, each trimmed, in file order
     * @throws MalformedDataException if the stream contains no line, the header line is empty after trimming, any
     *                                field name is empty after trimming, or a field name occurs more than once
     */
    public static String[] loadHeader(InputStream is, String delimiter) {
        // Decode explicitly as UTF-8 so the result does not depend on the JVM's platform default charset.
        try (Scanner scanner = new Scanner(is, StandardCharsets.UTF_8.name())) {
            // An empty stream has no line at all; report it like any other empty header instead of
            // letting Scanner.nextLine() fail with NoSuchElementException.
            if (!scanner.hasNextLine()) {
                throw new MalformedDataException("Header is empty");
            }
            String headerLine = scanner.nextLine().trim();
            if (StringUtils.isEmpty(headerLine)) {
                throw new MalformedDataException("Header is empty");
            }
            LOG.info("Delimiter: {}", delimiter);
            String[] header = StringUtils.splitPreserveAllTokens(headerLine, delimiter);

            LOG.info("Number of Fields: {}", header.length);
            LOG.info(Arrays.toString(header));

            Set<String> nameSet = new HashSet<>();
            for (int i = 0; i < header.length; i++) {
                header[i] = header[i].trim();

                if (StringUtils.isEmpty(header[i])) {
                    throw new MalformedDataException("Field is empty: #" + i + ", " + headerLine);
                }

                // Set.add returns false when the name was already present.
                if (!nameSet.add(header[i])) {
                    throw new MalformedDataException("Duplicated field names: " + header[i]);
                }
            }
            return header;
        }
    }

    /**
     * Reads every line of the file at {@code path} into memory, decoding the content as UTF-8.
     *
     * @param path location of the file to read
     * @return the lines of the file, in order
     * @throws IOException if the file cannot be read
     */
    public static List<String> readAllLines(String path) throws IOException {
        java.nio.file.Path source = Paths.get(path);
        List<String> lines = Files.readAllLines(source, StandardCharsets.UTF_8);
        return lines;
    }

    /**
     * Returns the part of a Pig header column name that follows the last {@code Constants.PIG_COLUMN_SEPARATOR}.
     * For example, a column written as a::b yields b, while a plain b is returned unchanged.
     *
     * @param raw the column name as it appears in the Pig header
     * @return the text after the last separator, or {@code raw} itself when it contains no separator
     * @throws NullPointerException if parameter raw is null.
     */
    public static String getRelativePigHeaderColumnName(String raw) {
        String separator = Constants.PIG_COLUMN_SEPARATOR;
        int position = raw.lastIndexOf(separator);
        if (position < 0) {
            return raw;
        }
        return raw.substring(position + separator.length());
    }

    /**
     * Returns the bin index for {@code value}: the highest index whose boundary is not greater than the value.
     * The search walks down from the last boundary and never goes below index 0, so a value smaller than every
     * boundary maps to bin 0 rather than -1 (the first boundary is expected to be NEGATIVE_INFINITY).
     *
     * @param binBoundary bin boundary list which should be sorted.
     * @param value       the value to place into a bin
     * @return the index of the bin that {@code value} falls into
     * @throws IllegalArgumentException if binBoundary is null or empty.
     */
    private static int getNumericBinNum(List<Double> binBoundary, double value) {
        if (binBoundary == null || binBoundary.isEmpty()) {
            throw new IllegalArgumentException("binBoundary should not be null or empty.");
        }

        for (int idx = binBoundary.size() - 1; idx > 0; idx--) {
            // Stop at the first boundary (from the top) that the value is not strictly below.
            if (!(value < binBoundary.get(idx))) {
                return idx;
            }
        }
        return 0;
    }

    /**
     * Splits {@code raw} on {@code delimiter} and returns the pieces as an array. This is the array-returning
     * counterpart of {@link #splitAndReturnList(String, String)}, kept because many callers in the framework
     * work with {@code String[]}.
     *
     * @param raw       the text to split
     * @param delimiter the separator to split on
     * @return the pieces of {@code raw}, in order
     * @throws IllegalArgumentException if {@code raw} or {@code delimiter} is null or empty.
     */
    public static String[] split(String raw, String delimiter) {
        List<String> tokens = splitAndReturnList(raw, delimiter);
        String[] result = new String[tokens.size()];
        return tokens.toArray(result);
    }

    /**
     * Splits {@code raw} on {@code delimiter} using Guava's {@code Splitter}, so characters such as '|' in the
     * delimiter need no special handling by the caller.
     *
     * @param raw       the text to split
     * @param delimiter the separator to split on
     * @return a new list holding the pieces of {@code raw}, in order
     * @throws IllegalArgumentException if {@code raw} or {@code delimiter} is null or empty.
     */
    public static List<String> splitAndReturnList(String raw, String delimiter) {
        boolean bothPresent = !StringUtils.isEmpty(raw) && !StringUtils.isEmpty(delimiter);
        if (!bothPresent) {
            throw new IllegalArgumentException(String
                    .format("raw and delimiter should not be null or empty, raw:%s, delimiter:%s", raw, delimiter));
        }

        Iterable<String> pieces = Splitter.on(delimiter).split(raw);
        return Lists.newArrayList(pieces);
    }

    /**
     * Builds a map that pairs each header name with the data value at the same position. Header names should be
     * unique; if a name repeats, the later value replaces the earlier one in the returned map.
     *
     * @param header column names, used as keys
     * @param data   column values, used as map values
     * @return a new map from header name to data value
     * @throws IllegalArgumentException if lengths of two arrays are not the same.
     * @throws NullPointerException     if header or data is null.
     */
    public static Map<String, String> createRawDataMap(String[] header, String[] data) {
        final int fieldCount = header.length;
        if (fieldCount != data.length) {
            throw new IllegalArgumentException(String
                    .format("Header/Data mismatch: Header length %s, Data length %s", fieldCount, data.length));
        }

        Map<String, String> rawDataMap = new HashMap<String, String>(fieldCount);
        int index = 0;
        for (String name : header) {
            rawDataMap.put(name, data[index]);
            index++;
        }
        return rawDataMap;
    }

    /**
     * Convenience overload for UDFs: pairs the header taken from {@code fieldMeta} with the values in
     * {@code data}, delegating to {@link #createRawDataMap(String[], Object[])}.
     *
     * @param fieldMeta metadata supplying the header names
     * @param data      the values, positionally aligned with the header
     * @return a new map from header name to the string form of each value
     */
    public static Map<String, String> createRawDataMap(FieldMeta fieldMeta, List<Object> data) {
        return createRawDataMap(fieldMeta.getHeader(), data.toArray());
    }

    /**
     * Builds a map that pairs each header name with the string form of the value at the same position.
     * Null values are stored as the empty string (see {@link #toStringOrEmpty(Object)}).
     *
     * @param header column names, used as keys
     * @param data   column values; each is converted with {@code toStringOrEmpty}
     * @return a new map from header name to value string
     * @throws SizeMismatchException if the two arrays differ in length
     */
    public static Map<String, String> createRawDataMap(String[] header, Object[] data) {
        final int expected = header.length;
        if (expected != data.length) {
            throw new SizeMismatchException("FieldMeta", expected, "Tuple", data.length);
        }

        Map<String, String> rawDataMap = new HashMap<>(expected);
        int index = 0;
        for (String name : header) {
            rawDataMap.put(name, toStringOrEmpty(data[index]));
            index++;
        }
        return rawDataMap;
    }

    /**
     * Converts a list string such as [1,2] into a list of doubles. Each item is trimmed before parsing.
     * <p>
     * The result comes from Guava's {@code Lists.transform}, which returns a view: items are parsed when they are
     * accessed, so a non-numeric item surfaces as a {@link NumberFormatException} at access time.
     *
     * @param str the bracketed, comma-separated list string
     * @return a list view of the parsed doubles
     * @throws IllegalArgumentException if str is not a valid list str: [1,2].
     */
    public static List<Double> stringToDoubleList(String str) {
        Function<String, Double> toDouble = new Function<String, Double>() {
            @Override
            public Double apply(String input) {
                return Double.valueOf(input.trim());
            }
        };
        return Lists.transform(checkAndReturnSplitCollections(str), toDouble);
    }

    /**
     * Validates a bracketed list string such as [1,2,3] and returns the items found between the brackets,
     * split on {@code Constants.COMMA}. Whitespace surrounding the brackets is ignored; the items themselves are
     * returned untrimmed.
     *
     * @param str the list string to validate and split
     * @return a fixed-size list of the raw items between the brackets
     * @throws IllegalArgumentException if {@code str} is null, empty after trimming, or not enclosed in brackets
     */
    private static List<String> checkAndReturnSplitCollections(String str) {
        // Trim first so that validation and the substring bounds are both computed on the same string.
        String trimmed = (str == null) ? null : str.trim();
        checkListStr(trimmed);
        String body = trimmed.substring(1, trimmed.length() - 1);
        return Arrays.asList(body.split(Constants.COMMA));
    }

    /**
     * Verifies that {@code str} looks like a list string, i.e. is non-empty and enclosed in square brackets.
     *
     * @param str the candidate list string
     * @throws IllegalArgumentException if {@code str} is null or empty, or does not start with '[' and end with ']'
     */
    private static void checkListStr(String str) {
        if (StringUtils.isEmpty(str)) {
            throw new IllegalArgumentException("str should not be null or empty");
        }

        boolean bracketed = str.startsWith("[") && str.endsWith("]");
        if (!bracketed) {
            throw new IllegalArgumentException("Invalid list string format, should be like '[1,2,3]'");
        }
    }

    /**
     * Converts a list string such as [1,2] into a list of integers. Each item is trimmed before parsing.
     * <p>
     * The result comes from Guava's {@code Lists.transform}, which returns a view: items are parsed when they are
     * accessed, so a non-integer item surfaces as a {@link NumberFormatException} at access time.
     *
     * @param str the bracketed, comma-separated list string
     * @return a list view of the parsed integers
     * @throws IllegalArgumentException if str is not a valid list str.
     */
    public static List<Integer> stringToIntegerList(String str) {
        Function<String, Integer> toInteger = new Function<String, Integer>() {
            @Override
            public Integer apply(String input) {
                return Integer.valueOf(input.trim());
            }
        };
        return Lists.transform(checkAndReturnSplitCollections(str), toInteger);
    }

    /**
     * Converts a list string such as [a,b] into a list of its items, each with surrounding whitespace trimmed.
     * The result comes from Guava's {@code Lists.transform}, which returns a view over the split items.
     *
     * @param str the bracketed, comma-separated list string
     * @return a list view of the trimmed items
     * @throws IllegalArgumentException if str is not a valid list str.
     */
    public static List<String> stringToStringList(String str) {
        Function<String, String> trimItem = new Function<String, String>() {
            @Override
            public String apply(String input) {
                return input.trim();
            }
        };
        return Lists.transform(checkAndReturnSplitCollections(str), trimItem);
    }

    /**
     * Returns the entries of {@code map} in a new list, sorted in ascending order of their values' natural
     * ordering. The map itself is not modified.
     *
     * @param map the map whose entries are to be ordered
     * @return a new list of the map's entries, sorted by value
     */
    public static <K, V extends Comparable<V>> List<Map.Entry<K, V>> getEntriesSortedByValues(Map<K, V> map) {
        Comparator<Map.Entry<K, V>> byValue = new Comparator<Map.Entry<K, V>>() {
            @Override
            public int compare(Entry<K, V> left, Entry<K, V> right) {
                V leftValue = left.getValue();
                return leftValue.compareTo(right.getValue());
            }
        };

        List<Map.Entry<K, V>> sorted = new LinkedList<Map.Entry<K, V>>(map.entrySet());
        Collections.sort(sorted, byValue);
        return sorted;
    }

    /**
     * Returns the platform file separator in a form suitable for use in a regular expression.
     *
     * @return {@code File.separator} unchanged when it equals {@code Constants.SLASH}; otherwise
     * {@code File.separator} prefixed with {@code Constants.BACK_SLASH} (the Windows case, where the
     * backslash separator has to be escaped)
     */
    public static String getPathSeparatorRegex() {
        String separator = File.separator;
        return separator.equals(Constants.SLASH) ? separator : Constants.BACK_SLASH + separator;
    }

    /**
     * Checks whether {@code targetColumn} occurs in {@code columns}, comparing names case-insensitively.
     * Null entries in {@code columns} are skipped.
     *
     * @param columns      the column names to search
     * @param targetColumn the column name to look for
     * @return true if a matching column is found; false otherwise, including when {@code columns} is null or
     * empty or {@code targetColumn} is blank
     */
    public static boolean isColumnExists(String[] columns, String targetColumn) {
        boolean searchable = !ArrayUtils.isEmpty(columns) && !StringUtils.isBlank(targetColumn);
        if (searchable) {
            for (int i = 0; i < columns.length; i++) {
                String column = columns[i];
                if (column != null && column.equalsIgnoreCase(targetColumn)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Finds the first element of {@code leftCol}, in its iteration order, that is also contained in
     * {@code rightCol}.
     *
     * @param leftCol  - left collection, iterated in order
     * @param rightCol - right collection, queried with {@code contains}
     * @return the first element found in both collections, or null if there is no such element or if either
     * collection is null or empty
     */
    public static <T> T containsAny(Collection<T> leftCol, Collection<T> rightCol) {
        if (leftCol != null && rightCol != null && !leftCol.isEmpty() && !rightCol.isEmpty()) {
            Iterator<T> candidates = leftCol.iterator();
            while (candidates.hasNext()) {
                T candidate = candidates.next();
                if (rightCol.contains(candidate)) {
                    return candidate;
                }
            }
        }
        return null;
    }

    /**
     * Escapes a delimiter for use in Pig, which does not support invisible characters. Each tab character is
     * replaced by two backslashes followed by the letter t; every other character is copied unchanged.
     *
     * @param delimiter - the original delimiter
     * @return the delimiter after escape
     */
    public static String escapePigString(String delimiter) {
        StringBuilder escaped = new StringBuilder();

        for (char c : delimiter.toCharArray()) {
            if (c == '\t') {
                escaped.append("\\\\t");
            } else {
                escaped.append(c);
            }
        }

        return escaped.toString();
    }

    /**
     * Converts one delimited record into a map from column name to field value. The record is split with
     * {@link #split(String, String)}; if the number of fields does not match the number of header columns, an
     * error is logged and null is returned. A null field is stored as the empty string.
     *
     * @param inputData - String of a record
     * @param delimiter - the delimiter of the input data
     * @param header    - the column names for all the input data
     * @return map from column name to value for the record, or null when the field count does not match the
     * header
     */
    public static Map<String, String> convertDataIntoMap(String inputData, String delimiter, String[] header) {
        String[] fields = CommonUtils.split(inputData, delimiter);
        boolean usable = fields != null && fields.length != 0 && fields.length == header.length;
        if (!usable) {
            LOG.error("the wrong input data, {}", inputData);
            return null;
        }

        Map<String, String> rawDataMap = new HashMap<String, String>(fields.length);
        for (int i = 0; i < header.length; i++) {
            String value = fields[i];
            rawDataMap.put(header[i], value == null ? "" : value);
        }

        return rawDataMap;
    }

    /**
     * Loads the class with the given fully qualified name via {@link Class#forName(String)}.
     *
     * @param name fully qualified class name
     * @return the loaded class
     * @throws ClassNotFoundException if no class with that name can be located
     */
    public static Class getClass(String name) throws ClassNotFoundException {
        Class loaded = Class.forName(name);
        return loaded;
    }

    /**
     * Tells whether the string form of {@code raw} parses to a finite double.
     *
     * @param raw any object; its {@code toString()} result is parsed
     * @return false when {@code raw} is null, is not parsable as a double, or parses to NaN or an infinity;
     * true otherwise
     */
    public static boolean isValidNumber(Object raw) {
        if (raw == null) {
            return false;
        }

        final double parsed;
        try {
            parsed = Double.parseDouble(raw.toString());
        } catch (NumberFormatException e) {
            return false;
        }

        return !Double.isNaN(parsed) && !Double.isInfinite(parsed);
    }

    /**
     * Converts {@code raw} to the most specific type it parses as: an {@link Integer} if possible, otherwise a
     * {@link Double}, otherwise the original string.
     *
     * @param raw the text to interpret; may be null
     * @return an Integer, a Double, or {@code raw} itself (null when {@code raw} is null)
     */
    public static Object tryParse(String raw) {
        try {
            return Integer.valueOf(raw);
        } catch (Exception notAnInteger) {
            // Not an integer; try the wider numeric type next.
        }

        try {
            return Double.valueOf(raw);
        } catch (Exception notADouble) {
            // Neither numeric form applies, so hand the input back untouched.
            return raw;
        }
    }

}