Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang.ArrayUtils;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.matrix.data.FrameBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.NumItemsByEachReducerMetaData;
import org.apache.sysml.runtime.matrix.data.Pair;
import org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue;

/**
 * Collection of static helper functions for hashing, block/cell index
 * arithmetic, number parsing, value type conversion, and simple string
 * and array handling.
 */
public class UtilFunctions {
    //for accurate cast of double values to int and long
    //IEEE754: binary64 (double precision) eps = 2^(-53) = 1.11 * 10^(-16)
    //(same epsilon as used for matrix index cast in R)
    //NOTE(review): intentionally left non-final to keep the field assignable
    //for existing callers - confirm nobody writes it before making it final
    public static double DOUBLE_EPS = Math.pow(2, -53);

    //prime numbers for old hash function (divide prime close to max int,
    //because it determines the max hash domain size)
    public static final long ADD_PRIME1 = 99991;
    public static final int DIVIDE_PRIME = 1405695061;

    /**
     * Returns the hash code of a single long, computed by xor-ing its
     * upper and lower 32 bits.
     *
     * @param v long value
     * @return hash code
     */
    public static int longHashCode(long v) {
        return (int) (v ^ (v >>> 32));
    }

    /**
     * Returns the hash code for a long-long pair. This is the default
     * hash function for the keys of a distributed matrix in MR/Spark.
     *
     * @param key1 first long key
     * @param key2 second long key
     * @return hash code
     */
    public static int longlongHashCode(long key1, long key2) {
        //basic hash mixing of two longs hashes (similar to
        //Arrays.hashCode(long[]) but w/o array creation/copy)
        return longHashCode(key1) * 31 + longHashCode(key2);
    }

    /**
     * Returns the smallest power of two that is greater or equal to the
     * given value, capped at Integer.MAX_VALUE. An input of 0 yields 1.
     *
     * @param in input value
     * @return next power of two, or Integer.MAX_VALUE if it is not representable as int
     */
    public static int nextIntPow2(int in) {
        //exponent in [0,32]; 2^31 and 2^32 exceed the int range and are capped below
        int expon = (in == 0) ? 0 : 32 - Integer.numberOfLeadingZeros(in - 1);
        long pow2 = 1L << expon;
        return (pow2 > Integer.MAX_VALUE) ? Integer.MAX_VALUE : (int) pow2;
    }

    /**
     * Computes the 1-based block index based on the global cell index and block size meta
     * data. See computeCellIndex for the inverse operation.
     *
     * @param cellIndex global cell index
     * @param blockSize block size
     * @return 1-based block index
     */
    public static long computeBlockIndex(long cellIndex, int blockSize) {
        return (cellIndex - 1) / blockSize + 1;
    }

    /**
     * Computes the 0-based cell-in-block index based on the global cell index and block
     * size meta data. See computeCellIndex for the inverse operation.
     *
     * @param cellIndex global cell index
     * @param blockSize block size
     * @return 0-based cell-in-block index
     */
    public static int computeCellInBlock(long cellIndex, int blockSize) {
        return (int) ((cellIndex - 1) % blockSize);
    }

    /**
     * Computes the global 1-based cell index based on the block index, block size meta data,
     * and specific 0-based in-block cell index.
     *
     * NOTE: this is equivalent to cellIndexCalculation.
     *
     * @param blockIndex block index
     * @param blockSize block size
     * @param cellInBlock 0-based cell-in-block index
     * @return global 1-based cell index
     */
    public static long computeCellIndex(long blockIndex, int blockSize, int cellInBlock) {
        return (blockIndex - 1) * blockSize + 1 + cellInBlock;
    }

    /**
     * Computes the actual block size based on matrix dimension, block index, and block size
     * meta data. For boundary blocks, the actual block size is less or equal than the block
     * size meta data; otherwise they are identical.
     *
     * @param len matrix dimension
     * @param blockIndex block index
     * @param blockSize block size metadata
     * @return actual block size
     */
    public static int computeBlockSize(long len, long blockIndex, long blockSize) {
        long remain = len - (blockIndex - 1) * blockSize;
        return (int) Math.min(blockSize, remain);
    }

    /**
     * Indicates if the block with the given block indexes overlaps with the
     * given cell index range (all bounds inclusive).
     *
     * @param ix block indexes
     * @param brlen number of rows per block
     * @param bclen number of columns per block
     * @param rl lower row bound of the range
     * @param ru upper row bound of the range
     * @param cl lower column bound of the range
     * @param cu upper column bound of the range
     * @return true if block and range overlap, false otherwise
     */
    public static boolean isInBlockRange(MatrixIndexes ix, int brlen, int bclen, long rl, long ru, long cl, long cu) {
        //cell index bounds covered by a full block at position ix
        long bRLowerIndex = (ix.getRowIndex() - 1) * brlen + 1;
        long bRUpperIndex = ix.getRowIndex() * brlen;
        long bCLowerIndex = (ix.getColumnIndex() - 1) * bclen + 1;
        long bCUpperIndex = ix.getColumnIndex() * bclen;

        return rl <= bRUpperIndex && ru >= bRLowerIndex
            && cl <= bCUpperIndex && cu >= bCLowerIndex;
    }

    /**
     * Indicates if the rows [ix, ix+brlen-1] overlap with the given row
     * range (all bounds inclusive).
     *
     * @param ix first row index of the frame block
     * @param brlen number of rows per block
     * @param rl lower row bound of the range
     * @param ru upper row bound of the range
     * @return true if block rows and range overlap, false otherwise
     */
    public static boolean isInFrameBlockRange(Long ix, int brlen, long rl, long ru) {
        return rl <= ix + brlen - 1 && ru >= ix;
    }

    /**
     * Indicates if the block with the given block indexes overlaps with the
     * given index range.
     *
     * @param ix block indexes
     * @param brlen number of rows per block
     * @param bclen number of columns per block
     * @param ixrange index range
     * @return true if block and range overlap, false otherwise
     */
    public static boolean isInBlockRange(MatrixIndexes ix, int brlen, int bclen, IndexRange ixrange) {
        return isInBlockRange(ix, brlen, bclen,
            ixrange.rowStart, ixrange.rowEnd,
            ixrange.colStart, ixrange.colEnd);
    }

    /**
     * Indicates if the rows of a frame block overlap with the rows of the
     * given index range. Only the row bounds are checked; bclen and the
     * column bounds of the range are not used.
     *
     * @param ix first row index of the frame block
     * @param brlen number of rows per block
     * @param bclen number of columns per block (unused)
     * @param ixrange index range
     * @return true if block rows and range rows overlap, false otherwise
     */
    public static boolean isInFrameBlockRange(Long ix, int brlen, int bclen, IndexRange ixrange) {
        return isInFrameBlockRange(ix, brlen, ixrange.rowStart, ixrange.rowEnd);
    }

    /**
     * Computes the 0-based in-block index range of the given matrix block
     * that is covered by the given global index range. If the block does
     * not overlap with the range, all four bounds are -1.
     *
     * Reused by both MR and Spark for performing zero out.
     *
     * @param in indexed matrix block
     * @param blockRowFactor number of rows per block
     * @param blockColFactor number of columns per block
     * @param indexRange global index range
     * @return in-block index range, or (-1,-1,-1,-1) if there is no overlap
     */
    public static IndexRange getSelectedRangeForZeroOut(IndexedMatrixValue in, int blockRowFactor, int blockColFactor, IndexRange indexRange) {
        //block index and in-block position of each of the four range bounds
        long topBlockRowIndex = computeBlockIndex(indexRange.rowStart, blockRowFactor);
        int topRowInTopBlock = computeCellInBlock(indexRange.rowStart, blockRowFactor);
        long bottomBlockRowIndex = computeBlockIndex(indexRange.rowEnd, blockRowFactor);
        int bottomRowInBottomBlock = computeCellInBlock(indexRange.rowEnd, blockRowFactor);

        long leftBlockColIndex = computeBlockIndex(indexRange.colStart, blockColFactor);
        int leftColInLeftBlock = computeCellInBlock(indexRange.colStart, blockColFactor);
        long rightBlockColIndex = computeBlockIndex(indexRange.colEnd, blockColFactor);
        int rightColInRightBlock = computeCellInBlock(indexRange.colEnd, blockColFactor);

        long rowIx = in.getIndexes().getRowIndex();
        long colIx = in.getIndexes().getColumnIndex();

        //no overlap
        if (rowIx < topBlockRowIndex || rowIx > bottomBlockRowIndex
            || colIx < leftBlockColIndex || colIx > rightBlockColIndex) {
            return new IndexRange(-1, -1, -1, -1);
        }

        //get the index range inside the block (start with the entire block
        //and narrow each side for which the block is a boundary block)
        IndexRange tempRange = new IndexRange(-1, -1, -1, -1);
        tempRange.set(0, in.getValue().getNumRows() - 1, 0, in.getValue().getNumColumns() - 1);
        if (topBlockRowIndex == rowIx)
            tempRange.rowStart = topRowInTopBlock;
        if (bottomBlockRowIndex == rowIx)
            tempRange.rowEnd = bottomRowInBottomBlock;
        if (leftBlockColIndex == colIx)
            tempRange.colStart = leftColInLeftBlock;
        if (rightBlockColIndex == colIx)
            tempRange.colEnd = rightColInRightBlock;

        return tempRange;
    }

    /**
     * Computes the in-block index range for zero out of a frame block,
     * derived from the given global index range and row offsets.
     *
     * Reused by both MR and Spark for performing zero out.
     *
     * @param in pair of frame block key and frame block
     * @param blockRowFactor number of rows per block
     * @param blockColFactor number of columns per block
     * @param indexRange global index range
     * @param lSrcRowIndex source row index, subtracted from the range row end
     * @param lDestRowIndex destination row index, compared against the range row start
     * @return in-block index range
     */
    public static IndexRange getSelectedRangeForZeroOut(Pair<Long, FrameBlock> in, int blockRowFactor, int blockColFactor, IndexRange indexRange, long lSrcRowIndex, long lDestRowIndex) {
        //row start is relative to the block key unless the range starts at or before the destination row
        int iRowStart = (indexRange.rowStart <= lDestRowIndex) ?
            0 : (int) (indexRange.rowStart - in.getKey());
        int iRowEnd = (int) Math.min(indexRange.rowEnd - lSrcRowIndex, blockRowFactor) - 1;

        int iColStart = computeCellInBlock(indexRange.colStart, blockColFactor);
        int iColEnd = computeCellInBlock(indexRange.colEnd, blockColFactor);

        return new IndexRange(iRowStart, iRowEnd, iColStart, iColEnd);
    }

    /**
     * Sums up all entries of the number-of-items array of the given meta data.
     *
     * @param metadata meta data with per-reducer item counts
     * @return total number of items
     */
    public static long getTotalLength(NumItemsByEachReducerMetaData metadata) {
        long total = 0;
        for (long count : metadata.getNumItemsArray())
            total += count;
        return total;
    }

    /**
     * Computes the number of items between the positions ceil(total*p)
     * and ceil(total*(1-p)), both inclusive.
     *
     * @param metadata meta data with per-reducer item counts
     * @param p quantile fraction
     * @return number of items in the inter-quantile range
     */
    public static long getLengthForInterQuantile(NumItemsByEachReducerMetaData metadata, double p) {
        long total = getTotalLength(metadata);
        long lpos = (long) Math.ceil(total * p); //lower bound is inclusive
        long upos = (long) Math.ceil(total * (1 - p)); //upper bound is inclusive
        return upos - lpos + 1;
    }

    /**
     * Parses the given string into a double value via Double.parseDouble.
     *
     * @param str string to parse to double
     * @return double value
     */
    public static double parseToDouble(String str) {
        return Double.parseDouble(str);
    }

    /**
     * Parses the given string into an int. Strings that contain a decimal
     * point are parsed as double and converted via toInt(double).
     *
     * @param str string to parse to int
     * @return int value
     */
    public static int parseToInt(String str) {
        return str.contains(".") ?
            toInt(Double.parseDouble(str)) : Integer.parseInt(str);
    }

    /**
     * Parses the given string into a long. Strings that contain a decimal
     * point are parsed as double and converted via toLong(double).
     *
     * @param str string to parse to long
     * @return long value
     */
    public static long parseToLong(String str) {
        return str.contains(".") ?
            toLong(Double.parseDouble(str)) : Long.parseLong(str);
    }

    /**
     * Casts a double to int by flooring the value after adding DOUBLE_EPS.
     *
     * @param val double value
     * @return int value
     */
    public static int toInt(double val) {
        return (int) Math.floor(val + DOUBLE_EPS);
    }

    /**
     * Casts a double to long by flooring the value after adding DOUBLE_EPS.
     *
     * @param val double value
     * @return long value
     */
    public static long toLong(double val) {
        return (long) Math.floor(val + DOUBLE_EPS);
    }

    /**
     * Returns the int value of the given object, which is expected to be
     * a Long or an Integer.
     *
     * @param obj Long or Integer object
     * @return int value
     */
    public static int toInt(Object obj) {
        return (obj instanceof Long) ?
            ((Long) obj).intValue() : ((Integer) obj).intValue();
    }

    /**
     * Rounds the given value up to the next non-zero multiple of factor.
     *
     * @param val value to round
     * @param factor factor whose multiple is returned
     * @return next non-zero multiple of factor greater or equal to val
     */
    public static int roundToNext(int val, int factor) {
        //round up to next non-zero multiple of factor
        int pval = Math.max(val, factor);
        return ((pval + factor - 1) / factor) * factor;
    }

    /**
     * Converts a double into an object of the given value type, where
     * zero values are represented as null.
     *
     * @param vt target value type
     * @param in double value
     * @return converted object, or null for zero
     */
    public static Object doubleToObject(ValueType vt, double in) {
        return doubleToObject(vt, in, true);
    }

    /**
     * Converts a double into an object of the given value type.
     *
     * @param vt target value type
     * @param in double value
     * @param sparse if true, a zero value is returned as null
     * @return converted object, or null for zero if sparse
     */
    public static Object doubleToObject(ValueType vt, double in, boolean sparse) {
        if (in == 0 && sparse)
            return null;
        switch (vt) {
            case STRING:
                return String.valueOf(in);
            case BOOLEAN:
                return (in != 0);
            case INT:
                return toLong(in);
            case DOUBLE:
                return in;
            default:
                throw new RuntimeException("Unsupported value type: " + vt);
        }
    }

    /**
     * Parses a string into an object of the given value type.
     *
     * @param vt target value type
     * @param in string value
     * @return parsed object, or null if the input is null
     */
    public static Object stringToObject(ValueType vt, String in) {
        if (in == null)
            return null;
        switch (vt) {
            case STRING:
                return in;
            case BOOLEAN:
                return Boolean.parseBoolean(in);
            case INT:
                return Long.parseLong(in);
            case DOUBLE:
                return Double.parseDouble(in);
            default:
                throw new RuntimeException("Unsupported value type: " + vt);
        }
    }

    /**
     * Converts an object of the given value type into a double. Null
     * objects and empty strings are converted to 0.
     *
     * @param vt value type of the input object
     * @param in input object
     * @return double value
     */
    public static double objectToDouble(ValueType vt, Object in) {
        if (in == null)
            return 0;
        switch (vt) {
            case STRING:
                return !((String) in).isEmpty() ? Double.parseDouble((String) in) : 0;
            case BOOLEAN:
                return ((Boolean) in) ? 1d : 0d;
            case INT:
                return (Long) in;
            case DOUBLE:
                return (Double) in;
            default:
                throw new RuntimeException("Unsupported value type: " + vt);
        }
    }

    /**
     * Convert object to string
     *
     * @param in object
     * @return string representation of object, or null if the object is null
     */
    public static String objectToString(Object in) {
        return (in != null) ? in.toString() : null;
    }

    /**
     * Convert object to string
     *
     * @param in object
     * @param ignoreNull If this flag has set, it will ignore null. This flag is mainly used in merge functionality to override data with "null" data.
     * @return string representation of object
     */
    public static String objectToString(Object in, boolean ignoreNull) {
        String strReturn = objectToString(in);
        if (strReturn == null || !ignoreNull)
            return strReturn;

        //zero numbers, false, and blank strings are mapped to null
        boolean isDefault =
            (in instanceof Double && ((Double) in).doubleValue() == 0.0)
            || (in instanceof Long && ((Long) in).longValue() == 0)
            || (in instanceof Boolean && !((Boolean) in).booleanValue())
            || (in instanceof String && ((String) in).trim().length() == 0);
        return isDefault ? null : strReturn;
    }

    /**
     * Converts an object into an object of the given value type. Objects
     * that already have the matching Java type are returned as is; all
     * others are converted through their string representation.
     *
     * @param vt target value type
     * @param in input object
     * @return converted object
     */
    public static Object objectToObject(ValueType vt, Object in) {
        boolean matches =
            (in instanceof Double && vt == ValueType.DOUBLE)
            || (in instanceof Long && vt == ValueType.INT)
            || (in instanceof Boolean && vt == ValueType.BOOLEAN)
            || (in instanceof String && vt == ValueType.STRING);
        if (matches)
            return in; //quick path to avoid double parsing
        return stringToObject(vt, objectToString(in));
    }

    /**
     * Converts an object into an object of the given value type via its
     * string representation as obtained by objectToString(in, ignoreNull).
     *
     * @param vt target value type
     * @param in input object
     * @param ignoreNull flag passed through to objectToString
     * @return converted object, or null if the string representation is null
     */
    public static Object objectToObject(ValueType vt, Object in, boolean ignoreNull) {
        String str = objectToString(in, ignoreNull);
        if (str == null || vt == ValueType.STRING)
            return str;
        return stringToObject(vt, str);
    }

    /**
     * Compares two objects of the given value type, where null is ordered
     * before any non-null object.
     *
     * @param vt value type of both objects
     * @param in1 first object
     * @param in2 second object
     * @return negative, zero, or positive value if in1 is less than, equal to, or greater than in2
     */
    public static int compareTo(ValueType vt, Object in1, Object in2) {
        if (in1 == null && in2 == null)
            return 0;
        if (in1 == null)
            return -1;
        if (in2 == null)
            return 1;

        switch (vt) {
            case STRING:
                return ((String) in1).compareTo((String) in2);
            case BOOLEAN:
                return ((Boolean) in1).compareTo((Boolean) in2);
            case INT:
                return ((Long) in1).compareTo((Long) in2);
            case DOUBLE:
                return ((Double) in1).compareTo((Double) in2);
            default:
                throw new RuntimeException("Unsupported value type: " + vt);
        }
    }

    /**
     * Compares two version strings of format x.y.z, where x is major,
     * y is minor, and z is maintenance release. If one version has fewer
     * components than the other, its missing components are treated as 0
     * (e.g., "1.0" equals "1.0.0" and is less than "1.0.1").
     *
     * @param version1 first version string
     * @param version2 second version string
     * @return 1 if version1 greater, -1 if version2 greater, 0 if equal
     * @throws NumberFormatException if a component is not a valid integer
     */
    public static int compareVersion(String version1, String version2) {
        String[] partsv1 = version1.split("\\.");
        String[] partsv2 = version2.split("\\.");
        int len = Math.max(partsv1.length, partsv2.length);
        for (int i = 0; i < len; i++) {
            //pad the shorter version with zeros
            int iv1 = (i < partsv1.length) ? Integer.parseInt(partsv1[i]) : 0;
            int iv2 = (i < partsv2.length) ? Integer.parseInt(partsv2[i]) : 0;
            if (iv1 != iv2)
                return (iv1 < iv2) ? -1 : 1;
        }
        return 0; //equal
    }

    /**
     * Indicates if the given string consists only of the ASCII digits
     * '0' to '9'. Signs are not accepted; the empty string returns true.
     *
     * @param str string to check
     * @return true if all characters are ASCII digits, false otherwise
     */
    public static boolean isIntegerNumber(String str) {
        //scan characters directly to be independent of the platform charset
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c < '0' || c > '9')
                return false;
        }
        return true;
    }

    /**
     * Returns the maximum value of the given byte array, or
     * Byte.MIN_VALUE if the array is empty.
     *
     * @param array byte array
     * @return maximum byte value
     */
    public static byte max(byte[] array) {
        byte ret = Byte.MIN_VALUE;
        for (byte value : array) {
            if (value > ret)
                ret = value;
        }
        return ret;
    }

    /**
     * Removes one pair of enclosing double or single quotes, if present.
     *
     * @param s input string, may be null
     * @return string without enclosing quotes, or the input if it is not quoted
     */
    public static String unquote(String s) {
        if (s == null || s.length() < 2)
            return s;
        boolean doubleQuoted = s.startsWith("\"") && s.endsWith("\"");
        boolean singleQuoted = s.startsWith("'") && s.endsWith("'");
        return (doubleQuoted || singleQuoted) ?
            s.substring(1, s.length() - 1) : s;
    }

    /**
     * Encloses the given string in double quotes.
     *
     * @param s input string
     * @return quoted string
     */
    public static String quote(String s) {
        return "\"" + s + "\"";
    }

    /**
     * Parses a memory size with optional g/m/k quantifiers into its
     * number representation.
     *
     * @param arg memory size as readable string
     * @return byte count of memory size
     */
    public static long parseMemorySize(String arg) {
        if (arg.endsWith("g") || arg.endsWith("G"))
            return Long.parseLong(arg.substring(0, arg.length() - 1)) * 1024 * 1024 * 1024;
        if (arg.endsWith("m") || arg.endsWith("M"))
            return Long.parseLong(arg.substring(0, arg.length() - 1)) * 1024 * 1024;
        if (arg.endsWith("k") || arg.endsWith("K"))
            return Long.parseLong(arg.substring(0, arg.length() - 1)) * 1024;
        return Long.parseLong(arg);
    }

    /**
     * Formats a byte count as a readable string with GB/MB/KB suffix,
     * using the largest unit that does not exceed the given size. The
     * value is truncated by integer division; sizes below 1024 are
     * printed without suffix.
     *
     * @param arg byte count of memory size
     * @return memory size as readable string
     */
    public static String formatMemorySize(long arg) {
        final long kb = 1024;
        final long mb = 1024 * kb;
        final long gb = 1024 * mb;

        if (arg >= gb)
            return String.format("%d GB", arg / gb);
        if (arg >= mb)
            return String.format("%d MB", arg / mb);
        if (arg >= kb)
            return String.format("%d KB", arg / kb);
        return String.format("%d", arg);
    }

    /**
     * Obtain sequence list
     *
     * @param low lower bound (inclusive)
     * @param up upper bound (inclusive)
     * @param incr increment, must be positive if low is less or equal to up
     * @return list of integers, empty if low is greater than up
     * @throws IllegalArgumentException if the sequence is non-empty and incr is not positive
     */
    public static List<Integer> getSequenceList(int low, int up, int incr) {
        ArrayList<Integer> ret = new ArrayList<Integer>();
        if (low > up)
            return ret;
        //a non-positive increment would never reach the upper bound
        if (incr <= 0)
            throw new IllegalArgumentException("Invalid increment for sequence list: " + incr);

        //long counter to prevent int overflow for upper bounds close to Integer.MAX_VALUE
        for (long i = low; i <= up; i += incr)
            ret.add((int) i);
        return ret;
    }

    /**
     * Returns the double value of the given object, parsing its string
     * representation if it is not a Double.
     *
     * @param obj input object
     * @return double value
     */
    public static double getDouble(Object obj) {
        return (obj instanceof Double) ?
            (Double) obj : Double.parseDouble(obj.toString());
    }

    /**
     * Indicates if the given object is non-zero. Non-Double objects are
     * compared by their string representation against "0" and "0.0".
     *
     * @param obj input object
     * @return true if the object is considered non-zero
     */
    public static boolean isNonZero(Object obj) {
        if (obj instanceof Double)
            return ((Double) obj) != 0;

        //avoid expensive double parsing
        String sobj = obj.toString();
        return !sobj.equals("0") && !sobj.equals("0.0");
    }

    /**
     * Creates an array of length n where every entry is the given value type.
     *
     * @param n array length
     * @param vt value type
     * @return array of n copies of the value type
     */
    public static ValueType[] nCopies(int n, ValueType vt) {
        ValueType[] ret = new ValueType[n];
        Arrays.fill(ret, vt);
        return ret;
    }

    /**
     * Counts the occurrences of the given value type in the schema.
     *
     * @param schema array of value types
     * @param vt value type to count
     * @return number of schema entries equal to vt
     */
    public static int frequency(ValueType[] schema, ValueType vt) {
        int count = 0;
        for (ValueType tmp : schema) {
            //identity comparison of enum constants, safe for null entries
            if (tmp == vt)
                count++;
        }
        return count;
    }

    /**
     * Concatenates the two given schemas into a new array.
     *
     * @param schema1 first schema
     * @param schema2 second schema
     * @return concatenated schema
     */
    public static ValueType[] copyOf(ValueType[] schema1, ValueType[] schema2) {
        return (ValueType[]) ArrayUtils.addAll(schema1, schema2);
    }
}