Java tutorial
/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.util; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.ObjectStreamClass; import java.io.PrintStream; import java.io.PrintWriter; import java.io.StringWriter; import java.io.Writer; import java.lang.reflect.Constructor; import java.lang.reflect.Field; import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import cascading.flow.FlowElement; import cascading.flow.FlowException; import cascading.flow.Scope; import cascading.operation.BaseOperation; import cascading.operation.Operation; import cascading.pipe.Pipe; import cascading.scheme.Scheme; import cascading.tap.Tap; import org.apache.commons.codec.binary.Base64; import org.apache.commons.codec.digest.DigestUtils; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.mapred.JobConf; import org.apache.log4j.Logger; import org.jgrapht.ext.DOTExporter; import org.jgrapht.ext.EdgeNameProvider; import org.jgrapht.ext.IntegerNameProvider; import org.jgrapht.ext.MatrixExporter; import org.jgrapht.ext.VertexNameProvider; import org.jgrapht.graph.SimpleDirectedGraph; /** Class Util provides reusable operations. */ public class Util { /** Field LOG */ private static final Logger LOG = Logger.getLogger(Util.class); /** * This method serializes the given Object instance and retunrs a String Base64 representation. * * @param object to be serialized * @return String */ public static String serializeBase64(Object object) throws IOException { return serializeBase64(object, true); } public static String serializeBase64(Object object, boolean compress) throws IOException { ByteArrayOutputStream bytes = new ByteArrayOutputStream(); ObjectOutputStream out = new ObjectOutputStream(compress ? new GZIPOutputStream(bytes) : bytes); try { out.writeObject(object); } finally { out.close(); } return new String(Base64.encodeBase64(bytes.toByteArray())); } /** * This method deserializes the Base64 encoded String into an Object instance. * * @param string * @return an Object */ public static Object deserializeBase64(String string) throws IOException { return deserializeBase64(string, true); } public static Object deserializeBase64(String string, boolean decompress) throws IOException { if (string == null || string.length() == 0) return null; ObjectInputStream in = null; try { ByteArrayInputStream bytes = new ByteArrayInputStream(Base64.decodeBase64(string.getBytes())); in = new ObjectInputStream(decompress ? new GZIPInputStream(bytes) : bytes) { @Override protected Class<?> resolveClass(ObjectStreamClass desc) throws IOException, ClassNotFoundException { try { return Class.forName(desc.getName(), false, Thread.currentThread().getContextClassLoader()); } catch (ClassNotFoundException exception) { return super.resolveClass(desc); } } }; return in.readObject(); } catch (ClassNotFoundException exception) { throw new FlowException("unable to deserialize data", exception); } finally { if (in != null) in.close(); } } /** * This method creates a globally unique HEX value seeded by the given string. * * @param seed * @return a String */ public static String createUniqueID(String seed) { String base = String.format("%s%d%.10f", seed, System.currentTimeMillis(), Math.random()); return DigestUtils.md5Hex(base); } /** * This method joins the values in the given list with the delim String value. * * @param list * @param delim * @return String */ public static String join(int[] list, String delim) { return join(list, delim, false); } public static String join(int[] list, String delim, boolean printNull) { StringBuffer buffer = new StringBuffer(); int count = 0; for (Object s : list) { if (count != 0) buffer.append(delim); if (printNull || s != null) buffer.append(s); count++; } return buffer.toString(); } public static String join(String delim, String... strings) { return join(delim, false, strings); } public static String join(String delim, boolean printNull, String... strings) { return join(strings, delim, printNull); } /** * This method joins the values in the given list with the delim String value. * * @param list * @param delim * @return a String */ public static String join(Object[] list, String delim) { return join(list, delim, false); } public static String join(Object[] list, String delim, boolean printNull) { StringBuffer buffer = new StringBuffer(); int count = 0; for (Object s : list) { if (count != 0) buffer.append(delim); if (printNull || s != null) buffer.append(s); count++; } return buffer.toString(); } /** * This method joins each value in the collection with a tab character as the delimiter. * * @param collection * @return a String */ public static String join(Collection collection) { return join(collection, "\t"); } /** * This method joins each valuein the collection with the given delimiter. * * @param collection * @param delim * @return a String */ public static String join(Collection collection, String delim) { return join(collection, delim, false); } public static String join(Collection collection, String delim, boolean printNull) { StringBuffer buffer = new StringBuffer(); join(buffer, collection, delim, printNull); return buffer.toString(); } /** * This method joins each value in the collection with the given delimiter. All results are appended to the * given {@link StringBuffer} instance. * * @param buffer * @param collection * @param delim */ public static void join(StringBuffer buffer, Collection collection, String delim) { join(buffer, collection, delim, false); } public static void join(StringBuffer buffer, Collection collection, String delim, boolean printNull) { int count = 0; for (Object s : collection) { if (count != 0) buffer.append(delim); if (printNull || s != null) buffer.append(s); count++; } } public static String[] removeNulls(String... strings) { List<String> list = new ArrayList<String>(); for (String string : strings) { if (string != null) list.add(string); } return list.toArray(new String[list.size()]); } public static Collection<String> quote(Collection<String> collection, String quote) { List<String> list = new ArrayList<String>(); for (String string : collection) list.add(quote + string + quote); return list; } public static String print(Collection collection, String delim) { StringBuffer buffer = new StringBuffer(); print(buffer, collection, delim); return buffer.toString(); } public static void print(StringBuffer buffer, Collection collection, String delim) { int count = 0; for (Object s : collection) { if (count != 0) buffer.append(delim); buffer.append("["); buffer.append(s); buffer.append("]"); count++; } } /** * This method attempts to remove any username and password from the given url String. * * @param url * @return a String */ public static String sanitizeUrl(String url) { if (url == null) return null; return url.replaceAll("(?<=//).*:.*@", "") + "\"]"; } /** * This methdo attempts to remove duplicate consecutive forward slashes from the given url. * * @param url * @return a String */ public static String normalizeUrl(String url) { if (url == null) return null; return url.replaceAll("([^:]/)/{2,}", "$1/"); } /** * This method returns the {@link Object#toString()} of the given object, or an empty String if the object * is null. * * @param object * @return a String */ public static String toNull(Object object) { if (object == null) return ""; return object.toString(); } /** * This method truncates the given String value to the given size, but appends an ellipse ("...") if the * String is larger than maxSize. * * @param string * @param maxSize * @return a String */ public static String truncate(String string, int maxSize) { string = toNull(string); if (string.length() <= maxSize) return string; return String.format("%s...", string.subSequence(0, maxSize - 3)); } public static <A> A getProperty(Map<Object, Object> properties, String key, A defaultValue) { if (properties == null) return defaultValue; A value = (A) properties.get(key); return value == null ? defaultValue : value; } public static String printGraph(SimpleDirectedGraph graph) { StringWriter writer = new StringWriter(); printGraph(writer, graph); return writer.toString(); } public static void printGraph(PrintStream out, SimpleDirectedGraph graph) { PrintWriter printWriter = new PrintWriter(out); printGraph(printWriter, graph); } public static void printGraph(String filename, SimpleDirectedGraph graph) { try { Writer writer = new FileWriter(filename); printGraph(writer, graph); writer.close(); } catch (IOException exception) { exception.printStackTrace(); } } @SuppressWarnings({ "unchecked" }) private static void printGraph(Writer writer, SimpleDirectedGraph graph) { DOTExporter dot = new DOTExporter(new IntegerNameProvider(), new VertexNameProvider() { public String getVertexName(Object object) { return object.toString().replaceAll("\"", "\'"); } }, new EdgeNameProvider<Object>() { public String getEdgeName(Object object) { return object.toString().replaceAll("\"", "\'"); } }); dot.export(writer, graph); } public static void printMatrix(PrintStream out, SimpleDirectedGraph<FlowElement, Scope> graph) { new MatrixExporter().exportAdjacencyMatrix(new PrintWriter(out), graph); } /** * This method removes all nulls from the given List. * * @param list */ @SuppressWarnings({ "StatementWithEmptyBody" }) public static void removeAllNulls(List list) { while (list.remove(null)) ; } public static String formatTrace(Scheme scheme, String message) { if (scheme == null) return message; String trace = scheme.getTrace(); if (trace == null) return message; return "[" + truncate(scheme.toString(), 25) + "][" + trace + "] " + message; } /** * Method formatRawTrace does not include the pipe name * * @param pipe of type Pipe * @param message of type String * @return String */ public static String formatRawTrace(Pipe pipe, String message) { if (pipe == null) return message; String trace = pipe.getTrace(); if (trace == null) return message; return "[" + trace + "] " + message; } public static String formatTrace(Pipe pipe, String message) { if (pipe == null) return message; String trace = pipe.getTrace(); if (trace == null) return message; return "[" + truncate(pipe.getName(), 25) + "][" + trace + "] " + message; } public static String formatTrace(Tap tap, String message) { if (tap == null) return message; String trace = tap.getTrace(); if (trace == null) return message; return "[" + truncate(tap.toString(), 25) + "][" + trace + "] " + message; } public static String formatTrace(Operation operation, String message) { if (!(operation instanceof BaseOperation)) return message; String trace = ((BaseOperation) operation).getTrace(); if (trace == null) return message; return "[" + trace + "] " + message; } public static String captureDebugTrace(Class type) { StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); for (int i = 3; i < stackTrace.length; i++) { StackTraceElement stackTraceElement = stackTrace[i]; Package aPackage = type.getPackage(); if (aPackage != null && stackTraceElement.getClassName().startsWith(aPackage.getName())) continue; return stackTraceElement.toString(); } return null; } public static Class findMainClass(Class defaultType) { StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); for (StackTraceElement stackTraceElement : stackTrace) { if (stackTraceElement.getMethodName().equals("main") && !stackTraceElement.getClassName().startsWith("org.apache.hadoop")) { try { LOG.info("resolving application jar from found main method on: " + stackTraceElement.getClassName()); return Thread.currentThread().getContextClassLoader() .loadClass(stackTraceElement.getClassName()); } catch (ClassNotFoundException exception) { LOG.warn("unable to load class while discovering application jar: " + stackTraceElement.getClassName(), exception); } } } LOG.info("using default application jar, may cause class not found exceptions on the cluster"); return defaultType; } public static void writeDOT(Writer writer, SimpleDirectedGraph graph, IntegerNameProvider vertexIdProvider, VertexNameProvider vertexNameProvider, EdgeNameProvider edgeNameProvider) { new DOTExporter(vertexIdProvider, vertexNameProvider, edgeNameProvider).export(writer, graph); } public interface RetryOperator<T> { T operate() throws Exception; boolean rethrow(Exception exception); } public static <T> T retry(Logger logger, int retries, int secondsDelay, String message, RetryOperator<T> operator) throws Exception { Exception saved = null; for (int i = 0; i < retries; i++) { try { return operator.operate(); } catch (Exception exception) { if (operator.rethrow(exception)) { logger.warn(message + ", but not retrying", exception); throw exception; } saved = exception; logger.warn(message + ", attempt: " + (i + 1), exception); try { Thread.sleep(secondsDelay * 1000); } catch (InterruptedException exception1) { // do nothing } } } logger.warn(message + ", done retrying after attempts: " + retries, saved); throw saved; } public static Object createProtectedObject(Class type, Object[] parameters, Class[] parameterTypes) { try { Constructor constructor = type.getDeclaredConstructor(parameterTypes); constructor.setAccessible(true); return constructor.newInstance(parameters); } catch (Exception exception) { exception.printStackTrace(); throw new FlowException("unable to instantiate type: " + type.getName(), exception); } } public static Thread getHDFSShutdownHook() { Exception caughtException = null; try { // we must init the FS so the finalizer is registered FileSystem.getLocal(new JobConf()); Field field = FileSystem.class.getDeclaredField("clientFinalizer"); field.setAccessible(true); Thread finalizer = (Thread) field.get(null); if (finalizer != null) Runtime.getRuntime().removeShutdownHook(finalizer); return finalizer; } catch (NoSuchFieldException exception) { caughtException = exception; } catch (IllegalAccessException exception) { caughtException = exception; } catch (IOException exception) { caughtException = exception; } LOG.info("unable to find and remove client hdfs shutdown hook, received exception: " + caughtException.getClass().getName()); return null; } public static Object invokeStaticMethod(Class type, String methodName, Object[] parameters, Class[] parameterTypes) { try { Method method = type.getDeclaredMethod(methodName, parameterTypes); method.setAccessible(true); return method.invoke(null, parameters); } catch (Exception exception) { throw new FlowException("unable to invoke static method: " + type.getName() + "." + methodName, exception); } } }