dk.netarkivet.common.utils.ProcessUtils.java Source code

Java tutorial

Introduction

Here is the source code for dk.netarkivet.common.utils.ProcessUtils.java

Source

/* File:        $Id$
 * Revision:    $Revision$
 * Author:      $Author$
 * Date:        $Date$
 *
 * The Netarchive Suite - Software to harvest and preserve websites
 * Copyright 2004-2012 The Royal Danish Library, the Danish State and
 * University Library, the National Library of France and the Austrian
 * National Library.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

package dk.netarkivet.common.utils;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import dk.netarkivet.common.Constants;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;

/**
 * Various utilities for running processes -- not exactly Java's forte.
 *
 */

public class ProcessUtils {
    /** The logger. */
    private static Log log = LogFactory.getLog(ProcessUtils.class);

    /** Runs an external process that takes no input, discarding its output.
     *
     * @param environment An environment to run the process in (may be null)
     * @param programAndArgs The program and its arguments.
     * @return The return code of the process.
     */
    public static int runProcess(String[] environment, String... programAndArgs) {
        try {
            log.debug("Running external program: " + StringUtils.conjoin(" ", programAndArgs) + " with environment "
                    + StringUtils.conjoin(" ", environment));

            Process p = Runtime.getRuntime().exec(programAndArgs, environment);
            discardProcessOutput(p.getInputStream());
            discardProcessOutput(p.getErrorStream());
            while (true) {
                try {
                    return p.waitFor();
                } catch (InterruptedException e) {
                    // Ignoring interruptions, we just want to try waiting
                    // again.
                }
            }
        } catch (IOException e) {
            throw new IOFailure("Failure while running " + Arrays.toString(programAndArgs), e);
        }
    }

    /** Runs an external process that takes no input, discarding its output.
     * This is a convenience wrapper for runProcess(environment, programAndArgs)
     * @param programAndArgs The program to run and its arguments
     * @return The return code of the process.
     */
    public static int runProcess(String... programAndArgs) {
        return runProcess(null, programAndArgs);
    }

    /**
     * Read the output from a process. Due to oddities in the Process
     * handling, this has to be done char by char. This method just implements
     * a consumer thread to eat the output of a process and so prevent
     * blocking.
     *
     * @param inputStream
     *            A stream to read up to end of file. This stream is closed at
     *            some point in the future, but not necessarily before this
     *            method returns.
     */
    public static void discardProcessOutput(final InputStream inputStream) {
        makeCollectorThread(inputStream, new DiscardingOutputStream(), -1).start();
    }

    /** Collect all output from an inputstream, up to maxCollect bytes,
     * in an output object. This will eventually close the given InputStream,
     * but not necessarily before the method returns.  The thread created
     * is placed in a thread set, and should be removed once all output
     * has been collected.  While only a limited amount may be written to
     * the output object, the entire output will be read from the inputStream
     * unless the thread or the inputStream is destroyed first.
     *
     * @param inputStream The inputstream to read contents from
     * @param maxCollect The maximum number of bytes to collect, or -1 for no
     *  limit
     * @param collectionThreads Set of threads that concurrently collect output
     * @return An object that collects the output.  Once the thread returned
     * is finished, the object will no longer be written to.  The collected
     * output can be retrieved with the toString method.
     */
    public static Object collectProcessOutput(final InputStream inputStream, final int maxCollect,
            Set<Thread> collectionThreads) {
        final OutputStream stream = new ByteArrayOutputStream();
        Thread t = makeCollectorThread(inputStream, stream, maxCollect);
        t.start();
        collectionThreads.add(t);
        return stream;
    }

    /** Collect all output from an inputstream, appending it to a file.
     * This will eventually close the given InputStream,
     * but not necessarily before the method returns.  The thread created
     * is placed in a thread set, and should be removed once all output
     * has been collected.
     *
     * @param inputStream The inputstream to read contents from
     * @param outputFile The file that output should be appended to.
     * @param collectionThreads Set of threads that concurrently collect output
     */
    public static void writeProcessOutput(final InputStream inputStream, final File outputFile,
            Set<Thread> collectionThreads) {
        final OutputStream stream;
        try {
            stream = new FileOutputStream(outputFile, true);
        } catch (FileNotFoundException e) {
            throw new IOFailure("Cannot create file '" + outputFile + " to write process output to.", e);
        }
        Thread t = makeCollectorThread(inputStream, stream, -1);
        t.start();
        collectionThreads.add(t);
    }

    /** Collect all output from an inputstream, writing it to an output stream,
     * using a separate thread. This will eventually close the given InputStream
     * and OutputStream, but not necessarily before the method returns. While
     * only a limited amount may be written to the output object, the entire
     * output will be read fron the inputStream unless the thread or the
     * inputStream is destroyed first.
     *
     * @param inputStream The inputstream to read contents from
     * @param outputStream An stream to write the output to.
     * @param maxCollect The maximum number of bytes to collect, or -1 for no
     *  limit
     * @return The thread that will collect the output.
     */
    private static Thread makeCollectorThread(final InputStream inputStream, final OutputStream outputStream,
            final int maxCollect) {
        return new Thread() {
            public void run() {
                try {
                    InputStream reader = null;
                    OutputStream writer = null;
                    try {
                        reader = new BufferedInputStream(inputStream);
                        writer = new BufferedOutputStream(outputStream);
                        copyContents(reader, writer, maxCollect);
                    } finally {
                        if (reader != null) {
                            reader.close();
                        }
                        if (writer != null) {
                            writer.close();
                        }
                    }
                } catch (IOException e) {
                    // This seems ugly
                    throw new RuntimeException("Couldn't close streams for " + "process.", e);
                }
            }
        };
    }

    /** Reads all contents from a stream, writing some or all to another.
     *
     * @param in InputStream to read from
     * @param out OutputStream to write to
     * @param maxCollect Maximum number of bytes to write to out
     * @throws IOFailure If there are problems reading or writing.
     */
    private static void copyContents(InputStream in, OutputStream out, int maxCollect) {
        int bytesRead;
        byte[] buffer = new byte[Constants.IO_BUFFER_SIZE];
        int totalBytesRead = 0;
        try {
            while ((bytesRead = in.read(buffer, 0, Constants.IO_BUFFER_SIZE)) != -1) {
                if (maxCollect == -1) {
                    out.write(buffer, 0, bytesRead);
                } else if (totalBytesRead < maxCollect) {
                    out.write(buffer, 0, Math.min(bytesRead, maxCollect - totalBytesRead));
                }
                // Close early if applicable
                if (maxCollect != -1 && totalBytesRead < maxCollect && totalBytesRead + bytesRead > maxCollect) {
                    out.close();
                }
                totalBytesRead += bytesRead;
            }
        } catch (IOException e) {
            throw new IOFailure("Error reading or writing process output", e);
        }
    }

    /** Wait for the end of a process, but only for a limited time.  This
     * method takes care of the ways waitFor can get interrupted.
     *
     * @param p Process to wait for
     * @param maxWait The maximum number of milliseconds to wait for the
     * process to exit.
     * @return Exit value for process, or null if the process didn't exit
     * within the expected time.
     */
    public static Integer waitFor(final Process p, long maxWait) {
        ArgumentNotValid.checkNotNull(p, "Process p");
        ArgumentNotValid.checkPositive(maxWait, "long maxWait");
        long startTime = System.currentTimeMillis();
        Timer timer = new Timer(true);
        final Thread waitThread = Thread.currentThread();
        boolean wakeupScheduled = false;
        final AtomicBoolean doneWaiting = new AtomicBoolean(false);
        while (System.currentTimeMillis() < startTime + maxWait) {
            try {
                if (!wakeupScheduled) {
                    // First time in here, we need to start the wakup thread,
                    // but be sure it doesn't notify us too early or too late.
                    synchronized (waitThread) {
                        timer.schedule(new TimerTask() {
                            public void run() {
                                synchronized (waitThread) {
                                    if (!doneWaiting.get()) {
                                        waitThread.interrupt();
                                    }
                                }
                            }
                        }, maxWait);
                        wakeupScheduled = true;
                    }
                }

                p.waitFor();
                break;
            } catch (InterruptedException e) {
                // May happen for a number of reasons.  We just check if we've
                // timed out yet when we go through the loop again.
            }
        }
        synchronized (waitThread) {
            timer.cancel();
            doneWaiting.set(true);
            Thread.interrupted(); // In case the timer task interrupted.
        }
        try {
            return p.exitValue();
        } catch (IllegalThreadStateException e) {
            log.warn("Process '" + p + "' did not exit within " + (System.currentTimeMillis() - startTime)
                    + " milliseconds");
            return null;
        }
    }

    /**
     * Runs a system process (Unix sort) to sort a file.
     * @param inputFile the input file.
     * @param outputFile the output file.
     * @return the process exit code.
     */
    public static int runUnixSort(File inputFile, File outputFile) {
        return runUnixSort(inputFile, outputFile, null, false);
    }

    /**
     * Runs a system process (Unix sort) to sort a file.
     * @param inputFile the input file.
     * @param outputFile the output file.
     * @param tempDir the directory where to store temporary files (null for default system temp).
     * @param crawllogSorting Should we sort crawllog style ("-k 4b") or not
     * @return the process exit code.
     */
    public static int runUnixSort(File inputFile, File outputFile, File tempDir, boolean crawllogSorting) {
        String[] environment = new String[] { "LANG=C" };
        LinkedList<String> cmdAndParams = new LinkedList<String>();
        cmdAndParams.add("sort");
        cmdAndParams.add(inputFile.getAbsolutePath());
        if (crawllogSorting) {
            // -k 4b means fourth field (from 1) ignoring leading blanks
            cmdAndParams.add("-k");
            cmdAndParams.add("4b");
        }
        // -o means output to (file)
        cmdAndParams.add("-o");
        cmdAndParams.add(outputFile.getAbsolutePath());

        if (tempDir != null) {
            // -T configures where to store temporary files
            cmdAndParams.add("-T");
            cmdAndParams.add(tempDir.getAbsolutePath());
        }

        return ProcessUtils.runProcess(environment,
                (String[]) cmdAndParams.toArray(new String[cmdAndParams.size()]));
    }

}