com.krawler.common.util.ByteUtil.java Source code

Java tutorial

Introduction

Here is the source code for com.krawler.common.util.ByteUtil.java

Source

/*
 * Copyright (C) 2012  Krawler Information Systems Pvt Ltd
 * All rights reserved.
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
*/
package com.krawler.common.util;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PipedInputStream;
import java.io.Reader;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Hex;

public class ByteUtil {

    /**
     * write the data to the specified path.
     * 
     * @param path
     * @param data
     * @throws IOException
     */
    public static void putContent(String path, byte[] data) throws IOException {
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(new File(path));
            fos.write(data);
        } finally {
            if (fos != null)
                fos.close();
        }
    }

    /**
     * read all the content in the specified file and return as byte array.
     * 
     * @param file
     *            file to read
     * @return content of the file
     * @throws IOException
     */
    public static byte[] getContent(File file) throws IOException {
        byte[] buffer = new byte[(int) file.length()];

        InputStream is = null;
        try {
            is = new FileInputStream(file);
            int total_read = 0, num_read;

            int num_left = buffer.length;

            while (num_left > 0 && (num_read = is.read(buffer, total_read, num_left)) != -1) {
                total_read += num_read;
                num_left -= num_read;
            }
        } finally {
            closeStream(is);
        }
        return buffer;
    }

    /**
     * Reads all data from the <code>InputStream</code> into a <tt>byte[]</tt>
     * array. Closes the stream, regardless of whether an error occurs.
     * 
     * @param is
     *            The stream to read from.
     * @param sizeHint
     *            A (no-binding) hint as to the size of the resulting
     *            <tt>byte[]</tt> array.
     */
    public static byte[] getContent(InputStream is, int sizeHint) throws IOException {
        return getContent(is, sizeHint, -1);
    }

    /**
     * Reads all data from the <code>InputStream</code> into a <tt>byte[]</tt>
     * array. Closes the stream, regardless of whether an error occurs. If a
     * positive <code>sizeLimit</code> is specified and the stream is larger
     * than that limit, an <code>IOException</code> is thrown.
     * 
     * @param is
     *            The stream to read from.
     * @param sizeHint
     *            A (non-binding) hint as to the size of the resulting
     *            <tt>byte[]</tt> array.
     * @param sizeLimit
     *            The maximum number of bytes that can be copied from the stream
     *            before an <code>IOException</code> is thrown.
     */
    public static byte[] getContent(InputStream is, int sizeHint, long sizeLimit) throws IOException {
        return getContent(is, -1, sizeHint, sizeLimit);
    }

    /**
     * Reads a certain quantity of data from the <code>InputStream</code> into
     * a <tt>byte[]</tt> array. Closes the stream, regardless of whether an
     * error occurs. If a nonnegative <code>length</code> is specified, the
     * amount of data read into the array is capped by that value; otherwise,
     * the method behaves exactly as {@link #getContent(InputStream, int)}.
     * 
     * @param is
     *            The stream to read from.
     * @param length
     *            The maximum number of bytes that will be copied from the
     *            stream.
     * @param sizeHint
     *            A (non-binding) hint as to the size of the resulting
     *            <tt>byte[]</tt> array.
     */
    public static byte[] getPartialContent(InputStream is, int length, int sizeHint) throws IOException {
        return getContent(is, length, sizeHint, -1);
    }

    private static byte[] getContent(InputStream is, int length, int sizeHint, long sizeLimit) throws IOException {
        if (length == 0)
            return new byte[0];

        ByteArrayOutputStream baos = null;
        try {
            if (length > 0 && (sizeHint > length || sizeHint < 0))
                sizeHint = length;
            baos = new ByteArrayOutputStream(Math.max(sizeHint, 0));

            byte[] buffer = new byte[8192];
            int num, limit = length > 0 ? Math.min(buffer.length, length - baos.size()) : buffer.length;
            while ((num = is.read(buffer, 0, limit)) != -1) {
                baos.write(buffer, 0, num);

                if (sizeLimit > 0 && baos.size() > sizeLimit)
                    throw new IOException("stream too large");
                if (length > 0 && baos.size() >= length)
                    break;

                limit = length > 0 ? Math.min(buffer.length, length - baos.size()) : buffer.length;
            }
            return baos.toByteArray();
        } finally {
            closeStream(is);
        }
    }

    /**
     * Reads a <tt>String</tt> from the given <tt>Reader</tt>. Reads until
     * the either end of the stream is hit or until <tt>length</tt> characters
     * are read.
     * 
     * @return the content or an empty <tt>String</tt> if no content is
     *         available
     */
    public static String getContent(Reader reader, int length, boolean close) throws IOException {
        if (reader == null || length == 0) {
            return "";
        }
        if (length < 0) {
            length = Integer.MAX_VALUE;
        }
        char[] buf = new char[Math.min(1024, length)];
        int totalRead = 0;
        StringBuilder retVal = new StringBuilder(buf.length);

        try {
            while (true) {
                int numToRead = Math.min(buf.length, length - totalRead);
                if (numToRead <= 0) {
                    break;
                }
                int numRead = reader.read(buf);
                if (numRead < 0) {
                    break;
                }
                retVal.append(buf, 0, numRead);
                totalRead += numRead;
            }
            return retVal.toString();
        } finally {
            if (close) {
                try {
                    reader.close();
                } catch (IOException e) {
                    KrawlerLog.misc.warn("Unable to close Reader", e);
                }
            }
        }
    }

    // When this method is called from SendMsg SOAP command path
    // the getDataSource().getInputStream() call descends into
    // Java Activation Framework to set up the input stream as
    // a PipedInputStream that is fed from a PipedOutputStream
    // using a new thread named "DataHandler.getInputStream".
    // This thread lives on until the PipedInputStream is drained.
    // (See javax.activation.DataHandler.getInputStream(),
    // line 242 in JAF 1.0.2 DataHandler.java)
    //
    // A problem occurs when the above try block throws an
    // exception, such as when the transformation server is down.
    // If we don't drain the PipedInputStream, the getInputStream
    // thread will spin forever waiting for the PipedInputStream's
    // internal circular buffer to free up some space, which it
    // won't after filling up initially because no one is reading
    // from the input stream. The input stream won't get garbage
    // collected because the getInputStream thread has a reference
    // to it.
    //
    // When the transformation server remains down, more and more
    // getInputStream threads will pile up, and with each thread
    // grabbing memory for stack, the JVM process will grow and
    // eventually will start throwing OutOfMemoryError.
    //
    // To avoid this mess, we must drain the PipedInputStream if
    // the try block doesn't complete successfully.
    //
    // If this method is called from LMTP path the input stream
    // returned is a FileInputStream, and no special clean up is
    // necessary.
    public static void closeStream(InputStream is) {
        if (is == null)
            return;

        if (is instanceof PipedInputStream) {
            try {
                while (is.read() != -1)
                    ;
            } catch (Exception e) {
                KrawlerLog.misc.debug("ignoring exception while draining PipedInputStream", e);
            }
        }

        try {
            is.close();
        } catch (Exception e) {
            KrawlerLog.misc.debug("ignoring exception while closing input stream", e);
        }
    }

    public static void closeStream(OutputStream os) {
        if (os == null)
            return;

        try {
            os.close();
        } catch (Exception e) {
            KrawlerLog.misc.debug("ignoring exception while closing output stream", e);
        }
    }

    /**
     * find the index of "target" within "source".
     * 
     * @param source
     *            the array being searched
     * @param offset
     *            where to start within that array
     * @param target
     *            the array we are searching for
     * @return index of target within source, or -1 if not found.
     */
    public static int indexOf(byte[] source, int offset, byte[] target) {
        int i = offset;
        int slen = source.length;
        int tlen = target.length;
        int max = offset + (slen - tlen);
        byte first = target[0];

        while (i <= max) {
            if (source[i] == first) {
                boolean match = true;
                // look at rest
                for (int j = 1; match && j < tlen; j++) {
                    match = source[i + j] == target[j];
                }
                if (match)
                    return i;
            }
            i++;
        }
        return -1;
    }

    public static boolean isASCII(byte[] data) {
        if (data == null)
            return false;
        int i;
        for (i = 0; i < data.length; i++) {
            byte c = data[i];
            // invalid control characters, DEL, and the high-order bit
            if ((c < 0x20 && c != 0x09 && c != 0x0A && c != 0x0D) || c >= 0x7F)
                return false;
        }
        return true;
    }

    /**
     * compress the supplied data using GZIPOutputStream and return the
     * compressed data.
     * 
     * @param data
     *            data to compress
     * @return compressesd data
     */
    public static byte[] compress(byte[] data) throws IOException {
        ByteArrayOutputStream baos = null;
        GZIPOutputStream gos = null;
        try {
            baos = new ByteArrayOutputStream(data.length); // data.length
            // overkill
            gos = new GZIPOutputStream(baos);
            gos.write(data);
            gos.finish();
            return baos.toByteArray();
        } finally {
            if (gos != null) {
                gos.close();
            } else if (baos != null)
                baos.close();
        }
    }

    /**
     * uncompress the supplied data using GZIPInputStream and return the
     * uncompressed data.
     * 
     * @param data
     *            data to uncompress
     * @return uncompressed data
     */
    public static byte[] uncompress(byte[] data) throws IOException {
        // TODO: optimize, this makes my head hurt
        ByteArrayOutputStream baos = null;
        ByteArrayInputStream bais = null;
        GZIPInputStream gis = null;
        try {
            int estimatedResultSize = data.length * 3;
            baos = new ByteArrayOutputStream(estimatedResultSize);
            bais = new ByteArrayInputStream(data);
            byte[] buffer = new byte[8192];
            gis = new GZIPInputStream(bais);

            int numRead;
            while ((numRead = gis.read(buffer, 0, buffer.length)) != -1) {
                baos.write(buffer, 0, numRead);
            }
            return baos.toByteArray();
        } finally {
            if (gis != null)
                gis.close();
            else if (bais != null)
                bais.close();
            if (baos != null)
                baos.close();
        }
    }

    /**
     * Determines if the data contained in the buffer is gzipped by matching the
     * first 2 bytes with GZIP magic GZIP_MAGIC (0x8b1f).
     * 
     * @param data
     * @return
     */
    public static boolean isGzipped(byte[] data) {
        return data != null && data.length > 2 && ((data[0] | (data[1] << 8)) == GZIPInputStream.GZIP_MAGIC);
    }

    public static String encodeFSSafeBase64(byte[] data) {
        byte[] encoded = Base64.encodeBase64(data);
        // Replace '/' with ',' to make the digest filesystem-safe.
        for (int i = 0; i < encoded.length; i++) {
            if (encoded[i] == (byte) '/')
                encoded[i] = (byte) ',';
        }
        return new String(encoded);
    }

    private static byte[] decodeFSSafeBase64(String str) {
        byte[] bytes = str.getBytes();
        // Undo the mapping done in encodeFSSafeBase64().
        for (int i = 0; i < bytes.length; i++) {
            if (bytes[i] == (byte) ',')
                bytes[i] = (byte) '/';
        }
        return Base64.decodeBase64(bytes);
    }

    /**
     * Returns the SHA1 digest of the supplied data.
     * 
     * @param data
     *            data to digest
     * @param base64
     *            if <tt>true</tt>, return as base64 String, otherwise return
     *            as hex string.
     * @return
     */
    public static String getSHA1Digest(byte[] data, boolean base64) {
        try {
            MessageDigest md = MessageDigest.getInstance("SHA1");
            byte[] digest = md.digest(data);
            if (base64)
                return encodeFSSafeBase64(digest);
            else
                return new String(Hex.encodeHex(digest));
        } catch (NoSuchAlgorithmException e) {
            // this should never happen unless the JDK is foobar
            // e.printStackTrace();
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the given <tt>InputStream</tt> in its entirety, closes the
     * stream, and returns the SHA1 digest of the read data.
     * 
     * @param in
     *            data to digest
     * @param base64
     *            if <tt>true</tt>, returns as base64 String, otherwise
     *            return as hex string.
     * @return
     */
    public static String getSHA1Digest(InputStream in, boolean base64) throws IOException {
        try {
            MessageDigest md = MessageDigest.getInstance("SHA1");
            byte[] buffer = new byte[1024];
            int numBytes;
            while ((numBytes = in.read(buffer)) >= 0) {
                md.update(buffer, 0, numBytes);
            }
            byte[] digest = md.digest();
            in.close();
            if (base64)
                return encodeFSSafeBase64(digest);
            else
                return new String(Hex.encodeHex(digest));
        } catch (NoSuchAlgorithmException e) {
            // this should never happen unless the JDK is foobar
            // e.printStackTrace();
            throw new RuntimeException(e);
        }
    }

    /**
     * return the MD5 digest of the supplied data.
     * 
     * @param data
     *            data to digest
     * @param base64
     *            if true, return as base64 String, otherwise return as hex
     *            string.
     * @return
     */
    public static String getMD5Digest(byte[] data, boolean base64) {
        try {
            MessageDigest md = MessageDigest.getInstance("MD5");
            byte[] digest = md.digest(data);
            if (base64)
                return encodeFSSafeBase64(digest);
            else
                return new String(Hex.encodeHex(digest));
        } catch (NoSuchAlgorithmException e) {
            // this should never happen unless the JDK is foobar
            // e.printStackTrace();
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the SHA1 digest for the given data, encoded as base64.
     */
    public static String getDigest(byte[] data) {
        return getSHA1Digest(data, true);
    }

    /**
     * Returns byte array containing binary version of digest.
     * 
     * @param digest
     * @return
     */
    public static byte[] getBinaryDigest(String digest) {
        return decodeFSSafeBase64(digest);
    }

    public static boolean isValidDigest(String digest) {
        if (digest != null) {
            byte[] bin = decodeFSSafeBase64(digest);
            if (bin != null) {
                String str = encodeFSSafeBase64(bin);
                return digest.equals(str);
            }
        }
        return false;
    }

    /**
     * Copies an input stream fully to output stream.
     * 
     * @param in
     *            the <tt>InputStream</tt>
     * @param closeIn
     *            if <tt>true</tt>, the <tt>InputStream</tt> is closed
     *            before returning, even when there is an error.
     * @param out
     *            the <tt>OutputStream</tt>
     * @param closeOut
     *            if <tt>true</tt>, the <tt>OutputStream</tt> is closed
     *            before returning, even when there is an error.
     * @return the number of bytes copied
     * @throws IOException
     */
    public static int copy(InputStream in, boolean closeIn, OutputStream out, boolean closeOut) throws IOException {
        return copy(in, closeIn, out, closeOut, -1L);
    }

    /**
     * Copies an input stream fully to output stream.
     * 
     * @param in
     *            the <tt>InputStream</tt>
     * @param closeIn
     *            if <tt>true</tt>, the <tt>InputStream</tt> is closed
     *            before returning, even when there is an error.
     * @param out
     *            the <tt>OutputStream</tt>
     * @param closeOut
     *            if <tt>true</tt>, the <tt>OutputStream</tt> is closed
     *            before returning, even when there is an error.
     * @param maxLength
     *            maximum number of bytes to copy
     * @return the number of bytes copied
     * @throws IOException
     */
    public static int copy(InputStream in, boolean closeIn, OutputStream out, boolean closeOut, long maxLength)
            throws IOException {
        try {
            byte buffer[] = new byte[8192];
            int numRead;
            int transferred = 0;
            while ((numRead = in.read(buffer)) >= 0) {
                out.write(buffer, 0, numRead);
                transferred += numRead;

                if (maxLength >= 0 && transferred > maxLength)
                    throw new IOException("stream exceeded allowable length: " + transferred);
            }
            return transferred;
        } finally {
            if (closeIn)
                closeStream(in);
            if (closeOut)
                closeStream(out);
        }
    }

    // Custom read/writeUTF8 methods to replace DataInputStream.readUTF() and
    // DataOutputStream.writeUTF() which have 64KB limit

    private static final int MAX_STRING_LEN = 32 * 1024 * 1024; // 32MB

    public static void writeUTF8(DataOutput out, String str) throws IOException {
        // Special case: Null string is serialized as length of -1.
        if (str == null) {
            out.writeInt(-1);
            return;
        }

        int len = str.length();
        if (len > MAX_STRING_LEN)
            throw new IOException(
                    "String length " + len + " is too long in ByteUtil.writeUTF8(); max=" + MAX_STRING_LEN);
        if (len > 0) {
            byte[] buf = str.getBytes("UTF-8");
            out.writeInt(buf.length);
            out.write(buf);
        } else
            out.writeInt(0);
    }

    public static String readUTF8(DataInput in) throws IOException {
        int len = in.readInt();
        if (len > MAX_STRING_LEN) {
            throw new IOException(
                    "String length " + len + " is too long in ByteUtil.writeUTF8(); max=" + MAX_STRING_LEN);
        } else if (len > 0) {
            byte[] buf = new byte[len];
            in.readFully(buf, 0, len);
            return new String(buf, "UTF-8");
        } else if (len == 0) {
            return "";
        } else if (len == -1) {
            return null;
        } else {
            throw new IOException("Invalid length " + len + " in ByteUtil.readUTF8()");
        }
    }

    public static class TeeOutputStream extends OutputStream {
        private OutputStream stream1, stream2;

        public TeeOutputStream(OutputStream one, OutputStream two) {
            if (one == two)
                two = null;
            stream1 = one;
            stream2 = two;
        }

        @Override
        public void write(int b) throws IOException {
            if (stream1 != null)
                stream1.write(b);
            if (stream2 != null)
                stream2.write(b);
        }

        @Override
        public void flush() throws IOException {
            if (stream1 != null)
                stream1.flush();
            if (stream2 != null)
                stream2.flush();
        }

        @Override
        public void write(byte b[], int off, int len) throws IOException {
            if (stream1 != null)
                stream1.write(b, off, len);
            if (stream2 != null)
                stream2.write(b, off, len);
        }
    }

    public static class PositionInputStream extends FilterInputStream {
        private long position = 0, mark = 0;

        public PositionInputStream(InputStream is) {
            super(is);
        }

        @Override
        public int read() throws IOException {
            int c = super.read();
            if (c != -1)
                position++;
            return c;
        }

        @Override
        public int read(byte[] b, int off, int len) throws IOException {
            int count = super.read(b, off, len);
            position += count;
            return count;
        }

        @Override
        public synchronized void mark(int readlimit) {
            super.mark(readlimit);
            mark = position;
        }

        @Override
        public synchronized void reset() throws IOException {
            super.reset();
            position = mark;
        }

        @Override
        public long skip(long n) throws IOException {
            long delta = super.skip(n);
            position += delta;
            return delta;
        }

        public long getPosition() {
            return position;
        }
    }

    public static class SegmentInputStream extends PositionInputStream {
        private final long mLimit;

        public static SegmentInputStream create(InputStream is, long start, long end) throws IOException {
            if (start != 0)
                is.skip(start);
            return new SegmentInputStream(is, Math.max(0L, end - start));
        }

        public SegmentInputStream(InputStream is, long limit) {
            super(is);
            mLimit = limit;
        }

        private long actualAvailable() {
            return mLimit - getPosition();
        }

        @Override
        public int available() {
            return (int) Math.min(actualAvailable(), Integer.MAX_VALUE);
        }

        @Override
        public int read() throws IOException {
            return available() <= 0 ? -1 : super.read();
        }

        @Override
        public int read(byte[] b, int off, int len) throws IOException {
            return available() <= 0 ? -1 : super.read(b, off, Math.min(len, available()));
        }

        @Override
        public long skip(long n) throws IOException {
            return super.skip(Math.max(Math.min(n, actualAvailable()), 0L));
        }
    }
}