org.ccnx.ccn.impl.support.DataUtils.java Source code

Introduction

Here is the source code for org.ccnx.ccn.impl.support.DataUtils.java
Source

/*
 * Part of the CCNx Java Library.
 *
 * Copyright (C) 2008-2012 Palo Alto Research Center, Inc.
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License version 2.1
 * as published by the Free Software Foundation.
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details. You should have received
 * a copy of the GNU Lesser General Public License along with this library;
 * if not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301 USA.
 */

package org.ccnx.ccn.impl.support;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;

import org.bouncycastle.util.encoders.Base64;
import org.ccnx.ccn.config.SystemConfiguration;

/**
 * Miscellaneous utility routines for CCN, mostly data comparison and conversion.
 */
public final class DataUtils {

    /** Number of bits in one byte. */
    public static final int BITS_PER_BYTE = 8;
    /** The empty string. */
    public static final String EMPTY = "";
    /** Value of the {@code line.separator} system property, read once when this class is initialized. */
    public static final String LINE_SEPARATOR = System.getProperty("line.separator");

    /**
     * Useful when we move over to 1.6, and can avoid UnsupportedCharsetExceptions this way.
     */
    public static Charset UTF8_CHARSET;

    static {
        try {
            UTF8_CHARSET = Charset.forName("UTF-8");
            if (null == UTF8_CHARSET) {
                // This shouldn't happen, but be noisy about it if it does...
                throw new UnsupportedCharsetException(
                        "Attempt to retrieve the UTF-8 charset returned null! Significant configuration error!");
            }
        } catch (Exception e) { // Should be UnsupportedCharsetException or IllegalCharsetNameException
            Log.severe("Unknown encoding UTF-8! This is a significant configuration problem.");
            throw new RuntimeException("Cannot find UTF-8 encoding. Significant configuration error");
        }
    }

    /**
     * Null-tolerant comparison of two mutually comparable values.
     * A null value sorts before any non-null value; two nulls are equal.
     *
     * @param left first value, may be null
     * @param right second value, may be null
     * @return 0 if both are null, -1 if only left is null, 1 if only right is null,
     *         otherwise the result of {@code left.compareTo(right)}
     */
    public static <T extends Comparable<T>> int compare(T left, T right) {
        if (null == left) {
            return (null == right) ? 0 : -1;
        }
        if (null == right) {
            return 1;
        }
        return left.compareTo(right);
    }

    /**
     * Shortlex comparison of two byte arrays (canonical CCN ordering): a shorter
     * array always sorts before a longer one, and arrays of equal length are
     * ordered byte by byte with each byte treated as an unsigned value.
     * A null array sorts before any non-null array; two nulls are equal.
     *
     * MM - This method should really be renamed to "shortlex" or something
     * other than "compare", unless it is needed for an Override name.
     *
     * @param left first array, may be null
     * @param right second array, may be null
     * @return -1 if left comes before right, 0 if they are equal, 1 if left comes after right
     */
    public static int compare(byte[] left, byte[] right) {
        if (null == left) {
            return (null == right) ? 0 : -1;
        }
        if (null == right) {
            return 1;
        }
        final int size = left.length;
        if (size != right.length) {
            // Length decides first: the shorter array wins.
            return (size < right.length) ? -1 : 1;
        }
        for (int pos = 0; pos < size; pos++) {
            // Mask to compare as unsigned values.
            final int leftValue = left[pos] & 0xff;
            final int rightValue = right[pos] & 0xff;
            if (leftValue != rightValue) {
                return (leftValue < rightValue) ? -1 : 1;
            }
        }
        return 0;
    }

    /**
     * Lexicographic comparison of two lists of byte arrays. Elements are compared
     * pairwise, in order, using the shortlex {@code compare(byte[], byte[])}; the
     * first non-equal pair decides. If one list is a prefix of the other, the
     * shorter list sorts first. A null list sorts before any non-null list.
     *
     * This is not like compare(byte[], byte[]), which is shortlex on a single array.
     *
     * @param left first list, may be null
     * @param right second list, may be null
     * @return negative if left comes before right, 0 if equal, positive if left comes after right
     */
    public static int compare(ArrayList<byte[]> left, ArrayList<byte[]> right) {
        if (null == left) {
            return (null == right) ? 0 : -1;
        }
        if (null == right) {
            return 1;
        }

        final int leftCount = left.size();
        final int rightCount = right.size();
        final int shared = Math.min(leftCount, rightCount);
        for (int idx = 0; idx < shared; idx++) {
            final int order = compare(left.get(idx), right.get(idx));
            if (order != 0) {
                return order;
            }
        }
        // Equal over the common prefix: the list with fewer elements sorts first.
        if (leftCount == rightCount) {
            return 0;
        }
        return (leftCount < rightCount) ? -1 : 1;
    }

    /**
     * Renders bytes for logging, e.g. for non-ASCII components. The bytes are
     * interpreted as a non-negative big-endian integer and printed in the radix
     * given by {@code SystemConfiguration.DEBUG_RADIX}.
     *
     * @param bytes the data to print, may be null
     * @return the empty string if bytes is null, otherwise the data as a BigInteger String
     */
    public static String printBytes(byte[] bytes) {
        return (bytes == null)
                ? ""
                : new BigInteger(1, bytes).toString(SystemConfiguration.DEBUG_RADIX);
    }

    /**
     * Renders bytes that should be read as hexadecimal, such as segment numbers.
     * The bytes are interpreted as a non-negative big-endian integer, so leading
     * zero bytes do not appear in the output.
     *
     * @param bytes the data to print, may be null
     * @return {@code "<empty>"} if bytes is null or zero-length, otherwise the data as a hexadecimal String
     */
    public static String printHexBytes(byte[] bytes) {
        final boolean nothingToPrint = (null == bytes) || (bytes.length == 0);
        return nothingToPrint ? "<empty>" : new BigInteger(1, bytes).toString(16);
    }

    /*
     * A place to centralize interfaces to base64 encoding/decoding, as the classes
     * we use change depending on what ships with Java.
     */

    /**
     * Decodes base64 data by delegating to {@code Base64.decode}.
     *
     * @param input the base64-encoded data
     * @return whatever {@code Base64.decode(input)} returns (presumably the decoded bytes)
     * @throws IOException declared for callers; nothing in this method body throws it directly
     */
    public static byte[] base64Decode(byte[] input) throws IOException {
        return Base64.decode(input);
    }

    /**
     * Base64-encodes data by delegating to {@code Base64.encode}.
     *
     * @param input the data to encode
     * @return whatever {@code Base64.encode(input)} returns (presumably the base64 text as bytes)
     */
    public static byte[] base64Encode(byte[] input) {
        return Base64.encode(input);
    }

    /** Default line length used when line-wrapping base64 output. */
    public static final int LINELEN = 64;

    /**
     * Base64-encodes data and line-wraps the resulting text.
     *
     * @param input the data to encode
     * @param lineLength the line length handed to {@code lineWrap}; if null or
     *        not positive, {@link #LINELEN} is used instead
     * @return the base64 text, wrapped by {@code lineWrap}
     */
    public static String base64Encode(byte[] input, Integer lineLength) {
        byte[] encodedBytes = base64Encode(input);
        // Honor the requested width; this argument used to be silently ignored
        // in favor of LINELEN. Fall back to LINELEN for missing/nonsensical values.
        int wrapAt = ((null == lineLength) || (lineLength.intValue() <= 0)) ? LINELEN : lineLength.intValue();
        return lineWrap(DataUtils.getUTF8StringFromBytes(encodedBytes), wrapAt);
    }

    /**
     * Copies the input into a new array, inserting a CR LF pair after every
     * {@code lineLength} input bytes and writing a zero byte after the last
     * copied byte. The result is allocated with
     * {@code input.length + 2 * (input.length / lineLength) + 3} bytes, so any
     * bytes after the terminator are also zero.
     *
     * @deprecated not used in CCNx, candidate for removal in future release.
     * @param input the bytes to wrap
     * @param lineLength number of input bytes per line
     * @return the byte array with added CRLF line-breaks and null termination.
     */
    @Deprecated
    public static byte[] lineWrapBase64(byte[] input, int lineLength) {
        final int sourceLen = input.length;
        final byte[] wrapped = new byte[sourceLen + 2 * (sourceLen / lineLength) + 3];
        int writePos = 0;
        for (int readPos = 0; readPos < sourceLen; readPos++) {
            wrapped[writePos++] = input[readPos];
            // A full line has just been copied: terminate it with CR LF.
            if (((readPos + 1) % lineLength) == 0) {
                wrapped[writePos++] = (byte) 0x0D;
                wrapped[writePos++] = (byte) 0x0A;
            }
        }
        wrapped[writePos] = '\0';
        return wrapped;
    }

    /**
     * Breaks a string into lines by inserting {@link #LINE_SEPARATOR}. Each
     * emitted line holds {@code lineLength - LINE_SEPARATOR.length()} characters
     * of the input followed by the separator, so a line including its separator
     * is {@code lineLength} characters long. The final line carries no separator
     * and may be up to {@code lineLength} characters long.
     *
     * @param inputString the text to wrap, may be null
     * @param lineLength maximum line length, counting the separator
     * @return inputString itself if it is null or no longer than lineLength,
     *         otherwise the wrapped text
     * @throws IllegalArgumentException if the string needs wrapping but lineLength
     *         is not greater than the separator length (previously this looped
     *         forever or failed with an index error)
     */
    public static String lineWrap(String inputString, int lineLength) {
        if ((null == inputString) || (inputString.length() <= lineLength)) {
            return inputString;
        }

        final int sepLen = LINE_SEPARATOR.length();
        // Number of input characters that fit on a line next to the separator.
        final int chunk = lineLength - sepLen;
        if (chunk <= 0) {
            throw new IllegalArgumentException("lineLength (" + lineLength
                    + ") must be greater than the line separator length (" + sepLen + ")");
        }

        final int length = inputString.length();
        // No break is inserted when the remaining tail (at most lineLength
        // characters) already fits on a line of its own.
        final int lastBreakLimit = length - sepLen;
        // Append chunk by chunk instead of inserting into the middle of a buffer,
        // which shifted the tail on every insert.
        final StringBuilder wrapped = new StringBuilder(length);
        int lineStart = 0;
        for (int breakAt = chunk; breakAt < lastBreakLimit; breakAt += chunk) {
            wrapped.append(inputString, lineStart, breakAt).append(LINE_SEPARATOR);
            lineStart = breakAt;
        }
        wrapped.append(inputString, lineStart, length);
        return wrapped.toString();
    }

    /**
     * Tests two byte arrays for equality of content.
     *
     * @param left first array, may be null
     * @param right second array, may be null
     * @return true if both are null, or both are non-null with the same length
     *         and the same byte at every position
     */
    public static boolean arrayEquals(byte[] left, byte[] right) {
        if ((left == null) || (right == null)) {
            // Equal only when both are null.
            return left == right;
        }
        final int size = left.length;
        if (size != right.length) {
            return false;
        }
        int pos = 0;
        while ((pos < size) && (left[pos] == right[pos])) {
            pos++;
        }
        return pos == size;
    }

    /**
     * Tests whether two byte arrays agree over their first {@code length} bytes.
     * If either array is shorter than {@code length}, the arrays must have the
     * same length to be considered equal; the comparison then covers as many
     * bytes as are available.
     *
     * @param left first array, may be null
     * @param right second array, may be null
     * @param length number of leading bytes to compare
     * @return true if both are null, or both are non-null and match as described
     */
    public static boolean arrayEquals(byte[] left, byte[] right, int length) {
        if ((left == null) || (right == null)) {
            // Equal only when both are null.
            return left == right;
        }

        final int leftLen = left.length;
        final int rightLen = right.length;
        final boolean eitherTooShort = (leftLen < length) || (rightLen < length);
        if (eitherTooShort && (leftLen != rightLen)) {
            return false;
        }

        final int limit = Math.min(length, Math.min(leftLen, rightLen));
        for (int pos = 0; pos < limit; pos++) {
            if (left[pos] != right[pos]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether {@code data} begins with the bytes of {@code prefix}.
     *
     * Used to check for binary prefixes used to mark certain ContentName components for special purposes.
     *
     * @param prefix bytes to look for; if null or empty this method always returns true.
     * @param data data to inspect; if null (and prefix is non-empty) this method returns false.
     * @return true if data starts with prefix.
     */
    public static boolean isBinaryPrefix(byte[] prefix, byte[] data) {
        final int prefixLen = (null == prefix) ? 0 : prefix.length;
        if (prefixLen == 0) {
            return true;
        }
        if ((null == data) || (data.length < prefixLen)) {
            return false;
        }
        int pos = 0;
        while ((pos < prefixLen) && (prefix[pos] == data[pos])) {
            pos++;
        }
        return pos == prefixLen;
    }

    /**
     * Recursively delete a directory and all its contents.
     * If given File does not exist, this method returns with no error
     * but if it exists as a file not a directory, an exception will be thrown.
     * Similar to org.apache.commons.io.FileUtils.deleteDirectory
     * but avoids dependency on that library for minimal use.
     *
     * Individual deletions are best-effort: the result of each
     * {@code File.delete()} call is not checked.
     *
     * @param directory the directory to remove
     * @throws IOException if "directory" is a file, or if the contents of a
     *         directory cannot be listed
     */
    public static void deleteDirectory(File directory) throws IOException {
        if (!directory.exists()) {
            return;
        }
        if (!directory.isDirectory()) {
            throw new IOException(directory.getPath() + " is not a directory");
        }
        // listFiles() returns null when the directory cannot be read (I/O error,
        // or it vanished in the meantime); report that instead of failing with a
        // NullPointerException in the loop below.
        File[] children = directory.listFiles();
        if (null == children) {
            throw new IOException("Unable to list contents of " + directory.getPath());
        }
        for (File child : children) {
            if (child.isDirectory()) {
                deleteDirectory(child);
            } else {
                child.delete();
            }
        }
        directory.delete();
    }

    /**
     * Reads the entire contents of a file into a byte array.
     * This was used in early content demos; keep it around as it may be generally useful.
     *
     * @param file the file to read
     * @return the file's contents
     * @throws IOException if the file is larger than {@code Integer.MAX_VALUE} bytes,
     *         cannot be opened, or ends before the number of bytes reported by
     *         {@code file.length()} has been read
     */
    public static byte[] getBytesFromFile(File file) throws IOException {
        // Get the size of the file; reject oversized files before opening anything.
        long length = file.length();
        if (length > Integer.MAX_VALUE) {
            throw new IOException("File is too large: " + file.getName());
        }

        InputStream is = new FileInputStream(file);
        try {
            // Create the byte array to hold the data
            byte[] bytes = new byte[(int) length];

            // Read in the bytes; a single read() may return fewer than requested.
            int offset = 0;
            int numRead = 0;
            while (offset < bytes.length && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) {
                offset += numRead;
            }

            // Ensure all the bytes have been read in
            if (offset < bytes.length) {
                throw new IOException("Could not completely read file " + file.getName());
            }
            return bytes;
        } finally {
            // Always release the file handle, including on the error paths above.
            is.close();
        }
    }

    /**
     * Drains a stream (usually a small one) into a byte array. Used to get all of
     * the bytes out of one or more content objects for decoding or other
     * processing, where the content needs to be handed to something else as a unit.
     * Reading stops at the first {@code read} call that returns zero or less.
     * The stream is not closed.
     *
     * @param input the stream to drain
     * @return all bytes read from the stream
     * @throws IOException if reading from the stream fails
     */
    public static byte[] getBytesFromStream(InputStream input) throws IOException {
        final ByteArrayOutputStream collected = new ByteArrayOutputStream();
        final byte[] chunk = new byte[1024];
        for (int got = input.read(chunk); got > 0; got = input.read(chunk)) {
            collected.write(chunk, 0, got);
        }
        return collected.toByteArray();
    }

    /**
     * Decodes bytes as UTF-8 text.
     *
     * Wrap up handling of UTF-8 encoding in one place (as much as possible), because
     * an UnsupportedEncodingException in response to a request for UTF-8 signals
     * a significant configuration error; we should catch it and signal a RuntimeException
     * in one place and let the rest of the code not worry about it.
     *
     * @param stringBytes UTF-8 encoded text
     * @return the decoded string
     * @throws RuntimeException if the platform does not support UTF-8
     */
    public static String getUTF8StringFromBytes(byte[] stringBytes) {
        try {
            // Version taking a Charset not available till 1.6.
            return new String(stringBytes, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            Log.severe("Unknown encoding UTF-8! This is a significant configuration problem.");
            // Keep the original exception as the cause instead of discarding it.
            throw new RuntimeException("Unknown encoding UTF-8! This is a significant configuration problem.", e);
        }
    }

    /**
     * Encodes a string as UTF-8 bytes.
     *
     * Wrap up handling of UTF-8 encoding in one place (as much as possible), because
     * an UnsupportedEncodingException in response to a request for UTF-8 signals
     * a significant configuration error; we should catch it and signal a RuntimeException
     * in one place and let the rest of the code not worry about it.
     *
     * @param stringData the text to encode
     * @return the UTF-8 encoding of stringData
     * @throws RuntimeException if the platform does not support UTF-8
     */
    public static byte[] getBytesFromUTF8String(String stringData) {
        try {
            // Version taking a Charset not available till 1.6.
            return stringData.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            Log.severe("Unknown encoding UTF-8! This is a significant configuration problem.");
            // Keep the original exception as the cause instead of discarding it.
            throw new RuntimeException("Unknown encoding UTF-8! This is a significant configuration problem.", e);
        }
    }

    /**
     * Lexicographically compare two byte ranges, looking at a limited number of bytes.
     * Comparison starts at {@code offset1} in {@code arr1} and {@code offset2} in
     * {@code arr2}. If the ranges agree over every byte available and fewer than
     * {@code count} bytes could be compared, the range with fewer remaining bytes
     * is the lesser one.
     *
     * Note two ways in which this differs from {@code compare(byte[], byte[])}:
     * a null array sorts AFTER a non-null one here, and bytes are compared as
     * signed values.
     * NOTE(review): confirm whether signed comparison is intended; it is kept
     * as-is to avoid changing results for existing callers.
     *
     * @param arr1 first array, may be null
     * @param offset1 index in arr1 at which comparison starts
     * @param arr2 second array, may be null
     * @param offset2 index in arr2 at which comparison starts
     * @param count Maximum number of bytes to inspect.
     * @return < 0 if left comes before right, 0 if they are equal, > 0 if left comes after right
     */
    public static int bytencmp(byte[] arr1, int offset1, byte[] arr2, int offset2, int count) {
        if (null == arr1) {
            return (null == arr2) ? 0 : 1;
        }
        if (null == arr2) {
            return -1;
        }

        final int remaining1 = arr1.length - offset1;
        final int remaining2 = arr2.length - offset2;
        final int cmpcount = Math.min(Math.min(count, remaining1), remaining2);
        // Index relative to each offset. The loop used to run the arr1 index from
        // offset1 up to cmpcount, comparing too few bytes whenever offset1 > 0.
        for (int k = 0; k < cmpcount; ++k) {
            final byte b1 = arr1[offset1 + k];
            final byte b2 = arr2[offset2 + k];
            if (b1 < b2) {
                return -1;
            }
            if (b1 > b2) {
                return 1;
            }
        }
        if (cmpcount == count) {
            return 0;
        }
        // OK, they match up to the end of the shorter range, which is shorter
        // than count. Whichever range has fewer bytes left is less.
        if (remaining1 > remaining2) {
            return 1;
        }
        if (remaining1 < remaining2) {
            return -1;
        }
        return 0;
    }

    /**
     * Compares two byte arrays from their first byte, looking at a limited number
     * of bytes. Delegates to the five-argument {@code bytencmp} with both offsets 0.
     *
     * @param arr1 first array
     * @param arr2 second array
     * @param count Maximum number of bytes to inspect.
     * @return the result of {@code bytencmp(arr1, 0, arr2, 0, count)}
     */
    public static int bytencmp(byte[] arr1, byte[] arr2, int count) {
        return bytencmp(arr1, 0, arr2, 0, count);
    }

    /**
     * Finds the index of the first occurrence of byteToFind in array, searching
     * forward from the given offset.
     *
     * @param array array to search, may be null
     * @param startingOffset offset into array to start at
     * @param byteToFind byte to seek
     * @return position in array of the first occurrence of byteToFind at or after
     *         startingOffset, or -1 if there is none (including when array is
     *         null or startingOffset is at or beyond the end of the array)
     */
    public static int byteindex(byte[] array, int startingOffset, byte byteToFind) {
        if (null == array) {
            return -1;
        }
        for (int pos = startingOffset; pos < array.length; pos++) {
            if (array[pos] == byteToFind) {
                return pos;
            }
        }
        // Falling out of the loop always means "not found"; the old end-of-loop
        // check returned startingOffset itself when it was beyond array.length.
        return -1;
    }

    /**
     * Finds the index of the first occurrence of byteToFind in array, returns -1 if not found.
     * Delegates to {@code byteindex(array, 0, byteToFind)}.
     * @param array array to search
     * @param byteToFind byte to seek
     * @return position in array containing first occurrence of byteToFind, or -1 if not found
     */
    public static int byteindex(byte[] array, byte byteToFind) {
        return byteindex(array, 0, byteToFind);
    }

    /**
     * Finds the index of the last occurrence of byteToFind in array, searching
     * backward from the given offset.
     *
     * @param array array to search, may be null
     * @param startingOffset index at which the backward search starts; values
     *        beyond the last index are treated as the last index
     * @param byteToFind byte to seek
     * @return position in array of the last occurrence of byteToFind at or before
     *         startingOffset, or -1 if there is none (including when array is null)
     */
    public static int byterindex(byte[] array, int startingOffset, byte byteToFind) {
        if (null == array) {
            return -1;
        }
        // Clamp so that a start at or past array.length searches from the last
        // element instead of throwing ArrayIndexOutOfBoundsException.
        for (int pos = Math.min(startingOffset, array.length - 1); pos >= 0; pos--) {
            if (array[pos] == byteToFind) {
                return pos;
            }
        }
        return -1;
    }

    /**
     * Finds the index of the last occurrence of byteToFind in array, returns -1 if not found.
     * @param array array to search, may be null
     * @param byteToFind byte to seek
     * @return position in array containing the last occurrence of byteToFind, or -1
     *         if not found or if array is null
     */
    public static int byterindex(byte[] array, byte byteToFind) {
        if (null == array) {
            return -1;
        }
        // The backward search starts at an index, so begin at the last valid one.
        // Starting at array.length indexed one past the end of the array.
        return byterindex(array, array.length - 1, byteToFind);
    }

    /**
     * Counts how many times a given byte occurs in part of an array. The indices
     * examined run from {@code startingOffset} (inclusive) up to {@code length}
     * (exclusive); despite its name, {@code length} acts as an end index, not
     * as a count of bytes.
     *
     * @param array array to search, may be null
     * @param startingOffset first index to examine
     * @param length index at which to stop (exclusive)
     * @param byteToFind byte to count
     * @return the number of matching bytes in the examined range, or 0 if array is null
     */
    public static int occurcount(byte[] array, int startingOffset, int length, byte byteToFind) {
        if (array == null) {
            return 0;
        }
        int hits = 0;
        int pos = startingOffset;
        while (pos < length) {
            if (array[pos] == byteToFind) {
                hits++;
            }
            pos++;
        }
        return hits;
    }

    /**
     * Counts how many times a given byte occurs in the whole array.
     *
     * NOTE(review): the {@code length} argument is not used; the count always
     * covers the entire array. Honoring it would change results for existing
     * callers, so confirm the intended contract before changing this.
     *
     * @param array array to search, may be null
     * @param length ignored
     * @param byteToFind byte to count
     * @return the number of occurrences of byteToFind in array, or 0 if array is null
     */
    public static int occurcount(byte[] array, int length, byte byteToFind) {
        final int end = (null == array) ? -1 : array.length;
        return occurcount(array, 0, end, byteToFind);
    }

    /**
     * Counts how many times a given byte occurs in the whole array.
     *
     * @param array array to search, may be null
     * @param byteToFind byte to count
     * @return the number of occurrences of byteToFind in array, or 0 if array is null
     */
    public static int occurcount(byte[] array, byte byteToFind) {
        // Scan indices [0, array.length) directly via the four-argument form.
        final int end = (null != array) ? array.length : -1;
        return occurcount(array, 0, end, byteToFind);
    }

    /**
     * Akin to String.split for binary arrays; splits on a given byte value.
     * Splitting starts at {@code startingOffset}: bytes before it are neither
     * searched for the split value nor copied into any component. The split
     * bytes themselves are not included in the components, and adjacent split
     * bytes yield empty components.
     *
     * If no split value is found at or after startingOffset, the original array
     * instance is returned as the single component (not a copy, and not trimmed
     * to startingOffset).
     * NOTE(review): that no-split result ignores startingOffset; kept for
     * compatibility -- confirm whether callers expect the tail instead.
     *
     * @param array the data to split; if null, the result is a single null component
     * @param startingOffset index at which splitting starts
     * @param splitValue the byte value that separates components
     * @return the components, in order
     */
    public static byte[][] binarySplit(byte[] array, int startingOffset, byte splitValue) {
        if (null == array) {
            return new byte[][] { array };
        }
        // Count separators only from startingOffset on. The three-argument
        // occurcount overload used before ignores its int argument and counts
        // over the whole array, which over-counted whenever separators appeared
        // before startingOffset and then ran the copy loop past the end.
        int count = occurcount(array, startingOffset, array.length, splitValue) + 1;
        if (count == 1) {
            // no split values; just return the original array
            return new byte[][] { array };
        }
        byte[][] components = new byte[count][];
        int lastoffset = startingOffset;
        for (int index = 0; index < count; index++) {
            int offset = byteindex(array, lastoffset, splitValue);
            if (offset < 0) {
                // last one
                offset = array.length;
            }
            components[index] = new byte[offset - lastoffset];
            System.arraycopy(array, lastoffset, components[index], 0, components[index].length);
            // Skip over the separator itself.
            lastoffset = offset + 1;
        }
        return components;
    }

    /**
     * Splits an array on a given byte value, starting from the first byte.
     * Delegates to {@code binarySplit(array, 0, splitValue)}.
     *
     * @param array the data to split
     * @param splitValue the byte value that separates components
     * @return the result of {@code binarySplit(array, 0, splitValue)}
     */
    public static byte[][] binarySplit(byte[] array, byte splitValue) {
        return binarySplit(array, 0, splitValue);
    }

    /**
     * Copies a range of bytes into a newly allocated array.
     *
     * @param array source array
     * @param offset index of the first byte to copy
     * @param len number of bytes to copy
     * @return a new array of length len holding array[offset .. offset+len-1]
     */
    public static byte[] subarray(byte[] array, int offset, int len) {
        final byte[] copy = new byte[len];
        System.arraycopy(array, offset, copy, 0, copy.length);
        return copy;
    }

    /**
     * Interprets a big-endian byte array as an unsigned value and returns it as a long.
     * No bounds checking is done on the array -- the caller should make sure it is
     * 8 or fewer bytes; with more, the leading bytes are shifted out of the result.
     *
     * Should operate like BigInteger(1, bytes).longValue().
     *
     * @param src big-endian bytes
     * @return the accumulated value; 0 for an empty array
     */
    public final static long byteArrayToUnsignedLong(final byte[] src) {
        long accumulated = 0;
        for (final byte octet : src) {
            // Mask so the byte contributes its unsigned value, without sign extension.
            accumulated = (accumulated << 8) | (octet & 0xFF);
        }
        return accumulated;
    }

    /**
     * Like byteArrayToUnsignedLong(src), except that conversion begins at byte
     * position {@code start} rather than at position 0. This is commonly used to
     * skip the 1st byte of a CommandMarker.
     * If {@code start} is 0, works exactly like byteArrayToUnsignedLong(src).
     *
     * @param src big-endian bytes
     * @param start index of the first byte to include
     * @return the unsigned value of src[start..]; 0 if start is at or past the end
     */
    public final static long byteArrayToUnsignedLong(final byte[] src, int start) {
        long accumulated = 0;
        int pos = start;
        while (pos < src.length) {
            // Mask so the byte contributes its unsigned value, without sign extension.
            accumulated = (accumulated << 8) | (src[pos] & 0xFF);
            pos++;
        }
        return accumulated;
    }

    /**
     * Convert a long value to a Big Endian byte array.  Assume
     * the long is not signed.
     *
     * The result is the shortest big-endian form: leading zero bytes are dropped,
     * except that the value 0 is returned as a single 0x00 byte. A value with the
     * top bit set (negative as a Java long) yields 8 bytes.
     *
     * This should be the equivalent of taking
     * {@code BigInteger.valueOf(value).toByteArray()} for a non-negative value and
     * stripping the leading 0x00 sign byte when one is present and the array is
     * longer than one byte.
     *
     * Every call returns a freshly allocated array, so callers may modify the
     * result freely.
     *
     * @param value the value to convert, treated as unsigned
     * @return the big-endian bytes of value, between 1 and 8 bytes long
     */
    public final static byte[] unsignedLongToByteArray(final long value) {
        // Bytes needed for the highest set bit; zero still occupies one byte.
        final int significantBits = 64 - Long.numberOfLeadingZeros(value);
        final int size = Math.max(1, (significantBits + 7) / 8);

        // Always a new array: zero used to be answered with a shared static
        // array, so one caller's modification leaked into later results.
        final byte[] out = new byte[size];
        long remaining = value;
        for (int pos = size - 1; pos >= 0; --pos) {
            out[pos] = (byte) (remaining & 0xFF);
            remaining >>>= 8;
        }
        return out;
    }

    /**
     * Like unsignedLongToByteArray(value), except that the caller specifies the
     * first byte, so the array is 1 byte longer than the value alone would need.
     * This is used by things that need a CommandMarker.
     *
     * If the value is 0, the array is 1 byte long and holds only firstByte; no
     * 0x00 byte is included for the value.
     *
     * @param value the value to convert, treated as unsigned
     * @param firstByte the byte to place at index 0
     * @return firstByte followed by the shortest big-endian form of value
     */
    public final static byte[] unsignedLongToByteArray(final long value, final byte firstByte) {
        // Count the bytes needed for the value; zero needs none.
        int valueBytes = 0;
        for (long probe = value; probe != 0; probe >>>= 8) {
            valueBytes++;
        }

        final byte[] out = new byte[valueBytes + 1];
        out[0] = firstByte;
        // Fill from the least significant byte backwards.
        long remaining = value;
        for (int pos = valueBytes; pos >= 1; --pos) {
            out[pos] = (byte) (remaining & 0xFF);
            remaining >>>= 8;
        }
        return out;
    }

}