org.dspace.core.Utils.java Source code

Introduction

Here is the source code for org.dspace.core.Utils.java
Source

/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.core;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigInteger;
import java.rmi.dgc.VMID;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.Random;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.dspace.app.util.DCInput;
import org.dspace.app.util.DCInputSet;
import org.dspace.app.util.DCInputsReader;
import org.dspace.app.util.DCInputsReaderException;
import org.dspace.content.Collection;

import com.coverity.security.Escape;

/**
 * Utility functions for DSpace.
 * 
 * @author Peter Breton
 * @version $Revision$
 */
public final class Utils {
    /** log4j logger */
    private static final Logger log = Logger.getLogger(Utils.class);

    /** A duration: one or more digits followed by a single unit letter (s, m, h, d, w or y). */
    private static final Pattern DURATION_PATTERN = Pattern.compile("(\\d+)([smhdwy])");

    private static final long MS_IN_SECOND = 1000L;

    private static final long MS_IN_MINUTE = 60000L;

    private static final long MS_IN_HOUR = 3600000L;

    private static final long MS_IN_DAY = 86400000L;

    private static final long MS_IN_WEEK = 604800000L;

    /** A year is counted as 365 days. */
    private static final long MS_IN_YEAR = 31536000000L;

    /** Mixed into every generated key so that successive keys differ; intentionally mutable. */
    private static int counter = 0;

    /** Source of the random bytes mixed into generated keys. */
    private static final Random random = new Random();

    /** Identifier created once when this class is loaded; mixed into generated keys. */
    private static final VMID vmid = new VMID();

    // for parseISO8601Date
    // SimpleDateFormat is not thread-safe: these shared instances rely on
    // parseISO8601Date being synchronized.
    private static final SimpleDateFormat[] parseFmt = {
            // first try at parsing, has milliseconds (note General time zone)
            new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSSz"),

            // second try at parsing, no milliseconds (note General time zone)
            new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ssz"),

            // finally, try without any timezone (defaults to current TZ)
            new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSS"),

            new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss") };

    // for formatISO8601Date
    // (shared, not thread-safe: formatISO8601Date is synchronized for that reason)
    // output canonical format (note RFC 822 time zone, easier to hack)
    private static final SimpleDateFormat outFmtSecond = new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ssZ");

    // output format with millisecond precision
    private static final SimpleDateFormat outFmtMillisec = new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSSZ");

    // scratch calendar used by formatISO8601Date to inspect the millisecond field
    private static final Calendar outCal = GregorianCalendar.getInstance();

    /** Private constructor: this class only exposes static helpers and is never instantiated. */
    private Utils() {
    }

    /**
     * Return an MD5 checksum for data in hex format.
     * <p>
     * The string is encoded as UTF-8 before it is hashed, so the checksum of a
     * given string does not depend on the platform's default charset.
     * 
     * @param data
     *            The data to checksum. Must not be null.
     * @return MD5 checksum for the data in hex format.
     */
    public static String getMD5(String data) {
        // Fully qualified to avoid touching the file's import block.
        return getMD5(data.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }

    /**
     * Compute the MD5 digest of a byte array and render it as hexadecimal text.
     * 
     * @param data
     *            The bytes to checksum.
     * @return The MD5 digest of {@code data} as a hex string.
     */
    public static String getMD5(byte[] data) {
        final byte[] digest = getMD5Bytes(data);
        return toHex(digest);
    }

    /**
     * Return an MD5 checksum for data as a byte array.
     * 
     * @param data
     *            The data to checksum.
     * @return MD5 checksum for the data as a byte array (16 bytes).
     * @throws IllegalStateException
     *             if the runtime provides no MD5 implementation. Every
     *             compliant Java platform is required to ship one, so this
     *             signals a broken installation rather than a normal error.
     */
    public static byte[] getMD5Bytes(byte[] data) {
        try {
            return MessageDigest.getInstance("MD5").digest(data);
        } catch (NoSuchAlgorithmException nsae) {
            // Fail loudly with the cause attached instead of silently
            // handing callers a null digest.
            throw new IllegalStateException("MD5 message digest is not available", nsae);
        }
    }

    /**
     * Render a byte array as lowercase hexadecimal text, two characters per
     * byte (high nibble first).
     * 
     * @param data
     *            The bytes to render.
     * @return The hex text, or {@code null} when {@code data} is null or has
     *         no elements.
     */
    public static String toHex(byte[] data) {
        if (data == null || data.length < 1) {
            return null;
        }

        final StringBuilder hex = new StringBuilder(data.length * 2);
        for (final byte b : data) {
            // Mask after shifting so that sign extension of negative bytes is discarded.
            hex.append(Character.forDigit((b >> 4) & 0x0F, 16));
            hex.append(Character.forDigit(b & 0x0F, 16));
        }
        return hex.toString();
    }

    /**
     * Generate a unique key rendered as decimal digits. The bytes of a freshly
     * generated key are read as a signed big-endian integer and the absolute
     * value of that integer is printed in base 10.
     * 
     * @return A unique key as a long sequence of base-10 digits.
     */
    public static String generateKey() {
        final BigInteger number = new BigInteger(generateBytesKey());
        return number.abs().toString();
    }

    /**
     * Generate a unique key rendered as hexadecimal text: a freshly generated
     * byte key converted with {@link #toHex(byte[])}.
     * 
     * @return A unique key as a long sequence of hex digits.
     */
    public static String generateHexKey() {
        final byte[] keyBytes = generateBytesKey();
        return toHex(keyBytes);
    }

    /**
     * Generate a unique key as a byte array. The key is the MD5 digest of a
     * string built from the VM identifier, the current date, 16 random bytes
     * and a counter that is incremented on every call.
     * 
     * @return A unique key as a byte array.
     */
    public static synchronized byte[] generateBytesKey() {
        final byte[] noise = new byte[16];
        random.nextBytes(noise);

        final StringBuilder seed = new StringBuilder();
        seed.append(vmid);
        seed.append(new java.util.Date());
        seed.append(Arrays.toString(noise));
        seed.append(counter++);

        final String input = seed.toString();
        return getMD5Bytes(input.getBytes());
    }

    // The following two methods are taken from the Jakarta IOUtil class.

    /**
     * Pump every byte from one stream into another using a fixed 4 KiB
     * transfer buffer. Neither stream is wrapped in a buffer, flushed or
     * closed here; those decisions are left to the caller. For a buffered
     * copy that also flushes, use {@link #bufferedCopy}.
     * 
     * @param input
     *            The InputStream to obtain data from.
     * @param output
     *            The OutputStream to copy data to.
     * @throws IOException if IO error
     */
    public static void copy(final InputStream input, final OutputStream output) throws IOException {
        final byte[] chunk = new byte[1024 * 4];

        int bytesRead;
        // read() reports end of stream with -1
        while ((bytesRead = input.read(chunk, 0, chunk.length)) != -1) {
            output.write(chunk, 0, bytesRead);
        }
    }

    /**
     * Copy stream-data from source to destination through buffering wrappers.
     * The source is wrapped in a <code>java.io.BufferedInputStream</code> and
     * the destination in a <code>java.io.BufferedOutputStream</code>, the
     * wrapped pair is handed to {@link #copy}, and the buffered output is then
     * flushed. The streams are not closed after the copy.
     * 
     * @param source
     *            The InputStream to obtain data from.
     * @param destination
     *            The OutputStream to copy data to.
     * @throws IOException if IO error
     */
    public static void bufferedCopy(final InputStream source, final OutputStream destination) throws IOException {
        final BufferedOutputStream bufferedOut = new BufferedOutputStream(destination);
        copy(new BufferedInputStream(source), bufferedOut);
        // push whatever is still held in the output buffer down to the destination
        bufferedOut.flush();
    }

    /**
     * Escape a metadata value for display in HTML by delegating to
     * {@code Escape.html}. Call this before displaying any metadata field
     * that could contain the characters {@code "<", ">", "&", "'"} or double
     * quotation marks, so that they are shown as text rather than interpreted
     * as markup. This will effectively disable HTML links in metadata.
     * 
     * @param value
     *            the metadata value to be scrubbed for display
     * 
     * @return the passed-in string, with html special characters replaced with
     *         entities.
     */
    public static String addEntities(String value) {
        final String escaped = Escape.html(value);
        return escaped;
    }

    /**
     * Utility method to parse durations defined as {@code \d+[smhdwy]} (seconds,
     * minutes, hours, days, weeks, years). Leading and trailing whitespace is
     * ignored.
     * 
     * @param duration
     *            specified duration
     * 
     * @return number of milliseconds equivalent to duration.
     * 
     * @throws ParseException
     *             if the duration is null, is of incorrect format, or
     *             describes a span too large to express as a {@code long}
     *             number of milliseconds
     */
    public static long parseDuration(String duration) throws ParseException {
        if (duration == null) {
            throw new ParseException("'null' is not a valid duration definition", 0);
        }

        Matcher m = DURATION_PATTERN.matcher(duration.trim());
        if (!m.matches()) {
            throw new ParseException("'" + duration + "' is not a valid duration definition", 0);
        }

        String units = m.group(2);
        long multiplier;

        switch (units.charAt(0)) {
            case 's':
                multiplier = MS_IN_SECOND;
                break;
            case 'm':
                multiplier = MS_IN_MINUTE;
                break;
            case 'h':
                multiplier = MS_IN_HOUR;
                break;
            case 'd':
                multiplier = MS_IN_DAY;
                break;
            case 'w':
                multiplier = MS_IN_WEEK;
                break;
            case 'y':
                multiplier = MS_IN_YEAR;
                break;
            default:
                // Not reachable while the pattern only admits [smhdwy]; kept as
                // a safety net should the pattern and this switch ever diverge.
                throw new ParseException(
                        units + " is not a valid time unit (must be 'y', " + "'w', 'd', 'h', 'm' or 's')",
                        duration.indexOf(units));
        }

        long quantity;
        try {
            quantity = Long.parseLong(m.group(1));
        } catch (NumberFormatException nfe) {
            // The pattern guarantees digits only, so this means "too many digits for a long".
            ParseException pe = new ParseException("'" + duration + "' is too large to be a valid duration", 0);
            pe.initCause(nfe);
            throw pe;
        }

        // quantity is never negative (digits only), so one upper-bound check
        // is enough to rule out overflow in the multiplication below.
        if (quantity > Long.MAX_VALUE / multiplier) {
            throw new ParseException("'" + duration + "' is too large to be a valid duration", 0);
        }

        return quantity * multiplier;
    }

    /**
     * Translates timestamp from an ISO 8601-standard format, which
     * is commonly used in XML and RDF documents.
     * This method is synchronized because it depends on a non-reentrant
     * static DateFormat (more efficient than creating a new one each call).
     *
     * @param s the input string
     * @return Date object, or null if the input is null or there is a problem
     *         translating it.
     */
    public static synchronized Date parseISO8601Date(String s) {
        if (s == null) {
            return null;
        }

        // attempt to normalize the timezone to something we can parse;
        // SimpleDateFormat can't handle "Z"
        if (s.endsWith("Z")) {
            s = s.substring(0, s.length() - 1) + "GMT+00:00";
        } else if (s.length() >= 6) {
            // check for trailing timezone of the form "+hh:mm" / "-hh:mm";
            // only look when the string is long enough to hold one, so short
            // inputs fall through to the parsers instead of throwing here
            int tzStart = s.length() - 6;
            char tzSign = s.charAt(tzStart);
            if (tzSign == '-' || tzSign == '+') {
                s = s.substring(0, tzStart) + "GMT" + s.substring(tzStart);
            }
        }

        // try each known format in turn; the first one that parses wins
        ParseException lastError = null;
        for (SimpleDateFormat format : parseFmt) {
            try {
                return format.parse(s);
            } catch (ParseException e) {
                lastError = e;
            }
        }
        if (lastError != null) {
            log.error("Error parsing date:", lastError);
        }
        return null;
    }

    /**
     * Render a Date as an ISO 8601 timestamp string. Milliseconds are only
     * written when they are non-zero. The formatter emits the zone offset in
     * RFC822 form, so a ":" is inserted before its last two characters.
     * This method is synchronized because it depends on non-reentrant
     * static DateFormat and Calendar instances (cheaper than creating new
     * ones on each call).
     *
     * @param d the input Date
     * @return String containing formatted date.
     */
    public static synchronized String formatISO8601Date(Date d) {
        outCal.setTime(d);
        final boolean wholeSecond = outCal.get(Calendar.MILLISECOND) == 0;
        final SimpleDateFormat formatter = wholeSecond ? outFmtSecond : outFmtMillisec;
        final String formatted = formatter.format(d);

        // split just before the final two characters (the zone minutes)
        final int minutesAt = formatted.length() - 2;
        return formatted.substring(0, minutesAt) + ":" + formatted.substring(minutesAt);
    }

    /**
     * Substitute an empty collection for a null reference.
     *
     * @param <E> the element type
     * @param collection the collection to check; may be null
     * @return {@code collection} itself when it is not null, otherwise the
     *         immutable empty list from {@link Collections#emptyList()}
     */
    public static <E> java.util.Collection<E> emptyIfNull(java.util.Collection<E> collection) {
        if (collection != null) {
            return collection;
        }
        return Collections.<E>emptyList();
    }

    /**
     * Utility method to extract schema, element, qualifier from the metadata field key.
     * Keep in mind that this method tries to auto-discover the common separator used in
     * DSpace: "_" when the key contains one, otherwise ".".
     * 
     * Returns an array of tokens with size 3 which contains:
     * schema = tokens[0];
     * element = tokens[1];
     * qualifier = tokens[2]; //it can be empty string
     * 
     * Positions with no corresponding token are left as empty strings. Tokens
     * beyond the third are ignored.
     * 
     * @param metadata (the field in the form dc.title or dc_title); must not be null
     * @return array of tokens, always of length 3
     */
    public static String[] tokenize(String metadata) {
        String separator = metadata.contains("_") ? "_" : ".";
        StringTokenizer dcf = new StringTokenizer(metadata, separator);

        String[] tokens = { "", "", "" };
        // Bound the loop by the array length: a key with more than two
        // separators must not write past the third slot.
        for (int i = 0; i < tokens.length && dcf.hasMoreTokens(); i++) {
            tokens[i] = dcf.nextToken().trim();
        }
        return tokens;
    }

    /**
     * Build a metadata field key by joining schema, element and (when it is
     * not blank) qualifier with the given separator.
     * 
     * @param schema the schema part of the key
     * @param element the element part of the key
     * @param qualifier the qualifier part of the key; left out together with
     *            its separator when blank
     * @param separator (DSpace common separator are "_" or ".")
     * @return metadata field key
     */
    public static String standardize(String schema, String element, String qualifier, String separator) {
        final String base = schema + separator + element;
        return StringUtils.isBlank(qualifier) ? base : base + separator + qualifier;
    }
}