org.pentaho.di.core.util.DateDetector.java Source code

Java tutorial

Introduction

Here is the source code for org.pentaho.di.core.util.DateDetector.java

Source

/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2015 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/
package org.pentaho.di.core.util;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.commons.collections.BidiMap;
import org.apache.commons.collections.bidimap.DualHashBidiMap;

public class DateDetector {

    private static final String LOCALE_en_US = "en_US";

    @SuppressWarnings("serial")
    static final BidiMap DATE_FORMAT_TO_REGEXPS_US = new DualHashBidiMap() {
        {
            put("MM-dd-yyyy", "^[0-1]?[0-9]-[0-3]?[0-9]-\\d{4}$");
            put("dd/MM/yyyy", "^[0-3]?[0-9]/[0-1]?[0-9]/\\d{4}$");
            put("MM-dd-yy", "^[0-1]?[0-9]-[0-3]?[0-9]-\\d{2}$");
            put("dd/MM/yy", "^[0-3]?[0-9]/[0-1]?[0-9]/\\d{2}$");
            put("yyyyMMdd", "^\\d{8}$");
            put("dd-MM-yy", "^\\d{1,2}-\\d{1,2}-\\d{2}$");
            put("dd-MM-yyyy", "^\\d{1,2}-\\d{1,2}-\\d{4}$");
            put("dd.MM.yy", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}$");
            put("dd.MM.yyyy", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}$");
            put("MM/dd/yy", "^\\d{1,2}/\\d{1,2}/\\d{2}$");
            put("MM/dd/yyyy", "^\\d{1,2}/\\d{1,2}/\\d{4}$");
            put("yyyy-MM-dd", "^\\d{4}-\\d{1,2}-\\d{1,2}$");
            put("yyyy.MM.dd", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}$");
            put("yyyy/MM/dd", "^\\d{4}/\\d{1,2}/\\d{1,2}$");
            put("dd MMM yyyy", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}$");
            put("dd MMMM yyyy", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}$");
            put("yyyyMMddHHmm", "^\\d{12}$");
            put("yyyyMMdd HHmm", "^\\d{8}\\s\\d{4}$");
            put("dd-MM-yy HH:mm", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}$");
            put("dd-MM-yyyy HH:mm", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}$");
            put("dd.MM.yy HH:mm", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}$");
            put("dd.MM.yyyy HH:mm", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}$");
            put("MM/dd/yy HH:mm", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}$");
            put("MM/dd/yyyy HH:mm", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}$");
            put("yyyy-MM-dd HH:mm", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}$");
            put("yyyy.MM.dd HH:mm", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}$");
            put("yyyy/MM/dd HH:mm", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}$");
            put("dd MMM yyyy HH:mm", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}$");
            put("dd MMMM yyyy HH:mm", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}$");
            put("yyyyMMddHHmmss", "^\\d{14}$");
            put("yyyyMMdd HHmmss", "^\\d{8}\\s\\d{6}$");
            put("dd-MM-yy HH:mm:ss", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd-MM-yyyy HH:mm:ss", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd.MM.yy HH:mm:ss", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd.MM.yyyy HH:mm:ss", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("MM/dd/yy HH:mm:ss", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("MM/dd/yyyy HH:mm:ss", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("yyyy-MM-dd HH:mm:ss", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("yyyy.MM.dd HH:mm:ss", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("yyyy/MM/dd HH:mm:ss", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd MMM yyyy HH:mm:ss", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd MMMM yyyy HH:mm:ss", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd-MM-yy HH:mm:ss.SSS", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("dd-MM-yyyy HH:mm:ss.SSS", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("dd.MM.yy HH:mm:ss.SSS", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("dd.MM.yyyy HH:mm:ss.SSS", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("MM/dd/yy HH:mm:ss.SSS", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("MM/dd/yyyy HH:mm:ss.SSS", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("yyyy-MM-dd HH:mm:ss.SSS", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("yyyy.MM.dd HH:mm:ss.SSS", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("yyyy/MM/dd HH:mm:ss.SSS", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("dd MMM yyyy HH:mm:ss.SSS", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("dd MMMM yyyy HH:mm:ss.SSS", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
        }
    };

    @SuppressWarnings("serial")
    static final BidiMap DATE_FORMAT_TO_REGEXPS = new DualHashBidiMap() {
        {
            put("MM-dd-yyyy", "^[0-1]?[0-9]-[0-3]?[0-9]-\\d{4}$");
            put("dd/MM/yyyy", "^[0-3]?[0-9]/[0-1]?[0-9]/\\d{4}$");
            put("MM-dd-yy", "^[0-1]?[0-9]-[0-3]?[0-9]-\\d{2}$");
            put("dd/MM/yy", "^[0-3]?[0-9]/[0-1]?[0-9]/\\d{2}$");
            put("yyyyMMdd", "^\\d{8}$");
            put("dd-MM-yy", "^\\d{1,2}-\\d{1,2}-\\d{2}$");
            put("dd-MM-yyyy", "^\\d{1,2}-\\d{1,2}-\\d{4}$");
            put("dd.MM.yy", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}$");
            put("dd.MM.yyyy", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}$");
            put("dd/MM/yy", "^\\d{1,2}/\\d{1,2}/\\d{2}$");
            put("dd/MM/yyyy", "^\\d{1,2}/\\d{1,2}/\\d{4}$");
            put("yyyy-MM-dd", "^\\d{4}-\\d{1,2}-\\d{1,2}$");
            put("yyyy.MM.dd", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}$");
            put("yyyy/MM/dd", "^\\d{4}/\\d{1,2}/\\d{1,2}$");
            put("dd MMM yyyy", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}$");
            put("dd MMMM yyyy", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}$");
            put("yyyyMMddHHmm", "^\\d{12}$");
            put("yyyyMMdd HHmm", "^\\d{8}\\s\\d{4}$");
            put("dd-MM-yy HH:mm", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}$");
            put("dd-MM-yyyy HH:mm", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}$");
            put("dd.MM.yy HH:mm", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}$");
            put("dd.MM.yyyy HH:mm", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}$");
            put("dd/MM/yy HH:mm", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}$");
            put("dd/MM/yyyy HH:mm", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}$");
            put("yyyy-MM-dd HH:mm", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}$");
            put("yyyy.MM.dd HH:mm", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}$");
            put("yyyy/MM/dd HH:mm", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}$");
            put("dd MMM yyyy HH:mm", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}$");
            put("dd MMMM yyyy HH:mm", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}$");
            put("yyyyMMddHHmmss", "^\\d{14}$");
            put("yyyyMMdd HHmmss", "^\\d{8}\\s\\d{6}$");
            put("dd-MM-yy HH:mm:ss", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd-MM-yyyy HH:mm:ss", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd.MM.yy HH:mm:ss", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd.MM.yyyy HH:mm:ss", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd/MM/yy HH:mm:ss", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd/MM/yyyy HH:mm:ss", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("yyyy-MM-dd HH:mm:ss", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("yyyy.MM.dd HH:mm:ss", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("yyyy/MM/dd HH:mm:ss", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd MMM yyyy HH:mm:ss", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd MMMM yyyy HH:mm:ss", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$");
            put("dd-MM-yy HH:mm:ss.SSS", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("dd-MM-yyyy HH:mm:ss.SSS", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("dd.MM.yy HH:mm:ss.SSS", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("dd.MM.yyyy HH:mm:ss.SSS", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("dd/MM/yy HH:mm:ss.SSS", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("dd/MM/yyyy HH:mm:ss.SSS", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("yyyy-MM-dd HH:mm:ss.SSS", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("yyyy.MM.dd HH:mm:ss.SSS", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("yyyy/MM/dd HH:mm:ss.SSS", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("dd MMM yyyy HH:mm:ss.SSS", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
            put("dd MMMM yyyy HH:mm:ss.SSS", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$");
        }
    };

    // util class, hide constructor
    private DateDetector() {
    };

    /**
     * 
     * @param dateFormat - date format for get regexp
     * @return regexp for given date format
     */
    public static String getRegexpByDateFormat(String dateFormat) {
        return getRegexpByDateFormat(dateFormat, null);
    }

    /**
     * 
     * @param dateFormat - date format for get regexp by locale
     * @return regexp for given date format
     */
    public static String getRegexpByDateFormat(String dateFormat, String locale) {
        if (locale != null && LOCALE_en_US.equalsIgnoreCase(locale)) {
            return (String) DATE_FORMAT_TO_REGEXPS_US.get(dateFormat);
        }
        return (String) DATE_FORMAT_TO_REGEXPS.get(dateFormat);
    }

    /**
     * 
     * @param regex - regexp for parse date format from string
     * <br>
     * <b>NOTES:</b> if regex could be used for US and EU locale. 
     * It returns europeans locale. For en_US locale please use
     * 
     *  {@link #getDateFormatByRegex( String regex, String locale ) }
     *
     * @return {@link java.lang.String} string wich represented Date Format
     */
    public static String getDateFormatByRegex(String regex) {
        return getDateFormatByRegex(regex, null);
    }

    /**
     * 
     * @param regex
     *          - regexp for parse date format from string by locale
     * @return {@link java.lang.String} string wich represented Date Format
     */
    public static String getDateFormatByRegex(String regex, String locale) {
        if (locale != null && LOCALE_en_US.equalsIgnoreCase(locale)) {
            return (String) DATE_FORMAT_TO_REGEXPS_US.getKey(regex);
        }
        return (String) DATE_FORMAT_TO_REGEXPS.getKey(regex);
    }

    /**
     * 
     * @param dateString
     *          date string for parse
     * @return {@link java.util.Date} converted from dateString by detected format
     * @throws ParseException
     *           - if we can not detect date format for string or we can not parse date string
     */
    public static Date getDateFromString(String dateString) throws ParseException {
        String dateFormat = detectDateFormat(dateString);
        if (dateFormat == null) {
            throw new ParseException("Unknown date format.", 0);
        }
        return getDateFromStringByFormat(dateString, dateFormat);
    }

    /**
     * 
     * @param dateString
     *          date string for parse
     * @return {@link java.util.Date} converted from dateString by detected format
     * @throws ParseException
     *           - if we can not detect date format for string or we can not parse date string
     */
    public static Date getDateFromString(String dateString, String locale) throws ParseException {
        String dateFormat = detectDateFormat(dateString, locale);
        if (dateFormat == null) {
            throw new ParseException("Unknown date format.", 0);
        }
        return getDateFromStringByFormat(dateString, dateFormat);
    }

    /**
     * 
     * @param dateString
     *          date string for parse
     * @param dateFormat
     *          format which should be applied for string
     * @return {@link java.util.Date} converted from dateString by format
     * @throws ParseException
     *           if we can not parse date string
     */
    public static Date getDateFromStringByFormat(String dateString, String dateFormat) throws ParseException {
        if (dateFormat == null) {
            throw new ParseException("Unknown date format. Format is null. ", 0);
        }
        if (dateString == null) {
            throw new ParseException("Unknown date string. Date string is null. ", 0);
        }
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat(dateFormat);
        simpleDateFormat.setLenient(false); // Don't automatically convert invalid date.
        return simpleDateFormat.parse(dateString);
    }

    /**
     * 
     * @param dateString
     *          - date string for detect date format
     * @return {@link java.lang.String} string which represented Date Format or null
     * 
     */
    public static String detectDateFormat(String dateString) {
        return detectDateFormat(dateString, null);
    }

    /**
     * 
     * @param dateString
     *          - date string for detect date format
     * @return {@link java.lang.String} string which represented Date Format or null
     */
    public static String detectDateFormat(String dateString, String locale) {
        if (dateString == null) {
            return null;
        }
        for (Object regexp : getDateFormatToRegExps(locale).values()) {
            if (dateString.toLowerCase().matches((String) regexp)) {
                return (String) getDateFormatToRegExps(locale).getKey(regexp);
            }
        }
        return null;
    }

    /**
     * Finds a date format that matches the date value given. Will try the desiredKey format before attempting others. The
     * first to match is returned.
     * 
     * @param dateString
     *          the literal value of the date (eg: "01/01/2001")
     * @param locale
     *          the locale in play
     * @param desiredKey
     *          the desired format (should be a valid key to DATE_FORMAT_TO_REGEXPS)
     * @return The key to the format that matched or null if none found.
     */
    public static String detectDateFormatBiased(String dateString, String locale, String desiredKey) {
        if (dateString == null) {
            return null;
        }
        String regex = (String) getDateFormatToRegExps(locale).get(desiredKey);
        if (regex != null && dateString.toLowerCase().matches(regex)) {
            return desiredKey;
        } else {
            return detectDateFormat(dateString, locale);
        }
    }

    public static BidiMap getDateFormatToRegExps(String locale) {
        if (locale == null || LOCALE_en_US.equalsIgnoreCase(locale)) {
            return DATE_FORMAT_TO_REGEXPS_US;
        } else {
            return DATE_FORMAT_TO_REGEXPS;
        }
    }

    /**
     * 
     * @param dateString - string for check
     * @param dateFormat - format for check
     * @return true if we can parse string by format without exception
     */
    public static boolean isValidDate(String dateString, String dateFormat) {
        try {
            getDateFromStringByFormat(dateString, dateFormat);
            return true;
        } catch (ParseException e) {
            return false;
        }
    }

    /**
     * @param dateString - string for check
     * @return true if we can parse string without exception
     */
    public static boolean isValidDate(String dateString) {
        try {
            getDateFromString(dateString);
            return true;
        } catch (ParseException e) {
            return false;
        }
    }

    /**
     * 
     * @param dateFormat - format which we will try to apply for string
     * @param dateString - string which contains date
     * @return true if we found that we know dateFormat and it applied for given string
     */
    public static boolean isValidDateFormatToStringDate(String dateFormat, String dateString) {
        String detectedDateFormat = detectDateFormat(dateString);
        if ((dateFormat != null) && (dateFormat.equals(detectedDateFormat))) {
            return true;
        }
        return false;
    }

    /**
     * 
     * @param dateFormat - format which we will try to apply for string
     * @param dateString - string which contains date
     * @param locale - locale for date format
     * @return true if we found that we know dateFormat and it applied for given string
     */
    public static boolean isValidDateFormatToStringDate(String dateFormat, String dateString, String locale) {
        String detectedDateFormat = dateFormat != null ? detectDateFormatBiased(dateString, locale, dateFormat)
                : detectDateFormat(dateString, locale);
        if ((dateFormat != null) && (dateFormat.equals(detectedDateFormat))) {
            return true;
        }
        return false;
    }
}