// Java tutorial
/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2015 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.core.util; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; import org.apache.commons.collections.BidiMap; import org.apache.commons.collections.bidimap.DualHashBidiMap; public class DateDetector { private static final String LOCALE_en_US = "en_US"; @SuppressWarnings("serial") static final BidiMap DATE_FORMAT_TO_REGEXPS_US = new DualHashBidiMap() { { put("MM-dd-yyyy", "^[0-1]?[0-9]-[0-3]?[0-9]-\\d{4}$"); put("dd/MM/yyyy", "^[0-3]?[0-9]/[0-1]?[0-9]/\\d{4}$"); put("MM-dd-yy", "^[0-1]?[0-9]-[0-3]?[0-9]-\\d{2}$"); put("dd/MM/yy", "^[0-3]?[0-9]/[0-1]?[0-9]/\\d{2}$"); put("yyyyMMdd", "^\\d{8}$"); put("dd-MM-yy", "^\\d{1,2}-\\d{1,2}-\\d{2}$"); put("dd-MM-yyyy", "^\\d{1,2}-\\d{1,2}-\\d{4}$"); put("dd.MM.yy", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}$"); put("dd.MM.yyyy", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}$"); put("MM/dd/yy", "^\\d{1,2}/\\d{1,2}/\\d{2}$"); put("MM/dd/yyyy", "^\\d{1,2}/\\d{1,2}/\\d{4}$"); put("yyyy-MM-dd", "^\\d{4}-\\d{1,2}-\\d{1,2}$"); put("yyyy.MM.dd", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}$"); put("yyyy/MM/dd", "^\\d{4}/\\d{1,2}/\\d{1,2}$"); put("dd 
MMM yyyy", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}$"); put("dd MMMM yyyy", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}$"); put("yyyyMMddHHmm", "^\\d{12}$"); put("yyyyMMdd HHmm", "^\\d{8}\\s\\d{4}$"); put("dd-MM-yy HH:mm", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}$"); put("dd-MM-yyyy HH:mm", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}$"); put("dd.MM.yy HH:mm", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}$"); put("dd.MM.yyyy HH:mm", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}$"); put("MM/dd/yy HH:mm", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}$"); put("MM/dd/yyyy HH:mm", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}$"); put("yyyy-MM-dd HH:mm", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}$"); put("yyyy.MM.dd HH:mm", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}$"); put("yyyy/MM/dd HH:mm", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}$"); put("dd MMM yyyy HH:mm", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}$"); put("dd MMMM yyyy HH:mm", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}$"); put("yyyyMMddHHmmss", "^\\d{14}$"); put("yyyyMMdd HHmmss", "^\\d{8}\\s\\d{6}$"); put("dd-MM-yy HH:mm:ss", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd-MM-yyyy HH:mm:ss", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd.MM.yy HH:mm:ss", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd.MM.yyyy HH:mm:ss", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("MM/dd/yy HH:mm:ss", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("MM/dd/yyyy HH:mm:ss", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("yyyy-MM-dd HH:mm:ss", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("yyyy.MM.dd HH:mm:ss", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("yyyy/MM/dd HH:mm:ss", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd MMM yyyy HH:mm:ss", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd MMMM yyyy HH:mm:ss", 
"^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd-MM-yy HH:mm:ss.SSS", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("dd-MM-yyyy HH:mm:ss.SSS", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("dd.MM.yy HH:mm:ss.SSS", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("dd.MM.yyyy HH:mm:ss.SSS", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("MM/dd/yy HH:mm:ss.SSS", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("MM/dd/yyyy HH:mm:ss.SSS", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("yyyy-MM-dd HH:mm:ss.SSS", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("yyyy.MM.dd HH:mm:ss.SSS", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("yyyy/MM/dd HH:mm:ss.SSS", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("dd MMM yyyy HH:mm:ss.SSS", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("dd MMMM yyyy HH:mm:ss.SSS", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); } }; @SuppressWarnings("serial") static final BidiMap DATE_FORMAT_TO_REGEXPS = new DualHashBidiMap() { { put("MM-dd-yyyy", "^[0-1]?[0-9]-[0-3]?[0-9]-\\d{4}$"); put("dd/MM/yyyy", "^[0-3]?[0-9]/[0-1]?[0-9]/\\d{4}$"); put("MM-dd-yy", "^[0-1]?[0-9]-[0-3]?[0-9]-\\d{2}$"); put("dd/MM/yy", "^[0-3]?[0-9]/[0-1]?[0-9]/\\d{2}$"); put("yyyyMMdd", "^\\d{8}$"); put("dd-MM-yy", "^\\d{1,2}-\\d{1,2}-\\d{2}$"); put("dd-MM-yyyy", "^\\d{1,2}-\\d{1,2}-\\d{4}$"); put("dd.MM.yy", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}$"); put("dd.MM.yyyy", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}$"); put("dd/MM/yy", "^\\d{1,2}/\\d{1,2}/\\d{2}$"); put("dd/MM/yyyy", "^\\d{1,2}/\\d{1,2}/\\d{4}$"); put("yyyy-MM-dd", "^\\d{4}-\\d{1,2}-\\d{1,2}$"); put("yyyy.MM.dd", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}$"); put("yyyy/MM/dd", "^\\d{4}/\\d{1,2}/\\d{1,2}$"); put("dd MMM yyyy", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}$"); put("dd MMMM 
yyyy", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}$"); put("yyyyMMddHHmm", "^\\d{12}$"); put("yyyyMMdd HHmm", "^\\d{8}\\s\\d{4}$"); put("dd-MM-yy HH:mm", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}$"); put("dd-MM-yyyy HH:mm", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}$"); put("dd.MM.yy HH:mm", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}$"); put("dd.MM.yyyy HH:mm", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}$"); put("dd/MM/yy HH:mm", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}$"); put("dd/MM/yyyy HH:mm", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}$"); put("yyyy-MM-dd HH:mm", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}$"); put("yyyy.MM.dd HH:mm", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}$"); put("yyyy/MM/dd HH:mm", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}$"); put("dd MMM yyyy HH:mm", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}$"); put("dd MMMM yyyy HH:mm", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}$"); put("yyyyMMddHHmmss", "^\\d{14}$"); put("yyyyMMdd HHmmss", "^\\d{8}\\s\\d{6}$"); put("dd-MM-yy HH:mm:ss", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd-MM-yyyy HH:mm:ss", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd.MM.yy HH:mm:ss", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd.MM.yyyy HH:mm:ss", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd/MM/yy HH:mm:ss", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd/MM/yyyy HH:mm:ss", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("yyyy-MM-dd HH:mm:ss", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("yyyy.MM.dd HH:mm:ss", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("yyyy/MM/dd HH:mm:ss", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd MMM yyyy HH:mm:ss", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd MMMM yyyy HH:mm:ss", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$"); put("dd-MM-yy HH:mm:ss.SSS", 
"^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("dd-MM-yyyy HH:mm:ss.SSS", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("dd.MM.yy HH:mm:ss.SSS", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("dd.MM.yyyy HH:mm:ss.SSS", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("dd/MM/yy HH:mm:ss.SSS", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("dd/MM/yyyy HH:mm:ss.SSS", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("yyyy-MM-dd HH:mm:ss.SSS", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("yyyy.MM.dd HH:mm:ss.SSS", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("yyyy/MM/dd HH:mm:ss.SSS", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("dd MMM yyyy HH:mm:ss.SSS", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); put("dd MMMM yyyy HH:mm:ss.SSS", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$"); } }; // util class, hide constructor private DateDetector() { }; /** * * @param dateFormat - date format for get regexp * @return regexp for given date format */ public static String getRegexpByDateFormat(String dateFormat) { return getRegexpByDateFormat(dateFormat, null); } /** * * @param dateFormat - date format for get regexp by locale * @return regexp for given date format */ public static String getRegexpByDateFormat(String dateFormat, String locale) { if (locale != null && LOCALE_en_US.equalsIgnoreCase(locale)) { return (String) DATE_FORMAT_TO_REGEXPS_US.get(dateFormat); } return (String) DATE_FORMAT_TO_REGEXPS.get(dateFormat); } /** * * @param regex - regexp for parse date format from string * <br> * <b>NOTES:</b> if regex could be used for US and EU locale. * It returns europeans locale. 
For en_US locale please use * * {@link #getDateFormatByRegex( String regex, String locale ) } * * @return {@link java.lang.String} string wich represented Date Format */ public static String getDateFormatByRegex(String regex) { return getDateFormatByRegex(regex, null); } /** * * @param regex * - regexp for parse date format from string by locale * @return {@link java.lang.String} string wich represented Date Format */ public static String getDateFormatByRegex(String regex, String locale) { if (locale != null && LOCALE_en_US.equalsIgnoreCase(locale)) { return (String) DATE_FORMAT_TO_REGEXPS_US.getKey(regex); } return (String) DATE_FORMAT_TO_REGEXPS.getKey(regex); } /** * * @param dateString * date string for parse * @return {@link java.util.Date} converted from dateString by detected format * @throws ParseException * - if we can not detect date format for string or we can not parse date string */ public static Date getDateFromString(String dateString) throws ParseException { String dateFormat = detectDateFormat(dateString); if (dateFormat == null) { throw new ParseException("Unknown date format.", 0); } return getDateFromStringByFormat(dateString, dateFormat); } /** * * @param dateString * date string for parse * @return {@link java.util.Date} converted from dateString by detected format * @throws ParseException * - if we can not detect date format for string or we can not parse date string */ public static Date getDateFromString(String dateString, String locale) throws ParseException { String dateFormat = detectDateFormat(dateString, locale); if (dateFormat == null) { throw new ParseException("Unknown date format.", 0); } return getDateFromStringByFormat(dateString, dateFormat); } /** * * @param dateString * date string for parse * @param dateFormat * format which should be applied for string * @return {@link java.util.Date} converted from dateString by format * @throws ParseException * if we can not parse date string */ public static Date 
getDateFromStringByFormat(String dateString, String dateFormat) throws ParseException { if (dateFormat == null) { throw new ParseException("Unknown date format. Format is null. ", 0); } if (dateString == null) { throw new ParseException("Unknown date string. Date string is null. ", 0); } SimpleDateFormat simpleDateFormat = new SimpleDateFormat(dateFormat); simpleDateFormat.setLenient(false); // Don't automatically convert invalid date. return simpleDateFormat.parse(dateString); } /** * * @param dateString * - date string for detect date format * @return {@link java.lang.String} string which represented Date Format or null * */ public static String detectDateFormat(String dateString) { return detectDateFormat(dateString, null); } /** * * @param dateString * - date string for detect date format * @return {@link java.lang.String} string which represented Date Format or null */ public static String detectDateFormat(String dateString, String locale) { if (dateString == null) { return null; } for (Object regexp : getDateFormatToRegExps(locale).values()) { if (dateString.toLowerCase().matches((String) regexp)) { return (String) getDateFormatToRegExps(locale).getKey(regexp); } } return null; } /** * Finds a date format that matches the date value given. Will try the desiredKey format before attempting others. The * first to match is returned. * * @param dateString * the literal value of the date (eg: "01/01/2001") * @param locale * the locale in play * @param desiredKey * the desired format (should be a valid key to DATE_FORMAT_TO_REGEXPS) * @return The key to the format that matched or null if none found. 
*/ public static String detectDateFormatBiased(String dateString, String locale, String desiredKey) { if (dateString == null) { return null; } String regex = (String) getDateFormatToRegExps(locale).get(desiredKey); if (regex != null && dateString.toLowerCase().matches(regex)) { return desiredKey; } else { return detectDateFormat(dateString, locale); } } public static BidiMap getDateFormatToRegExps(String locale) { if (locale == null || LOCALE_en_US.equalsIgnoreCase(locale)) { return DATE_FORMAT_TO_REGEXPS_US; } else { return DATE_FORMAT_TO_REGEXPS; } } /** * * @param dateString - string for check * @param dateFormat - format for check * @return true if we can parse string by format without exception */ public static boolean isValidDate(String dateString, String dateFormat) { try { getDateFromStringByFormat(dateString, dateFormat); return true; } catch (ParseException e) { return false; } } /** * @param dateString - string for check * @return true if we can parse string without exception */ public static boolean isValidDate(String dateString) { try { getDateFromString(dateString); return true; } catch (ParseException e) { return false; } } /** * * @param dateFormat - format which we will try to apply for string * @param dateString - string which contains date * @return true if we found that we know dateFormat and it applied for given string */ public static boolean isValidDateFormatToStringDate(String dateFormat, String dateString) { String detectedDateFormat = detectDateFormat(dateString); if ((dateFormat != null) && (dateFormat.equals(detectedDateFormat))) { return true; } return false; } /** * * @param dateFormat - format which we will try to apply for string * @param dateString - string which contains date * @param locale - locale for date format * @return true if we found that we know dateFormat and it applied for given string */ public static boolean isValidDateFormatToStringDate(String dateFormat, String dateString, String locale) { String detectedDateFormat 
= dateFormat != null ? detectDateFormatBiased(dateString, locale, dateFormat) : detectDateFormat(dateString, locale); if ((dateFormat != null) && (dateFormat.equals(detectedDateFormat))) { return true; } return false; } }