org.tsm.concharto.util.TimeRangeFormat.java Source code

Java tutorial

Introduction

Here is the source code for org.tsm.concharto.util.TimeRangeFormat.java

Source

/*******************************************************************************
 * Copyright 2009 Time Space Map, LLC
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package org.tsm.concharto.util;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.oro.text.perl.Perl5Util;
import org.tsm.concharto.model.time.SimpleTimeRange;
import org.tsm.concharto.model.time.TimeRange;
import org.tsm.concharto.model.time.VariablePrecisionDate;

/**
 * Converts string to TimeRange and vice versa. Supports many human readable
 * formats.  The best way to try to understand this code is to look at the 
 * unit tests.
 * 
 * Some examples:
 * <pre>
 * Parsing:
 *   1941 = 1/1/1941 00:00:00 to 1/1/1942 00:00:00
 *   December 1, 1941 - December 2, 1941 =  12/1/1941 00:00:00 to 12/3/1941 00:00:00
 *   
 * Formatting:
 *   if 12/7/1941 00:00 - 12/8/1941 00:00 = December 7, 1941
 *   if 12/7/1941 00:00 - 12/9/1941 00:00 = December 7, 1941 - December 8, 1941 (subtract)
 *   if 12/7/1941 00:00 - 12/7/1942 00:00 = December 7, 1941 - December 7, 1942 (don't subtract)
 * </pre>
 */
public class TimeRangeFormat {

    // Width of the zero-padded year produced by the "yyyy" output formats; used by stripLeadingZeros.
    private static final int DIGITS_IN_YEAR = 4;
    // AD years below this value are formatted with an explicit era (see dateFormat).
    private static final int MAX_YEAR_TO_DISLPAY_ERA = 1000;
    // Era designators looked for by adjustADBC after the input text has been normalized.
    private static final String ERA_BC = "BC";
    private static final String ERA_AD = "AD";

    // Output formats without an era, one per precision.
    private static final String FMT_TO_YEAR = "yyyy";
    private static final String FMT_TO_MONTH = "MMMM, yyyy";
    private static final String FMT_TO_DAY = "MMMM dd, yyyy";
    private static final String FMT_TO_HOUR = "MMMM dd, yyyy, hha";
    private static final String FMT_TO_MINUTE = "MMMM dd, yyyy, hh:mma";
    private static final String FMT_TO_SECOND = "MMMM dd, yyyy, hh:mm:ssa";

    // Output formats with an era ("G"), one per precision.
    private static final String FMT_TO_YEAR_ERA = "yyyy G";
    private static final String FMT_TO_MONTH_ERA = "MMMM, yyyy G";
    private static final String FMT_TO_DAY_ERA = "MMMM dd, yyyy G";
    private static final String FMT_TO_HOUR_ERA = "MMMM dd, yyyy G, hha";
    private static final String FMT_TO_MINUTE_ERA = "MMMM dd, yyyy G, hh:mma";
    private static final String FMT_TO_SECOND_ERA = "MMMM dd, yyyy G, hh:mm:ssa";

    // Input patterns accepted for a year-only date.
    private final static String[] yearPatterns = { "yyyy", "yyyyG", "yyyy G" };

    // Input patterns accepted for a month + year date.
    private final static String[] monthPatterns = { "MMM yy", "MMM, yy", "yyyy, MMM", "MMM, yyyy G",
            "yyyy G, MMM", };

    // Input patterns accepted for a full day date; the second group carries an era designator.
    private final static String[] dayPatterns = { "MM/dd/yy", "yyyy/MM/dd", "MMM dd, yy", "MMM dd, yyyy",
            "yyyy, dd MMM",

            "MM/dd/yyyy G", "yyyy G/MM/dd", "MMM dd, yyyy G", "yyyy G, dd MMM", };
    // Time-of-day suffixes (note the leading space); the static initializer appends them
    // to the day and month patterns to build the combined date + time patterns.
    private final static String[] hourPatterns = { " hha", " HH", };
    private final static String[] minutePatterns = { " hh:mma", " HH:mm", };
    private final static String[] secondPatterns = { " hh:mm:ssa", " HH:mm:ss", };

    /** Every date pattern tried by parseDate; assembled once by the static initializer below. */
    private static final String[] patterns;

    /**
     * Precision descriptors, added from finest (second) to coarsest (year).
     * getPrecision(String) uses a rank as an index into this list and
     * getPrecision(VariablePrecisionDate) walks it front to back, so the
     * insertion order must not change.
     */
    private static final List<CalendarPrecision> calendarPrecisions = new ArrayList<CalendarPrecision>();

    static {
        List<String> dayMonthPatterns = new ArrayList<String>();
        addPatterns(dayPatterns, dayMonthPatterns);
        addPatterns(monthPatterns, dayMonthPatterns);

        List<String> timePatterns = new ArrayList<String>();
        addPatterns(hourPatterns, timePatterns);
        addPatterns(minutePatterns, timePatterns);
        addPatterns(secondPatterns, timePatterns);

        //All patterns.  Order is important here
        List<String> tmpPatterns = new ArrayList<String>();
        addPatterns(dayMonthPatterns, tmpPatterns);
        addPatterns(timePatterns, tmpPatterns);
        addPatterns(yearPatterns, tmpPatterns);

        //combined date + time patterns, each with and without a separating comma;
        //combineTimePatterns yields them in the same order as a day-major nested loop
        String[] allTimePatterns = timePatterns.toArray(new String[timePatterns.size()]);
        addPatterns(combineTimePatterns(dayMonthPatterns, allTimePatterns), tmpPatterns);
        patterns = tmpPatterns.toArray(new String[tmpPatterns.size()]);

        //for calculating the date precision
        //constructor arguments, judging by the getters used in this class:
        //rank, calendar field, "empty" field value, format, format with era, test patterns
        calendarPrecisions.add(new CalendarPrecision(VariablePrecisionDate.PRECISION_SECOND, Calendar.SECOND, 0,
                FMT_TO_SECOND, FMT_TO_SECOND_ERA, combineTimePatterns(dayMonthPatterns, secondPatterns)));
        calendarPrecisions.add(new CalendarPrecision(VariablePrecisionDate.PRECISION_MINUTE, Calendar.MINUTE, 0,
                FMT_TO_MINUTE, FMT_TO_MINUTE_ERA, combineTimePatterns(dayMonthPatterns, minutePatterns)));
        //HOUR_OF_DAY (0-23) rather than HOUR (0-11): the 12-hour field reads 0 at noon, which
        //made 12:00 PM look like an empty hour and made 10AM and 10PM compare as the same hour
        calendarPrecisions.add(new CalendarPrecision(VariablePrecisionDate.PRECISION_HOUR, Calendar.HOUR_OF_DAY, 0,
                FMT_TO_HOUR, FMT_TO_HOUR_ERA, combineTimePatterns(dayMonthPatterns, hourPatterns)));
        calendarPrecisions.add(new CalendarPrecision(VariablePrecisionDate.PRECISION_DAY, Calendar.DAY_OF_MONTH, 1,
                FMT_TO_DAY, FMT_TO_DAY_ERA, dayPatterns));
        calendarPrecisions.add(new CalendarPrecision(VariablePrecisionDate.PRECISION_MONTH, Calendar.MONTH,
                Calendar.JANUARY, FMT_TO_MONTH, FMT_TO_MONTH_ERA, monthPatterns));
        calendarPrecisions.add(new CalendarPrecision(VariablePrecisionDate.PRECISION_YEAR, Calendar.YEAR, -1,
                FMT_TO_YEAR, FMT_TO_YEAR_ERA, yearPatterns)); //no empty value

    }

    /**
     * Appends every element of the array to the list, keeping the array order.
     * @param patterns patterns to append
     * @param list destination list; modified in place
     * @return the same list instance that was passed in
     */
    private static List<String> addPatterns(String[] patterns, List<String> list) {
        for (int i = 0; i < patterns.length; i++) {
            list.add(patterns[i]);
        }
        return list;
    }

    /**
     * Appends every element of one list to another, keeping the source order.
     * @param patterns patterns to append
     * @param list destination list; modified in place
     */
    private static void addPatterns(List<String> patterns, List<String> list) {
        list.addAll(patterns);
    }

    /**
     * Builds the cross product of date patterns and time patterns. For each date
     * pattern, in order, and each time pattern, in order, two entries are produced:
     * the plain concatenation, then the concatenation with a comma after the date part.
     * @param dayMonthPatterns date patterns (day or month precision)
     * @param timePatterns time-of-day suffixes
     * @return the combined patterns, two per (date, time) pair
     */
    private static String[] combineTimePatterns(List<String> dayMonthPatterns, String[] timePatterns) {
        List<String> combined = new ArrayList<String>();
        for (String datePart : dayMonthPatterns) {
            for (int i = 0; i < timePatterns.length; i++) {
                String timePart = timePatterns[i];
                combined.add(datePart + timePart);
                combined.add(datePart + ',' + timePart);
            }
        }
        String[] asArray = new String[combined.size()];
        return combined.toArray(asArray);
    }

    /**
     * Parses a time range from a wide variety of formats.  Some examples:
     * '1941', 'May 2006', '1948 - 1950', 'Jan 23, 2002 10:23 - Feb 2005'.
     * The text is split on '-': one piece is treated as a single date, two
     * pieces as the begin and end of a range.
     * @param text text to parse
     * @return the parsed TimeRange, or null when text is null or empty
     * @throws ParseException if the text splits into anything other than one
     *         or two pieces, or if a piece cannot be parsed as a date
     */
    public static TimeRange parse(String text) throws ParseException {
        if (StringUtils.isEmpty(text)) {
            return null;
        }
        // '-' separates the begin of a range from its end
        String[] pieces = StringUtils.split(text, '-');
        switch (pieces.length) {
        case 1:
            // a single date is expanded into a range covering one unit of its precision
            return parseSingleDate(text);
        case 2:
            return parseRange(pieces);
        default:
            throw new ParseException(text, 0);
        }
    }

    /**
     * Formats a time range as human readable text.  Four cases are distinguished:
     * <pre>
     * 1) begin and end have different explicit precisions: one unit is subtracted from the
     *    end and each side is formatted at its own precision
     *    12/7/1941 10:00 - 2/1/1942 00:00 = December 7, 1941 10AM - January, 1942
     * 2) end is one unit after begin: only the begin is shown
     *    12/7/1941 00:00 - 12/8/1941 00:00 = December 7, 1941
     * 3) begin and end have the same value in the precision field: shown as is (don't subtract)
     *    12/7/1941 00:00 - 12/7/1942 00:00 = December 7, 1941 - December 7, 1942
     * 4) anything else: one unit is subtracted from the end
     *    12/7/1941 00:00 - 12/9/1941 00:00 = December 7, 1941 - December 8, 1941
     * </pre>
     * @param timeRange TimeRange
     * @return the formatted range, or null when timeRange is null
     */
    public static String format(SimpleTimeRange timeRange) {
        if (timeRange == null) {
            return null;
        }

        CalendarPrecision beginCp = getPrecision(timeRange.getBegin());
        CalendarPrecision endCp = getPrecision(timeRange.getEnd());
        CalendarPrecision cp = getLeastPrecision(beginCp, endCp);

        //case 1: mixed precisions, and both were stored explicitly
        if ((beginCp.getRank() != endCp.getRank()) && timeRange.getBegin().getPrecision() != null
                && timeRange.getEnd().getPrecision() != null) {
            SimpleTimeRange shortened = subtractOneFromEnd(cp, timeRange);
            return rangeFormat(shortened.getBegin().getDate(), beginCp, shortened.getEnd().getDate(), endCp);
        }

        int field = cp.getCalendarField();
        //case 2: the range covers exactly one unit, so the begin alone describes it
        if (isOneApart(field, timeRange)) {
            return dateFormat(timeRange.getBegin().getDate(), cp);
        }
        //case 3: same field value on both sides, show the range untouched
        if (isEqual(field, timeRange)) {
            return rangeFormat(timeRange, cp);
        }
        //case 4: undo the unit that was added to the end when the range was parsed
        return rangeFormat(subtractOneFromEnd(cp, timeRange), cp);
    }

    /**
     * Returns a copy of the range whose end has been moved back by one unit of
     * the given precision.  The new end carries the rank of cp as its precision.
     * @param cp precision supplying the calendar field to subtract from and the new rank
     * @param timeRange range to copy; not modified by this method's own statements
     * @return a new range with the original begin and the shifted end
     */
    private static SimpleTimeRange subtractOneFromEnd(CalendarPrecision cp, SimpleTimeRange timeRange) {
        TimeRange shifted = new TimeRange(timeRange.getBegin(), timeRange.getEnd());
        Calendar endCal = getCalendar(timeRange.getEnd().getDate());
        endCal.add(cp.getCalendarField(), -1);
        VariablePrecisionDate newEnd = new VariablePrecisionDate(endCal.getTime(), cp.getRank());
        shifted.setEnd(newEnd);
        return shifted;
    }

    /**
     * Tests whether the end of the range is one step after the begin in the given
     * calendar field.  Delegates to isSeparatedBy with a separation of 1, which
     * compares the value of that single field only.
     * @param calendarField calendar field (e.g. Calendar.MONTH)
     * @param tr SimpleTimeRange
     * @return true if rolling the begin forward by one in that field gives the end's field value
     */
    private static boolean isOneApart(int calendarField, SimpleTimeRange tr) {
        return isSeparatedBy(calendarField, 1, tr);
    }

    /**
     * Tests whether begin and end of the range have the same value in the given
     * calendar field.  Delegates to isSeparatedBy with a separation of 0, so other
     * fields (e.g. the year when comparing days) are not taken into account.
     * @param calendarField calendar field (e.g. Calendar.MONTH)
     * @param tr SimpleTimeRange
     * @return true if the begin's and the end's values for that field are equal
     */
    private static boolean isEqual(int calendarField, SimpleTimeRange tr) {
        return isSeparatedBy(calendarField, 0, tr);
    }

    /**
     * Evaluates whether the end's value for a calendar field equals the begin's value
     * after the begin has been rolled by 'separation' in that field.  Only that one
     * field is compared.  For Calendar.YEAR with an end in the BC era the roll
     * direction is inverted.
     * @param calendarField calendar field (e.g. Calendar.MONTH)
     * @param separation number of places of separation
     * @param tr SimpleTimeRange
     * @return true if begin rolled by separation matches end for the given calendar field
     */
    private static boolean isSeparatedBy(int calendarField, int separation, SimpleTimeRange tr) {
        GregorianCalendar begin = getCalendar(tr.getBegin().getDate());
        GregorianCalendar end = getCalendar(tr.getEnd().getDate());

        boolean bcYear = (calendarField == Calendar.YEAR) && (end.get(Calendar.ERA) == GregorianCalendar.BC);
        int amount = bcYear ? -separation : separation;

        //roll wraps within the field (e.g. month 12 + 1 = month 1) without touching larger fields
        begin.roll(calendarField, amount);
        return end.get(calendarField) == begin.get(calendarField);
    }

    /**
     * Wraps a date in a new GregorianCalendar (default time zone and locale).
     * @param date instant the calendar is set to
     * @return a fresh calendar positioned at date
     */
    private static GregorianCalendar getCalendar(Date date) {
        final GregorianCalendar calendar = new GregorianCalendar();
        calendar.setTime(date);
        return calendar;
    }

    /**
     * Formats both ends of a range with the same precision.
     * @param tr SimpleTimeRange
     * @param cp CalendarPrecision supplying the format string for both ends
     * @return a string in the form fmt - fmt (e.g. yyyy - yyyy or MMMM, yyyy - MMMM, yyyy)
     */
    private static String rangeFormat(SimpleTimeRange tr, CalendarPrecision cp) {
        Date begin = tr.getBegin().getDate();
        Date end = tr.getEnd().getDate();
        return rangeFormat(begin, cp, end, cp);
    }

    /**
     * Formats two dates, each at its own precision, joined by " - ".
     * @param begin begin date
     * @param beginCp precision used to format begin
     * @param end end date
     * @param endCp precision used to format end
     * @return formatted begin, the separator " - ", then formatted end
     */
    private static String rangeFormat(Date begin, CalendarPrecision beginCp, Date end, CalendarPrecision endCp) {
        String left = dateFormat(begin, beginCp);
        String right = dateFormat(end, endCp);
        return left + " - " + right;
    }

    /**
     * Formats a single date at the given precision.  The era is included for every
     * BC date and for AD dates whose year is below MAX_YEAR_TO_DISLPAY_ERA; leading
     * zeros of the year are stripped from the result.
     * @param date date
     * @param cp CalendarPrecision supplying the format strings
     * @return String formatted string
     */
    private static String dateFormat(Date date, CalendarPrecision cp) {
        GregorianCalendar cal = getCalendar(date);
        //old AD dates read better with the era spelled out; BC dates always need it
        boolean showEra = (cal.get(Calendar.ERA) == GregorianCalendar.BC)
                || (cal.get(Calendar.YEAR) < MAX_YEAR_TO_DISLPAY_ERA);
        String pattern = showEra ? cp.getFormatWithEra() : cp.getFormat();
        return stripLeadingZeros(date, DateFormatUtils.format(date, pattern));
    }

    /**
     * Make formatted date more user friendly.  For example, '0092 BC' is converted
     * to '92 BC'.
     * <p>
     * The year is located by searching for its zero-padded, DIGITS_IN_YEAR wide form
     * rather than for the bare number: a bare '7' or '12' can also occur in the
     * day-of-month that precedes the year (e.g. 'December 07, 0007 AD'), which would
     * make the replacement hit the wrong position.
     *
     * @param date original date
     * @param text formatted text representation of that date
     * @return text with the year's leading zeros removed; text unchanged when the year
     *         already has DIGITS_IN_YEAR or more digits, or when the padded year does
     *         not occur in text
     */
    private static String stripLeadingZeros(Date date, String text) {
        //first we have to find the formatted year.
        GregorianCalendar cal = new GregorianCalendar();
        cal.setTime(date);
        String year = Integer.toString(cal.get(Calendar.YEAR));
        //nothing to strip for years that already fill (or exceed) the padded width
        if (year.length() >= DIGITS_IN_YEAR) {
            return text;
        }
        //rebuild the year the way the "yyyy" formats print it, e.g. '93' -> '0093'
        StringBuilder padded = new StringBuilder(DIGITS_IN_YEAR);
        for (int i = year.length(); i < DIGITS_IN_YEAR; i++) {
            padded.append('0');
        }
        padded.append(year);

        int yearPos = text.indexOf(padded.toString());
        if (yearPos < 0) {
            //the text holds no padded year; leave it alone instead of guessing
            return text;
        }
        return text.substring(0, yearPos) + year + text.substring(yearPos + DIGITS_IN_YEAR);
    }

    /**
     * Turns a single date into a range whose end is the next tick of whatever
     * precision the text was written with.
     *
     * <pre>
     *   If begin = 1941, then end = 1942
     *   if begin = Sept 30, 1941, then end is Oct 1, 1941
     * </pre>
     *
     * @param text text to parse
     * @return TimeRange whose begin and end both carry the precision of the text
     * @throws ParseException when text can't be parsed
     */
    private static TimeRange parseSingleDate(String text) throws ParseException {
        String trimmed = StringUtils.trimToEmpty(text);
        Date begin = parseDate(trimmed);
        CalendarPrecision cp = getPrecision(trimmed);

        //the end is exclusive: one unit of the entered precision after the begin
        Calendar endCal = getCalendar(begin);
        endCal.add(cp.getCalendarField(), 1);

        return new TimeRange(begin, cp.getRank(), endCal.getTime(), cp.getRank());
    }

    /**
     * Parses two date strings into a time range.  One unit of the end's own
     * precision is added to the end, so the end is exclusive:
     * 'December 1 - December 2' becomes 12/1 00:00:00 to 12/3 00:00:00, and
     * 'Dec 7, 1940 10am - Jan, 1941' becomes 12/7/40 10:00:00 to 2/1/41 00:00:00.
     *
     * @param split array holding the begin text at index 0 and the end text at index 1
     * @return new time range; begin and end each carry the precision of their own text
     * @throws ParseException exception if there is a parsing problem
     */
    private static TimeRange parseRange(String[] split) throws ParseException {
        String beginText = split[0];
        String endText = split[1];

        Date begin = parseDate(beginText);
        Date parsedEnd = parseDate(endText);

        CalendarPrecision beginCp = getPrecision(beginText);
        CalendarPrecision endCp = getPrecision(endText);

        //push the end out to the next tick of its precision
        Calendar endCal = getCalendar(parsedEnd);
        endCal.add(endCp.getCalendarField(), 1);

        return new TimeRange(begin, beginCp.getRank(), endCal.getTime(), endCp.getRank());
    }

    /**
     * Parses a date written in any of the supported patterns.  The text is
     * normalized first and then handed, together with a non-lenient formatter
     * and the full pattern list, to DateUtils.parseDate.
     * @param text text to parse
     * @return Date date
     * @throws ParseException if there is a parsing problem
     */
    private static Date parseDate(String text) throws ParseException {
        SimpleDateFormat strictFormat = new SimpleDateFormat();
        strictFormat.setLenient(false);
        String normalized = normalizeDateText(text);
        return DateUtils.parseDate(strictFormat, normalized, patterns);
    }

    /**
     * Normalizes user-entered date text before it is matched against the patterns:
     * trims it, collapses runs of spaces to a single space, removes periods
     * (B.C. = BC), rewrites the era designators BCE and CE to BC and AD, makes sure
     * every comma is followed by a space and puts a space in front of the era.
     * @param text text to normalize
     * @return normalized date string
     */
    private static String normalizeDateText(String text) {
        // first clean the spaces from front and back
        text = StringUtils.trimToEmpty(text);
        // collapse runs of spaces, however long, to a single space
        while (text.indexOf("  ") >= 0) {
            text = StringUtils.replace(text, "  ", " ");
        }
        text = StringUtils.replace(text, ".", ""); //B.C. = BC
        Perl5Util myRegularExpression = new Perl5Util();
        text = myRegularExpression.substitute("s/BCE/BC/gi", text);
        //only replace CE when it is not in DECEMBER, i.e. when it is not preceded by an E.
        //The preceding character is captured and written back: without the capture
        //the substitution swallowed it, turning '100CE' into '10AD'.
        text = myRegularExpression.substitute("s/([^E])CE/$1AD/gi", text);
        text = normalizeCommas(text);
        text = adjustADBC(text);
        return text;
    }

    /**
     * Fixes the date if there is a missing space between the year and the era
     * designator.  For instance, '5000BC' is converted to '5000 BC' and
     * 'March, 50BC' is converted to 'March, 50 BC'.  If '5000 BC' is passed in
     * no changes are made.  'AD' is looked for first; 'BC' is only considered
     * when the text contains no 'AD'.
     *
     * @param text to convert
     * @return converted text if necessary
     */
    private static String adjustADBC(String text) {
        String era = null;
        if (StringUtils.contains(text, ERA_AD)) {
            era = ERA_AD;
        } else if (StringUtils.contains(text, ERA_BC)) {
            era = ERA_BC;
        }
        return (era == null) ? text : padEra(era, text);
    }

    /**
     * Inserts a space in front of the first occurrence of the era designator when
     * the character before it is not already a space ('5000BC' becomes '5000 BC').
     * Text that starts with the era, or does not contain it at all, is returned
     * unchanged; previously a leading era caused a StringIndexOutOfBoundsException.
     *
     * @see TimeRangeFormat#adjustADBC
     *
     * @param era 'AD' or 'BC'
     * @param text text to convert
     * @return converted text if necessary
     */
    private static String padEra(String era, String text) {
        int eraPos = text.indexOf(era);
        //eraPos > 0: the era was found and has a character in front of it to inspect
        if (eraPos > 0 && text.charAt(eraPos - 1) != ' ') {
            return text.substring(0, eraPos) + " " + text.substring(eraPos);
        }
        return text;
    }

    /**
     * Converts strings in the form "a,b" to "a, b": a space is inserted after every
     * comma that is neither the last character nor already followed by a space.
     * Works on characters rather than on platform-encoded bytes, so non-ASCII
     * text (e.g. accented month names) passes through unchanged.
     * @param text text to normalize
     * @return converted text
     */
    private static String normalizeCommas(String text) {
        int last = text.length() - 1;
        StringBuilder normalized = new StringBuilder(text.length() + 4);
        for (int i = 0; i <= last; i++) {
            char c = text.charAt(i);
            normalized.append(c);
            //a comma that is not at the end and is missing its trailing space
            if (c == ',' && i != last && text.charAt(i + 1) != ' ') {
                normalized.append(' ');
            }
        }
        return normalized.toString();
    }

    /**
     * Determines the precision of a stored date.  When the date carries an explicit
     * precision, that one is looked up.  Otherwise the precision is derived from the
     * pattern of default "empty" field values, checking from second upwards for the
     * first field that is not empty.  E.g. 12/1/2007 00:00:00 has a precision of
     * "month" and 1/1/2007 00:00:00 has a precision of "year".
     * @param date Date to check
     * @return CalendarPrecision precision; null only if no entry matches
     */
    private static CalendarPrecision getPrecision(VariablePrecisionDate date) {
        if (date.getPrecision() != null) {
            return getPrecision(date.getPrecision());
        }
        Calendar cal = getCalendar(date.getDate());
        //the first field holding something other than its empty value decides the precision
        for (CalendarPrecision candidate : calendarPrecisions) {
            if (cal.get(candidate.getCalendarField()) != candidate.getEmptyValue()) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Finds the CalendarPrecision whose rank equals the given value.
     * @see org.tsm.concharto.model.time.VariablePrecisionDate
     * @param precision  integer value (see VariablePrecisionDate)
     * @return matching CalendarPrecision.  Null if none is found (which should never happen)
     */
    private static CalendarPrecision getPrecision(Integer precision) {
        for (int i = 0; i < calendarPrecisions.size(); i++) {
            CalendarPrecision candidate = calendarPrecisions.get(i);
            if (candidate.getRank() == precision) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Determines the precision of a date from what the user typed.  E.g. 2007 has a
     * year precision, Jan 2007 has a month precision, Jan 1, 2007 has a day precision,
     * etc.  The normalized text is tried against the test patterns of every precision
     * and the lowest matching rank wins; year precision is the fallback.
     * NOTE: We need two ways of checking precision - one for parsing user input (this
     * one) and one for formatting time ranges for users.
     *
     * @param text date string
     * @return CalendarPrecision precision, taken from calendarPrecisions at the index
     *         given by the winning rank
     */
    private static CalendarPrecision getPrecision(String text) {
        String normalized = normalizeDateText(text);
        int lowestRank = VariablePrecisionDate.PRECISION_YEAR;
        for (CalendarPrecision cp : calendarPrecisions) {
            if (isParseable(normalized, cp.getPrecisionTestFormat())) {
                lowestRank = Math.min(lowestRank, cp.getRank());
            }
        }
        //relies on the list position of each entry being equal to its rank
        return calendarPrecisions.get(lowestRank);
    }

    /**
     * Utility for checking precision of user entered dates.  Each pattern is tried
     * in turn with a non-lenient SimpleDateFormat until one of them parses the text.
     * @param text date string
     * @param patterns array of patterns (see SimpleDateFormat)
     * @return true if the date is parseable using at least one of the provided patterns
     */
    private static boolean isParseable(String text, String[] patterns) {
        SimpleDateFormat strictFormat = new SimpleDateFormat();
        strictFormat.setLenient(false);
        boolean matched = false;
        for (int i = 0; i < patterns.length && !matched; i++) {
            strictFormat.applyPattern(patterns[i]);
            try {
                strictFormat.parse(text);
                matched = true;
            } catch (ParseException ignored) {
                //this pattern does not fit; try the next one
            }
        }
        return matched;
    }

    /**
     * Picks one of the two precisions of a time range by rank: the begin precision
     * when its rank is strictly lower than the end's, otherwise the end precision
     * (which therefore also wins ties).
     * NOTE(review): calendarPrecisions is filled from second to year, so a lower rank
     * presumably means a finer precision; if so this returns the more precise of the
     * two, contrary to the method name - confirm against VariablePrecisionDate.
     * @param beginCp first CalendarPrecision to compare
     * @param endCp   second CalendarPrecision to compare
     * @return the CalendarPrecision with the lower rank
     */
    private static CalendarPrecision getLeastPrecision(CalendarPrecision beginCp, CalendarPrecision endCp) {
        return (beginCp.getRank() < endCp.getRank()) ? beginCp : endCp;
    }
}